diff options
| author | Claudius "keldu" Holeksa <mail@keldu.de> | 2026-04-22 16:49:30 +0200 |
|---|---|---|
| committer | Claudius "keldu" Holeksa <mail@keldu.de> | 2026-04-22 16:49:30 +0200 |
| commit | 1aee54ea763a19590f4aa41f991cef8f91cf6a93 (patch) | |
| tree | 7e45e43e9152520d97f991d62dd86362c27cd054 | |
| parent | 0f0af5c3e095637c7ede1c8dc20b248a3471dc3a (diff) | |
| parent | 45ebf7411d687ab5530431ab1bcc74edb0499c69 (diff) | |
| download | libs-lbm-1aee54ea763a19590f4aa41f991cef8f91cf6a93.tar.gz | |
Merge branch 'dev'
| -rw-r--r-- | default.nix | 2 | ||||
| -rw-r--r-- | examples/poiseulle_3d_gpu/sim.cpp | 4 | ||||
| -rw-r--r-- | examples/poiseulle_particles_2d_bgk_gpu/sim.cpp | 23 | ||||
| -rw-r--r-- | examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp | 20 | ||||
| -rw-r--r-- | examples/poiseulle_particles_2d_psm_gpu/sim.cpp | 19 | ||||
| -rw-r--r-- | examples/poiseulle_particles_channel_2d/poiseulle_particles_channel_2d.cpp | 3 | ||||
| -rw-r--r-- | examples/settling_cubes_2d_ibm_gpu/sim.cpp | 189 | ||||
| -rw-r--r-- | lib/core/c++/abstract/data.hpp | 26 | ||||
| -rw-r--r-- | lib/core/c++/particle.hpp | 5 | ||||
| -rw-r--r-- | lib/core/c++/particle/particle.hpp | 35 | ||||
| -rw-r--r-- | lib/core/c++/write_vtk.hpp | 4 | ||||
| -rw-r--r-- | lib/sycl/c++/data.hpp | 596 | ||||
| -rw-r--r-- | lib/sycl/tests/data.cpp | 35 | ||||
| -rwxr-xr-x | util/podman/norce_build_and_run.sh | 6 |
14 files changed, 831 insertions, 136 deletions
diff --git a/default.nix b/default.nix index 05ca001..6940267 100644 --- a/default.nix +++ b/default.nix @@ -71,7 +71,7 @@ let src = builtins.fetchurl { url = "https://git.keldu.de/forstio-forstio/snapshot/master.tar.gz"; - sha256 = "sha256:026scvd9015blphbj49d0cv5wfrxh1szzdv3p212wzk4iw643akj"; + sha256 = "sha256:17zsz10lj5dgqw3fmassgqlhbwgpd7zznbq8cl0sw1v816w3ca8z"; }; phases = [ "unpackPhase" "installPhase" ]; diff --git a/examples/poiseulle_3d_gpu/sim.cpp b/examples/poiseulle_3d_gpu/sim.cpp index 93df1f8..6916ef2 100644 --- a/examples/poiseulle_3d_gpu/sim.cpp +++ b/examples/poiseulle_3d_gpu/sim.cpp @@ -315,8 +315,8 @@ saw::error_or<void> lbm_main(int argc, char** argv){ saw::data<sch::FixedArray<sch::Particle<T,Desc::D>, particle_size>, encode::Sycl<saw::encode::Native>> lbm_sycl_particle_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_chunk_struct_view(lbm_sycl_data); - auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ diff --git a/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp b/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp index 7d1a624..b8ac35e 100644 --- a/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp @@ -183,11 +183,11 @@ saw::error_or<void> step( component<T,Desc,cmpt::ZouHeHorizontal<true>,encode::Sycl<saw::encode::Native>> flow_in{ [&](){ - uint64_t target_t_i = 64u; + uint64_t target_t_i = 32u; if(t_i.get() < target_t_i){ - return 1.0 + (0.0015 / target_t_i) * t_i.get(); + return 1.0 + (0.0002 / target_t_i) * t_i.get(); } - return 1.0015; + return 1.0002; }() }; component<T,Desc,cmpt::ZouHeHorizontal<false>,encode::Sycl<saw::encode::Native>> flow_out{1.0}; @@ -205,19 +205,18 @@ saw::error_or<void> step( case 0u: break; case 1u: - abb.apply(fields,index,t_i); + bb.apply(fields,index,t_i); break; case 2u: collision.apply(fields,macros,index,t_i); break; case 3u: - //flow_in.apply(fields,index,t_i); - equi.apply(fields,index,t_i); + flow_in.apply(fields,index,t_i); + //equi.apply(fields,index,t_i); collision.apply(fields,macros,index,t_i); break; case 4u: - // flow_out.apply(fields,index,t_i); - equi.apply(fields,index,t_i); + flow_out.apply(fields,index,t_i); collision.apply(fields,macros,index,t_i); break; default: @@ -268,7 +267,7 @@ saw::error_or<void> lbm_main(int argc, char** argv){ {{1.0}} }; - print_lbm_meta<T,Desc>(conv,{0.05},{0.01},{0.4 * dim_y}); + print_lbm_meta<T,Desc>(conv,{0.1},{1e-4},{0.4 * dim_y}); // saw::data<sch::FixedArray<sch::UInt64,Desc::D>> meta{{dim_x,dim_y}}; auto lbm_data_ptr = saw::heap<saw::data<sch::ChunkStruct<T,Desc>>>(); @@ -314,8 +313,6 @@ saw::error_or<void> lbm_main(int argc, char** argv){ saw::data<sch::MacroStruct<T,Desc>, encode::Sycl<saw::encode::Native>> lbm_sycl_macro_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_chunk_struct_view(lbm_sycl_data); - auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ @@ -329,6 +326,8 @@ saw::error_or<void> lbm_main(int argc, char** argv){ } } sycl_q.wait(); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); saw::data<sch::UInt64> time_steps{16u*4096ul}; auto& info_f = lsd_view.template get<"info">(); @@ -343,7 +342,7 @@ saw::error_or<void> lbm_main(int argc, char** argv){ } sycl_q.wait(); /* - { + if(i.get() % 128u == 0u){ { auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr); if(eov.is_error()){ diff --git a/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp b/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp index 9eeda3c..7cfd567 100644 --- a/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp @@ -191,9 +191,9 @@ saw::error_or<void> step( [&](){ uint64_t target_t_i = 64u; if(t_i.get() < target_t_i){ - return 1.0 + (0.0015 / target_t_i) * t_i.get(); + return 1.0 + (0.0002 / target_t_i) * t_i.get(); } - return 1.0015; + return 1.0002; }() }; component<T,Desc,cmpt::ZouHeHorizontal<false>,encode::Sycl<saw::encode::Native>> flow_out{1.0}; @@ -211,19 +211,19 @@ saw::error_or<void> step( case 0u: break; case 1u: - abb.apply(fields,index,t_i); + bb.apply(fields,index,t_i); break; case 2u: collision.apply(fields,macros,index,t_i); break; case 3u: - //flow_in.apply(fields,index,t_i); - equi.apply(fields,index,t_i); + flow_in.apply(fields,index,t_i); + // equi.apply(fields,index,t_i); collision.apply(fields,macros,index,t_i); break; case 4u: - // flow_out.apply(fields,index,t_i); - equi.apply(fields,index,t_i); + flow_out.apply(fields,index,t_i); + // equi.apply(fields,index,t_i); collision.apply(fields,macros,index,t_i); break; default: @@ -274,7 +274,7 @@ saw::error_or<void> lbm_main(int argc, char** argv){ {{1.0}} }; - print_lbm_meta<T,Desc>(conv,{0.05},{0.01},{0.4 * dim_y}); + print_lbm_meta<T,Desc>(conv,{0.1},{1e-4},{0.4 * dim_y}); // saw::data<sch::FixedArray<sch::UInt64,Desc::D>> meta{{dim_x,dim_y}}; auto lbm_data_ptr = saw::heap<saw::data<sch::ChunkStruct<T,Desc>>>(); @@ -320,8 +320,6 @@ saw::error_or<void> lbm_main(int argc, char** argv){ saw::data<sch::MacroStruct<T,Desc>, encode::Sycl<saw::encode::Native>> lbm_sycl_macro_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_chunk_struct_view(lbm_sycl_data); - auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ @@ -335,6 +333,8 @@ saw::error_or<void> lbm_main(int argc, char** argv){ } } sycl_q.wait(); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); saw::data<sch::UInt64> time_steps{16u*4096ul}; auto& info_f = lsd_view.template get<"info">(); diff --git a/examples/poiseulle_particles_2d_psm_gpu/sim.cpp b/examples/poiseulle_particles_2d_psm_gpu/sim.cpp index 7e76da6..75d1a14 100644 --- a/examples/poiseulle_particles_2d_psm_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_psm_gpu/sim.cpp @@ -207,9 +207,9 @@ saw::error_or<void> step( [&](){ uint64_t target_t_i = 64u; if(t_i.get() < target_t_i){ - return 1.0 + (0.0015 / target_t_i) * t_i.get(); + return 1.0 + (0.0002 / target_t_i) * t_i.get(); } - return 1.0015; + return 1.0002; }() }; component<T,Desc,cmpt::ZouHeHorizontal<false>,encode::Sycl<saw::encode::Native>> flow_out{1.0}; @@ -227,7 +227,7 @@ saw::error_or<void> step( case 0u: break; case 1u: - abb.apply(fields,index,t_i); + bb.apply(fields,index,t_i); break; case 2u: collision.apply(fields,macros,index,t_i); @@ -289,12 +289,12 @@ saw::error_or<void> lbm_main(int argc, char** argv){ converter<T> conv { // delta_x - {{1e-3}}, + {{1.0}}, // delta_t - {{1e-06}} + {{1.0}} }; - print_lbm_meta<T,Desc>(conv,{0.1},{1e-2},{0.4 * dim_y *1e-3}); + print_lbm_meta<T,Desc>(conv,{0.1},{1e-4},{0.4 * dim_y}); // saw::data<sch::FixedArray<sch::UInt64,Desc::D>> meta{{dim_x,dim_y}}; auto lbm_data_ptr = saw::heap<saw::data<sch::ChunkStruct<T,Desc>>>(); @@ -340,8 +340,6 @@ saw::error_or<void> lbm_main(int argc, char** argv){ saw::data<sch::MacroStruct<T,Desc>, encode::Sycl<saw::encode::Native>> lbm_sycl_macro_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_chunk_struct_view(lbm_sycl_data); - auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ @@ -355,6 +353,9 @@ saw::error_or<void> lbm_main(int argc, char** argv){ } } sycl_q.wait(); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); + saw::data<sch::UInt64> time_steps{16u*4096ul}; auto& info_f = lsd_view.template get<"info">(); @@ -368,6 +369,7 @@ saw::error_or<void> lbm_main(int argc, char** argv){ } } sycl_q.wait(); + /* if(i.get() % 32u == 0u){ { auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr); @@ -382,6 +384,7 @@ saw::error_or<void> lbm_main(int argc, char** argv){ } } } + */ /* { { diff --git a/examples/poiseulle_particles_channel_2d/poiseulle_particles_channel_2d.cpp b/examples/poiseulle_particles_channel_2d/poiseulle_particles_channel_2d.cpp index 7ac663f..1e6e75f 100644 --- a/examples/poiseulle_particles_channel_2d/poiseulle_particles_channel_2d.cpp +++ b/examples/poiseulle_particles_channel_2d/poiseulle_particles_channel_2d.cpp @@ -461,6 +461,8 @@ void couple_particles_to_lattice( p_acc = p_acc + p_pos_rel_vec; } + + /* So I want to include the relative velocity from both particles, but this is a bit hard considering I just assumed 0 velocity from the other party else if(n_macro_cell_particle.get() != (i.get() + 1u) and n_macro_cell_particle.get() > 0u){ // Generally compare @@ -478,6 +480,7 @@ void couple_particles_to_lattice( auto& n_info = n_cell.template get<"info">()({0u}); auto& n_macro_cell = macros.at(n_p_cell_pos); auto& n_macro_cell_particle = n_macro_cell.template get<"particle">(); + l // If neighbour is wall, then add force pushing the particle away if(n_info.get() <= 1u or (n_macro_cell_particle.get() != (i.get()+1u) and n_macro_cell_particle.get() > 0u) ) { diff --git a/examples/settling_cubes_2d_ibm_gpu/sim.cpp b/examples/settling_cubes_2d_ibm_gpu/sim.cpp index 3281239..d7b402a 100644 --- a/examples/settling_cubes_2d_ibm_gpu/sim.cpp +++ b/examples/settling_cubes_2d_ibm_gpu/sim.cpp @@ -6,6 +6,7 @@ #include <forstio/remote/filesystem/easy.hpp> #include <forstio/codec/json/json.hpp> #include <forstio/codec/simple.hpp> +#include <forstio/codec/math.hpp> namespace kel { namespace lbm { @@ -50,18 +51,25 @@ using MacroStruct = Struct< >; template<typename T, typename Desc> +using ParticleSpheroidGroup = ParticleGroup< + T, + Desc::D, + sch::ParticleCollisionSpheroid<T,2.0f> +>; + +template<typename T, typename Desc> using ParticleGroups = Tuple< - ParticleGroup< - T,Desc::D,sch::ParticleCollisionSpheroid<T> - > + ParticleSpheroidGroup<T,Desc> >; + + } template<typename T, typename Desc> saw::error_or<void> setup_initial_conditions( saw::data<sch::ChunkStruct<T,Desc>>& fields, saw::data<sch::MacroStruct<T,Desc>>& macros, - saw::data<sch::ParticleGroups<T,Desc>>& particles + saw::data<sch::ParticleSpheroidGroup<T,Desc>>& particles ){ auto& info_f = fields.template get<"info">(); // Set everything as walls @@ -103,15 +111,41 @@ saw::error_or<void> setup_initial_conditions( // Particles { - saw::data<sch::Scalar<T>> rad_p, dense_p; - rad_p.at({}).set(2.0); + saw::data<sch::Scalar<T>> dense_p; dense_p.at({}).set(1); - auto& spheroid_group = particles.template get<0u>(); - spheroid_group = create_spheroid_particle_group<T,Desc::D>( - rad_p, + // auto& spheroid_group = particles.template get<0u>(); + auto& spheroid_group = particles; + + spheroid_group = create_spheroid_particle_group<T,Desc::D,2.0f>( dense_p, {64u} ); + + { + auto& p = spheroid_group.template get<"particles">(); + + p = {{{16u}}}; + + iterator<1u>::apply( + [&](auto& index){ + // Set Pos here? + auto& p_ind = p.at(index); + + auto& p_rb = p_ind.template get<"rigid_body">(); + auto& p_pos = p_rb.template get<"position">(); + + // TODO CONTINUE HERE NEED to init pos here !!!! + + auto& p_pos_old = p_rb.template get<"position_old">(); + p_pos_old = p_pos; + }, + {}, + p.meta() + ); + } + } + // Particle in hacky flavour + { } return saw::make_void(); @@ -119,24 +153,104 @@ saw::error_or<void> setup_initial_conditions( template<typename T, typename Desc> saw::error_or<void> step( - saw::data<sch::Ptr<sch::ChunkStruct<T,Desc>>,encode::Sycl<saw::encode::Native>>& fields, - saw::data<sch::Ptr<sch::MacroStruct<T,Desc>>,encode::Sycl<saw::encode::Native>>& macros, - saw::data<sch::UInt64> t_i, - device& dev + saw::data<sch::Ptr<sch::ChunkStruct<T,Desc>>,encode::Sycl<saw::encode::Native>>& fields, + saw::data<sch::Ptr<sch::MacroStruct<T,Desc>>,encode::Sycl<saw::encode::Native>>& macros, + saw::data<sch::Ptr<sch::ParticleSpheroidGroup<T,Desc>>,encode::Sycl<saw::encode::Native>>& p_group, + saw::data<sch::UInt64> t_i, + device& dev ){ auto& q = dev.get_handle(); auto& info_f = fields.template get<"info">(); - { - } + auto& parts = p_group.template get<"particles">(); + auto& p_mask = p_group.template get<"mask">(); + auto& vels = macros.template get<"velocity">(); + auto& forces = macros.template get<"force">(); + + auto p_meta = parts.meta(); + q.submit([&](acpp::sycl::handler& h){ + + h.parallel_for(acpp::sycl::range<Desc::D>{dim_x,dim_y}, [=](acpp::sycl::id<Desc::D> idx){ + saw::data<sch::FixedArray<sch::UInt64,Desc::D>> index; + for(uint64_t i = 0u; i < Desc::D; ++i){ + index.at({{i}}).set(idx[i]); + } + + // Reset the force to zero + forces.at(index) = {}; + }); + }).wait(); + + q.submit([&](acpp::sycl::handler& h){ + h.parallel_for(acpp::sycl::range<1u>{p_meta.at({0u}).get()}, [=](acpp::sycl::id<1u> idx){ + + saw::data<sch::FixedArray<sch::UInt64,1u>> index; + for(uint64_t i = 0u; i < 1u; ++i){ + index.at({{i}}).set(idx[i]); + } + auto& p = parts.at(index); + auto& p_rb = p.template get<"rigid_body">(); + saw::data<sch::Scalar<T>> delta_t; + delta_t.at({}).set(1.0f); + + auto& p_pos = p_rb.template get<"position">(); + auto& p_rot = p_rb.template get<"rotation">(); + + iterator<Desc::D>::apply( + [&](auto& m_ind){ + saw::data<sch::Vector<T,Desc::D>> index_shift; + for(uint64_t i{0u}; i < Desc::D; ++i){ + index_shift.at({{i}}) = m_ind.at({i}).template cast_to<T>() - (vels.meta().at({i})+1u).template cast_to<T>() * 0.5; + } + + saw::data<sch::Vector<T,Desc::D>> transformed_pos; + for(uint64_t i{0u}; i < Desc::D; ++i){ + // TODO add rotation, scaling here. + transformed_pos.at({{i}}) = index_shift.at({{i}}); + } + + // Lagrange indicator position + auto p_pos_lag = p_pos + transformed_pos; + + // Pick the closest velocity and clamp it + saw::data<sch::FixedArray<sch::UInt64,Desc::D>> p_cell_pos; + saw::data<sch::Vector<sch::UInt64,Desc::D>> p_cell_pos_vec; + for(uint64_t i{0u}; i < Desc::D; ++i){ + p_cell_pos.at({{i}}) = (p_pos_lag.at({{i}}) + 0.5).template cast_to<sch::UInt64>(); + p_cell_pos.at({{i}}).set(std::max(0ul,std::min(p_cell_pos.at({{i}}).get(), vels.meta().at({{i}}).get() - 2ul))); + p_cell_pos_vec.at({{i}}) = p_cell_pos.at({{i}}); + } + + auto& u_fluid = vels.at(p_cell_pos); + + // this is our relative position to the particle + auto rel_cell_to_part_pos = p_cell_pos_vec.template cast_to<T>() - p_pos; + + auto p_vel = (p_pos - p_rb.template get<"position_old">()) * delta_t; + + auto u_solid = p_vel + saw::math::cross(p_rot,rel_cell_to_part_pos); + + // Force + auto force = (u_solid - u_fluid) / delta_t; + + // TODO HERE ATOMIC! !!!! + forces.at(p_cell_pos) = forces.at(p_cell_pos) + force; + + // TODO APPLY FORCE TO PARTICLE + }, + {}, + p_mask.meta() + ); + + verlet_step_lambda<T,Desc::D>(p,delta_t); + }); + }).wait(); // auto coll_ev = q.submit([&](acpp::sycl::handler& h){ - saw::data<sch::Vector<T,Desc::D>> force; - force.at({{1}}).set(-1.0); // Need nicer things to handle the flow. I see improvement here - component<T,Desc,cmpt::BGKGuo, encode::Sycl<saw::encode::Native>> collision{0.8}; + component<T,Desc,cmpt::BGK, encode::Sycl<saw::encode::Native>> collision{0.8}; component<T,Desc,cmpt::BounceBack,encode::Sycl<saw::encode::Native>> bb; h.parallel_for(acpp::sycl::range<Desc::D>{dim_x,dim_y}, [=](acpp::sycl::id<Desc::D> idx){ @@ -154,14 +268,12 @@ saw::error_or<void> step( bb.apply(fields,index,t_i); break; case 2u: - collision.apply(fields,macros,index,t_i); + // collision.apply(fields,macros,index,t_i); break; default: break; } }); - - }).wait(); // Step @@ -210,9 +322,11 @@ saw::error_or<void> lbm_main(int argc, char** argv){ // saw::data<sch::FixedArray<sch::UInt64,Desc::D>> meta{{dim_x,dim_y}}; auto lbm_data_ptr = saw::heap<saw::data<sch::ChunkStruct<T,Desc>>>(); auto lbm_macro_data_ptr = saw::heap<saw::data<sch::MacroStruct<T,Desc>>>(); - auto lbm_particles_data = saw::data<sch::ParticleGroups<T,Desc>>(); + auto lbm_particle_data_ptr = saw::heap<saw::data<sch::ParticleSpheroidGroup<T,Desc>>>(); + // auto lbm_particles_ptr = saw::heap<saw::data<sch::FixedArray<sch::ParticleRigidBody<T,Desc::D>,part_count>>>(); + // saw::data<sch::Array<T,Desc::D>> p_mask; - std::cout<<"Estimated Bytes: "<<memory_estimate<sch::ChunkStruct<T,Desc>,sch::MacroStruct<T,Desc>>().get()<<std::endl; + std::cout<<"Estimated Bytes of LBM Fields: "<<memory_estimate<sch::ChunkStruct<T,Desc>,sch::MacroStruct<T,Desc>>().get()<<std::endl; auto eo_aio = saw::setup_async_io(); if(eo_aio.is_error()){ @@ -236,7 +350,7 @@ saw::error_or<void> lbm_main(int argc, char** argv){ sycl_q.wait(); { - auto eov = setup_initial_conditions<T,Desc>(*lbm_data_ptr,*lbm_macro_data_ptr,lbm_particles_data); + auto eov = setup_initial_conditions<T,Desc>(*lbm_data_ptr,*lbm_macro_data_ptr,*lbm_particle_data_ptr); if(eov.is_error()){ return eov; } @@ -250,10 +364,8 @@ saw::error_or<void> lbm_main(int argc, char** argv){ saw::data<sch::ChunkStruct<T,Desc>, encode::Sycl<saw::encode::Native>> lbm_sycl_data{sycl_q}; saw::data<sch::MacroStruct<T,Desc>, encode::Sycl<saw::encode::Native>> lbm_sycl_macro_data{sycl_q}; - sycl_q.wait(); + saw::data<sch::ParticleSpheroidGroup<T,Desc>, encode::Sycl<saw::encode::Native>> lbm_sycl_particle_data{sycl_q}; - auto lsd_view = make_chunk_struct_view(lbm_sycl_data); - auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ @@ -266,7 +378,24 @@ saw::error_or<void> lbm_main(int argc, char** argv){ return eov; } } + { + auto eov = dev.copy_to_device(*lbm_particle_data_ptr,lbm_sycl_particle_data); + if(eov.is_error()){ + return eov; + } + } sycl_q.wait(); + + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); + auto lsdp_view = make_view(lbm_sycl_particle_data); + + { + auto eov = write_vtk_file(out_dir,"ms",0u, *lbm_macro_data_ptr); + if(eov.is_error()){ + return eov; + } + } saw::data<sch::UInt64> time_steps{16u*4096ul}; auto& info_f = lsd_view.template get<"info">(); @@ -274,14 +403,13 @@ saw::error_or<void> lbm_main(int argc, char** argv){ for(saw::data<sch::UInt64> i{0u}; i < time_steps and krun; ++i){ // BC + Collision { - auto eov = step<T,Desc>(lsd_view,lsdm_view,i,dev); + auto eov = step<T,Desc>(lsd_view,lsdm_view,lsdp_view,i,dev); if(eov.is_error()){ return eov; } } sycl_q.wait(); - /* - { + if(i.get()%1u ==0u){ { auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr); if(eov.is_error()){ @@ -295,7 +423,6 @@ saw::error_or<void> lbm_main(int argc, char** argv){ } } } - */ /* { { diff --git a/lib/core/c++/abstract/data.hpp b/lib/core/c++/abstract/data.hpp index e8f1757..0075718 100644 --- a/lib/core/c++/abstract/data.hpp +++ b/lib/core/c++/abstract/data.hpp @@ -4,28 +4,48 @@ namespace kel { namespace sch { +struct Void {}; + struct UnsignedInteger {}; struct SignedInteger {}; struct FloatingPoint {}; template<typename StorageT, typename InterfaceT> struct MixedPrecision { + using Meta = Void; using StorageType = StorageT; using InterfaceType = InterfaceT; }; template<typename PrimType, uint64_t N> struct Primitive { - using PrimitiveType = PrimType; + using Meta = Void; + using Type = PrimType; static constexpr uint64_t Bytes = N; }; template<typename T, uint64_t... Dims> -struct Array { - using InnerType = T; +struct FixedArray { + using Meta = Void; + using Inner = T; static constexpr std::array<uint64_t,sizeof...(Dims)> Dimensions{Dims...}; }; +template<typename T, uint64_t Dims> +struct Array { + using Meta = FixedArray<UInt64,Dims>; + using Inner = T; + static constexpr std::array<uint64_t,sizeof...(Dims)> Dimensions{Dims}; +}; + +template<typename... T> +struct Tuple { +}; } + +template<typename Sch> +struct schema { + using Type = Sch; +}; } diff --git a/lib/core/c++/particle.hpp b/lib/core/c++/particle.hpp index b098ecc..691a74b 100644 --- a/lib/core/c++/particle.hpp +++ b/lib/core/c++/particle.hpp @@ -11,7 +11,12 @@ struct Particle {}; template<typename T, typename Descriptor, typename Encode> class component<T, Descriptor, cmpt::Particle, Encode> { +private: + saw::data<sch::Scalar<T>> dt_; public: + component(saw::data<sch::Scalar<T>> dt__): + dt_{dt__} + {} template<typename ParticleSchema, typename MacroFieldSchema> void apply(const saw::data<ParticleSchema, Encode>& particles, const saw::data<MacroFieldSchema,Encode>& macros, saw::data<sch::UInt64> index, saw::data<sch::UInt64> time_step) const { diff --git a/lib/core/c++/particle/particle.hpp b/lib/core/c++/particle/particle.hpp index c0d115f..7af43dc 100644 --- a/lib/core/c++/particle/particle.hpp +++ b/lib/core/c++/particle/particle.hpp @@ -40,9 +40,8 @@ using ParticleRigidBody = Struct< Member<typename impl::rotation_type_helper<T,D>::Schema, "angular_acceleration"> >; -template<typename T> +template<typename T, typename saw::native_data_type<T>::type radius = 1.0f> using ParticleCollisionSpheroid = Struct< - Member<Scalar<T>, "radius"> >; template<typename T, uint64_t D> @@ -52,42 +51,26 @@ using Particle = Struct< // Member<Array<Float64,D>, "mask">, >; -template<typename T, uint64_t D, typename CollisionType = ParticleCollisionSpheroid<T>> +template<typename T, uint64_t D, typename CollisionType = ParticleCollisionSpheroid<T,2.0f>> using ParticleGroup = Struct< Member<Array<T,D>, "mask">, - Member<CollisionType, "collision">, - Member<Scalar<T>, "mass">, + Member<FixedArray<Scalar<T>,1u>, "density">, Member<Array<Particle<T,D>>, "particles"> >; } -template<typename T, uint64_t D> -saw::data<sch::ParticleGroup<T,D, sch::ParticleCollisionSpheroid<T>>> create_spheroid_particle_group( - saw::data<sch::Scalar<T>> rad_p, +template<typename T, uint64_t D, typename saw::native_data_type<T>::type radius> +saw::data<sch::ParticleGroup<T,D, sch::ParticleCollisionSpheroid<T,radius>>> create_spheroid_particle_group( saw::data<sch::Scalar<T>> density_p, const saw::data<sch::UInt64>& mask_resolution ){ - saw::data<sch::ParticleGroup<T,D,sch::ParticleCollisionSpheroid<T>>> part; + saw::data<sch::ParticleGroup<T,D,sch::ParticleCollisionSpheroid<T,2.0f>>> part; auto& mask = part.template get<"mask">(); - auto& collision = part.template get<"collision">(); - auto& mass = part.template get<"mass">(); + auto& density = part.template get<"density">().at({{0u}}); - if constexpr ( D == 1u ){ - saw::data<sch::Scalar<T>> c; - c.at({}).set(2.0); - mass = rad_p * c * density_p; - } else if constexpr ( D == 2u){ - saw::data<sch::Scalar<T>> pi; - pi.at({}).set(3.14159); - mass = rad_p * rad_p * pi * density_p; - } else if constexpr ( D == 3u ){ - saw::data<sch::Scalar<T>> c; - c.at({}).set(3.14159 * 4.0 / 3.0); - mass = rad_p * rad_p * rad_p * c * density_p; - } else { - static_assert(D == 0u or D > 3u, "Dimensions only supported for Dim 1,2 & 3."); - } + static_assert(D >= 1u and D <= 3u, "Dimensions only supported for Dim 1,2 & 3."); + density = density_p; saw::data<sch::FixedArray<sch::UInt64,D>> mask_dims; for(uint64_t i = 0u; i < D; ++i){ diff --git a/lib/core/c++/write_vtk.hpp b/lib/core/c++/write_vtk.hpp index f925361..e852172 100644 --- a/lib/core/c++/write_vtk.hpp +++ b/lib/core/c++/write_vtk.hpp @@ -136,7 +136,7 @@ struct lbm_vtk_writer<sch::Vector<T,D>> { template<typename T> struct lbm_vtk_writer<sch::Scalar<T>> { static saw::error_or<void> apply_header(std::ostream& vtk_file, std::string_view name){ - vtk_file<<"SCALARS "<<name<<" float\n"; + vtk_file<<"SCALARS "<<name<<" float 1\n"; vtk_file<<"LOOKUP_TABLE default\n"; return saw::make_void(); } @@ -206,7 +206,7 @@ struct lbm_vtk_writer<sch::Struct<sch::Member<sch::Chunk<MemberT,Ghost,Dims...>, if constexpr (sizeof...(MemberT) > 0u){ // POINT DATA { - vtk_file << "POINT_DATA " << pd_size.get() <<"\n"; + vtk_file << "\nPOINT_DATA " << pd_size.get() <<"\n"; } // HEADER TO BODY diff --git a/lib/sycl/c++/data.hpp b/lib/sycl/c++/data.hpp index 50adb4f..9f43848 100644 --- a/lib/sycl/c++/data.hpp +++ b/lib/sycl/c++/data.hpp @@ -30,12 +30,23 @@ private: SAW_FORBID_COPY(data); SAW_FORBID_MOVE(data); public: + data(const data<typename meta_schema<Schema>::MetaSchema>& meta__, acpp::sycl::queue& q__): + q_{&q__}, + values_{nullptr} + { + (void) meta__; + SAW_ASSERT(q_); + values_ = acpp::sycl::malloc_device<data<Sch,Encode>>(ct_multiply<uint64_t,Dims...>::value,*q_); + SAW_ASSERT(values_); + } + data(acpp::sycl::queue& q__): q_{&q__}, values_{nullptr} { + SAW_ASSERT(q_); values_ = acpp::sycl::malloc_device<data<Sch,Encode>>(ct_multiply<uint64_t,Dims...>::value,*q_); - SAW_ASSERT(values_ and q_); + SAW_ASSERT(values_); } ~data(){ @@ -60,7 +71,7 @@ public: return values_[kel::lbm::flatten_index<schema::UInt64,sizeof...(Dims)>::apply(index,get_dims()).get()]; } - constexpr data<Sch,Encode>* flat_data() { + constexpr data<Sch,Encode>* flat_data() const { return values_; } }; @@ -81,7 +92,7 @@ public: {} data(data<schema::FixedArray<Sch,Dims...>, kel::lbm::encode::Sycl<Encode>>& values__): - values_{&values__.at({})} + values_{values__.flat_data()} {} data(data<Sch,Encode>* values__): @@ -100,9 +111,141 @@ public: return values_[kel::lbm::flatten_index<schema::UInt64,sizeof...(Dims)>::apply(index,get_dims()).get()]; } - constexpr data<Sch,Encode>* flat_data() { + constexpr data<Sch,Encode>* flat_data() const { + return values_; + } +}; + +template<typename Sch, uint64_t Dims, typename Encode> +class data<schema::Array<Sch,Dims>, kel::lbm::encode::Sycl<Encode>> final { +public: + using Schema = schema::Array<Sch,Dims>; +private: + static_assert(Dims > 0u, "Zero Dim Arrays make no sense here. If you meant to use this for math style approaches then use Tensor instead of Array"); + + data<Sch,Encode>* values_; + data<schema::FixedArray<schema::UInt64,Dims>,Encode> meta_; + acpp::sycl::queue* q_; + + SAW_FORBID_COPY(data); + SAW_FORBID_MOVE(data); +public: + data(const data<typename meta_schema<Schema>::MetaSchema>& meta__, acpp::sycl::queue& q__): + values_{nullptr}, + meta_{meta__}, + q_{&q__} + { + SAW_ASSERT(q_); + values_ = acpp::sycl::malloc_device<data<Sch,Encode>>(flat_size().get(),*q_); + SAW_ASSERT(values_); + } + + data(acpp::sycl::queue& q__): + values_{nullptr}, + meta_{}, + q_{&q__} + { + SAW_ASSERT(q_); + } + + ~data(){ + if(not values_){ + return; + } + SAW_ASSERT(q_); + + acpp::sycl::free(values_,*q_); + values_ = nullptr; + } + + constexpr data<schema::FixedArray<schema::UInt64, Dims>, Encode> meta() const { + return meta_; + } + + constexpr data<Sch,Encode>& at(const data<schema::FixedArray<schema::UInt64,Dims>, Encode>& index){ + return values_[kel::lbm::flatten_index<schema::UInt64,Dims>::apply(index,meta()).get()]; + } + + constexpr data<Sch,Encode>& at(const data<schema::FixedArray<schema::UInt64,Dims>, Encode>& index) const{ + return values_[kel::lbm::flatten_index<schema::UInt64,Dims>::apply(index,meta()).get()]; + } + + constexpr error_or<void> reset_to(const data<typename meta_schema<Schema>::MetaSchema>& meta_arg){ + SAW_ASSERT(q_); + meta_ = meta_arg; + + if(values_){ + acpp::sycl::free(values_,*q_); + } + values_ = acpp::sycl::malloc_device<data<Sch,Encode>>(flat_size().get(),*q_); + SAW_ASSERT(q_); + + return make_void(); + } + + constexpr data<Sch,Encode>* flat_data() const { + return values_; + } + + constexpr data<schema::UInt64,Encode> flat_size() const { + data<schema::UInt64> mult{1u}; + + for(uint64_t i{0u}; i < Dims; ++i){ + mult = mult * meta_.at({i}); + } + + return mult; + } +}; + +template<typename Sch, uint64_t Dims, typename Encode> +class data<schema::Ptr<schema::Array<Sch,Dims>>, kel::lbm::encode::Sycl<Encode>> final { +public: + using Schema = schema::Ptr<schema::Array<Sch,Dims>>; +private: + static_assert(Dims > 0u, "Zero Dim Arrays make no sense here. If you meant to use this for math style approaches then use Tensor instead of Array"); + + data<Sch,Encode>* values_; + data<schema::FixedArray<schema::UInt64,Dims>,Encode> meta_; + +public: + SAW_DEFAULT_COPY(data); + SAW_DEFAULT_MOVE(data); + + data(): + values_{nullptr} + {} + + data(const data<schema::Array<Sch,Dims>, kel::lbm::encode::Sycl<Encode>>& values__): + values_{values__.flat_data()}, + meta_{values__.meta()} + {} + + constexpr data<schema::FixedArray<schema::UInt64, Dims>, Encode> meta() const { + return meta_; + } + + constexpr data<Sch,Encode>& at(const data<schema::FixedArray<schema::UInt64,Dims>, Encode>& index){ + return values_[kel::lbm::flatten_index<schema::UInt64,Dims>::apply(index,meta()).get()]; + } + + constexpr data<Sch,Encode>& at(const data<schema::FixedArray<schema::UInt64,Dims>, Encode>& index) const{ + return values_[kel::lbm::flatten_index<schema::UInt64,Dims>::apply(index,meta()).get()]; + } + + constexpr data<Sch,Encode>* flat_data() const { return values_; } + + constexpr data<schema::UInt64,Encode> flat_size() const { + data<schema::UInt64> mult{1u}; + + for(uint64_t i{0u}; i < Dims; ++i){ + mult = mult * meta_.at({i}); + } + + return mult; + } }; template<typename Sch, uint64_t Ghost, uint64_t... Sides, typename Encode> @@ -115,6 +258,10 @@ private: data<InnerSchema, kel::lbm::encode::Sycl<Encode>> values_; public: + data(const data<typename meta_schema<Schema>::MetaSchema>& meta__, acpp::sycl::queue& q__): + values_{meta__,q__} + {} + data(acpp::sycl::queue& q__): values_{q__} {} @@ -131,6 +278,10 @@ public: return data<InnerSchema,kel::lbm::encode::Sycl<Encode>>::get_dims(); } + static constexpr auto ghost_meta() { + return data<InnerSchema,kel::lbm::encode::Sycl<Encode>>::meta(); + } + data<ValueSchema, Encode>& at(const data<schema::FixedArray<schema::UInt64,sizeof...(Sides)>>& index){ std::decay_t<decltype(index)> ind; for(uint64_t i = 0u; i < sizeof...(Sides); ++i){ @@ -151,7 +302,11 @@ public: return data<schema::FixedArray<schema::UInt64, sizeof...(Sides)>,Encode>{{Sides...}}; } - auto flat_data(){ + static constexpr auto meta(){ + return data<schema::FixedArray<schema::UInt64, sizeof...(Sides)>,Encode>{{Sides...}}; + } + + auto flat_data() const { return values_.flat_data(); } @@ -178,6 +333,10 @@ public: values_{nullptr} {} + data(const data<kel::lbm::sch::Chunk<Sch,Ghost,Sides...>, kel::lbm::encode::Sycl<Encode>>& values__): + values_{values__.flat_data()} + {} + data(data<Sch,Encode>* values__): values_{values__} {} @@ -194,6 +353,10 @@ public: return data<InnerSchema,kel::lbm::encode::Sycl<Encode>>::get_dims(); } + static constexpr auto ghost_meta() { + return data<InnerSchema,kel::lbm::encode::Sycl<Encode>>::meta(); + } + data<ValueSchema, Encode>& at(const data<schema::FixedArray<schema::UInt64,sizeof...(Sides)>>& index){ std::decay_t<decltype(index)> ind; for(uint64_t i = 0u; i < sizeof...(Sides); ++i){ @@ -210,11 +373,15 @@ public: return values_.at(ind); } + static constexpr auto meta(){ + return data<schema::FixedArray<schema::UInt64, sizeof...(Sides)>,Encode>{{Sides...}}; + } + static constexpr auto get_dims(){ return data<schema::FixedArray<schema::UInt64, sizeof...(Sides)>,Encode>{{Sides...}}; } - auto flat_data(){ + auto flat_data() const { return values_.flat_data(); } @@ -224,6 +391,70 @@ public: }; template<typename... Members, typename Encode> +struct data<schema::Tuple<Members...>, kel::lbm::encode::Sycl<Encode>> final { +public: + using StorageT = std::tuple<data<Members,kel::lbm::encode::Sycl<Encode>>...>; + using Schema = schema::Tuple<Members...>; +private: + StorageT members_; + + /** + * A helper constructor to forward the sycl queue to the inner "default" constructors + */ + template<std::size_t... Is> + constexpr data(acpp::sycl::queue& q, std::index_sequence<Is...>): + members_{(static_cast<void>(Is), q)...} + {} +public: + /* + data(data<typename meta_schema<Schema>::MetaSchema>& meta__, acpp::sycl::queue& q__): + data{q__, std::make_index_sequence<sizeof...(Members)>{}} + {} + */ + + data(acpp::sycl::queue& q__): + data{q__, std::make_index_sequence<sizeof...(Members)>{}} + { + q__.wait(); + } + + template<size_t i> + auto& get(){ + return std::get<i>(members_); + } + + template<size_t i> + auto& get() const { + return std::get<i>(members_); + } +}; + +template<typename... Members, typename Encode> +struct data<schema::Ptr<schema::Tuple<Members...>>, kel::lbm::encode::Sycl<Encode>> final { +public: + using StorageT = std::tuple<data<schema::Ptr<Members>,kel::lbm::encode::Sycl<Encode>>...>; + using Schema = schema::Tuple<Members...>; +private: + StorageT members_; + + /** + * A helper constructor to forward the sycl queue to the inner "default" constructors + */ +public: + data() = default; + + template<size_t i> + auto& get(){ + return std::get<i>(members_); + } + + template<size_t i> + const auto& get() const { + return std::get<i>(members_); + } +}; + +template<typename... Members, typename Encode> class data<schema::Struct<Members...>, kel::lbm::encode::Sycl<Encode> > final { public: using StorageT = std::tuple<data<typename Members::ValueType,kel::lbm::encode::Sycl<Encode>>...>; @@ -235,6 +466,9 @@ private: */ StorageT members_; + /** + * A helper constructor to forward the sycl queue to the inner "default" constructors + */ template<std::size_t... Is> constexpr data(acpp::sycl::queue& q, std::index_sequence<Is...>): members_{(static_cast<void>(Is), q)...} @@ -246,13 +480,23 @@ public: q__.wait(); } + template<size_t i> + auto& get(){ + return std::get<i>(members_); + } + + template<size_t i> + auto& get() const { + return std::get<i>(members_); + } + template<saw::string_literal K> auto& get(){ return std::get<parameter_key_pack_index<K, Members::KeyLiteral...>::value>(members_); } template<saw::string_literal K> - const auto& get() const { + auto& get() const { return std::get<parameter_key_pack_index<K, Members::KeyLiteral...>::value>(members_); } }; @@ -268,17 +512,26 @@ private: * Do it here by specializing. */ StorageT members_; - public: data() = default; + template<size_t i> + auto& get(){ + return std::get<i>(members_); + } + + template<size_t i> + auto& get() const { + return std::get<i>(members_); + } + template<saw::string_literal K> auto& get(){ return std::get<parameter_key_pack_index<K, Keys...>::value>(members_); } template<saw::string_literal K> - const auto& get() const { + auto& get() const { return std::get<parameter_key_pack_index<K, Keys...>::value>(members_); } }; @@ -301,14 +554,10 @@ struct sycl_copy_helper<sch::Struct<Members...>, Encode> final { auto& host_member_data = host_data.template get<M::KeyLiteral>(); auto& sycl_member_data = sycl_data.template get<M::KeyLiteral>(); - auto host_ptr = host_member_data.flat_data(); - auto sycl_ptr = sycl_member_data.flat_data(); - - static_assert(sizeof(std::decay_t<decltype(sycl_ptr)>) == sizeof(std::decay_t<decltype(host_ptr)>), "Unequal size"); - - q.submit([&](acpp::sycl::handler& h){ - h.copy(host_ptr,sycl_ptr, host_member_data.flat_size().get()); - }).wait(); + auto eov = sycl_copy_helper<typename M::ValueType,Encode>::copy_to_device(host_member_data,sycl_member_data,q); + if(eov.is_error()){ + return eov; + } return copy_to_device_member<i+1u>(host_data,sycl_data,q); } @@ -328,12 +577,10 @@ struct sycl_copy_helper<sch::Struct<Members...>, Encode> final { auto& host_member_data = host_data.template get<M::KeyLiteral>(); auto& sycl_member_data = sycl_data.template get<M::KeyLiteral>(); - auto host_ptr = host_member_data.flat_data(); - auto sycl_ptr = sycl_member_data.flat_data(); - - q.submit([&](acpp::sycl::handler& h){ - h.copy(sycl_ptr,host_ptr, host_member_data.flat_size().get()); - }).wait(); + auto eov = sycl_copy_helper<typename M::ValueType,Encode>::copy_to_host(sycl_member_data,host_member_data,q); + if(eov.is_error()){ + return eov; + } return copy_to_host_member<i+1u>(sycl_data,host_data,q); } @@ -345,6 +592,107 @@ struct sycl_copy_helper<sch::Struct<Members...>, Encode> final { static saw::error_or<void> copy_to_host(saw::data<Schema,encode::Sycl<Encode>>& sycl_data, saw::data<Schema,Encode>& host_data, sycl::queue& q){ return copy_to_host_member<0u>(sycl_data, host_data, q); } + + template<uint64_t i> + static saw::error_or<void> malloc_on_device_member(saw::data<Schema,Encode>& host_data, saw::data<Schema,encode::Sycl<Encode>>& sycl_data, sycl::queue& q){ + + if constexpr (i < sizeof...(Members)){ + using M = typename saw::parameter_pack_type<i,Members...>::type; + auto& host_member_data = host_data.template get<i>(); + auto& sycl_member_data = sycl_data.template get<i>(); + + auto eov = sycl_copy_helper<typename M::ValueType,Encode>::malloc_on_device(host_member_data,sycl_member_data,q); + if(eov.is_error()){ + return eov; + } + + return malloc_on_device_member<i+1u>(host_data,sycl_data,q); + } + + return saw::make_void(); + } + + static saw::error_or<void> malloc_on_device(saw::data<Schema,Encode>& host_data, saw::data<Schema,encode::Sycl<Encode>>& sycl_data, sycl::queue& q){ + return malloc_on_device_member<0u>(host_data,sycl_data,q); + } +}; + +template<typename... Members, typename Encode> +struct sycl_copy_helper<sch::Tuple<Members...>, Encode> final { + using Schema = sch::Tuple<Members...>; + + template<uint64_t i> + static saw::error_or<void> copy_to_device_member(saw::data<Schema,Encode>& host_data, saw::data<Schema,encode::Sycl<Encode>>& sycl_data, sycl::queue& q){ + if constexpr (i < sizeof...(Members)){ + using M = typename saw::parameter_pack_type<i,Members...>::type; + auto& host_member_data = host_data.template get<i>(); + auto& sycl_member_data = sycl_data.template get<i>(); + + auto eov = sycl_copy_helper<M,Encode>::copy_to_device(host_member_data,sycl_member_data,q); + if(eov.is_error()){ + return eov; + } + + return copy_to_device_member<i+1u>(host_data,sycl_data,q); + } + + return saw::make_void(); + } + + static saw::error_or<void> copy_to_device(saw::data<Schema,Encode>& host_data, saw::data<Schema, encode::Sycl<Encode>>& sycl_data, sycl::queue& q){ + return copy_to_device_member<0u>(host_data, sycl_data, q); + } + + template<uint64_t i> + static saw::error_or<void> copy_to_host_member(saw::data<Schema,encode::Sycl<Encode>>& sycl_data, saw::data<Schema,Encode>& host_data, sycl::queue& q){ + if constexpr (i < sizeof...(Members)){ + using M = typename saw::parameter_pack_type<i,Members...>::type; + + auto& host_member_data = host_data.template get<i>(); + auto& sycl_member_data = sycl_data.template get<i>(); + + auto eov = sycl_copy_helper<M,Encode>::copy_to_host(sycl_member_data,host_member_data,q); + if(eov.is_error()){ + return eov; + } + + return copy_to_host_member<i+1u>(sycl_data,host_data,q); + } + + return saw::make_void(); + } + + + static saw::error_or<void> copy_to_host( + saw::data<Schema,Encode>& host_data, + saw::data<Schema,encode::Sycl<Encode>>& sycl_data, + sycl::queue& q + ){ + return copy_to_host_member<0u>(sycl_data, host_data, q); + } + + template<uint64_t i> + static saw::error_or<void> malloc_on_device_member(saw::data<Schema,Encode>& host_data, saw::data<Schema,encode::Sycl<Encode>>& sycl_data, sycl::queue& q){ + + if constexpr (i < sizeof...(Members)){ + using M = typename saw::parameter_pack_type<i,Members...>::type; + auto& host_member_data = host_data.template get<i>(); + auto& sycl_member_data = sycl_data.template get<i>(); + + auto eov = sycl_copy_helper<M,Encode>::malloc_on_device(host_member_data,sycl_member_data,q); + if(eov.is_error()){ + return eov; + } + + return malloc_on_device_member<i+1u>(host_data,sycl_data,q); + } + + return saw::make_void(); + } + + static saw::error_or<void> malloc_on_device(saw::data<Schema,Encode>& host_data, saw::data<Schema,encode::Sycl<Encode>>& sycl_data, sycl::queue& q){ + return malloc_on_device_member<0u>(host_data,sycl_data,q); + } }; template<typename Sch, uint64_t... Dims, typename Encode> @@ -374,43 +722,205 @@ struct sycl_copy_helper<sch::FixedArray<Sch,Dims...>, Encode> final { }).wait(); return saw::make_void(); } + + static saw::error_or<void> malloc_on_device(saw::data<Schema,Encode>& host_data, saw::data<Schema,encode::Sycl<Encode>>& sycl_data, sycl::queue& q){ + (void) host_data; + (void) sycl_data; + (void) q; + return saw::make_void(); + } +}; + +template<typename Sch, uint64_t Ghost, uint64_t... Dims, typename Encode> +struct sycl_copy_helper<sch::Chunk<Sch,Ghost,Dims...>, Encode> final { + using Schema = sch::Chunk<Sch,Ghost,Dims...>; + + static saw::error_or<void> copy_to_host(saw::data<Schema,encode::Sycl<Encode>>& sycl_data, saw::data<Schema,Encode>& host_data, sycl::queue& q){ + auto host_ptr = host_data.flat_data(); + auto sycl_ptr = sycl_data.flat_data(); + + static_assert(sizeof(std::decay_t<decltype(sycl_ptr)>) == sizeof(std::decay_t<decltype(host_ptr)>), "Unequal size"); + + auto flat_size = host_data.flat_size(); + + q.submit([&](acpp::sycl::handler& h){ + h.copy(sycl_ptr,host_ptr, flat_size.get()); + }).wait(); + return saw::make_void(); + } + + static saw::error_or<void> copy_to_device(saw::data<Schema,Encode>& host_data, saw::data<Schema,encode::Sycl<Encode>>& sycl_data, sycl::queue& q){ + auto host_ptr = host_data.flat_data(); + auto sycl_ptr = sycl_data.flat_data(); + + static_assert(sizeof(std::decay_t<decltype(sycl_ptr)>) == sizeof(std::decay_t<decltype(host_ptr)>), "Unequal size"); + + auto flat_size = host_data.flat_size(); + + q.submit([&](acpp::sycl::handler& h){ + h.copy(host_ptr,sycl_ptr, flat_size.get()); + }).wait(); + return saw::make_void(); + } +}; + +template<typename Sch, uint64_t Dims, typename Encode> +struct sycl_copy_helper<sch::Array<Sch,Dims>, Encode> final { + using Schema = sch::Array<Sch,Dims>; + + static saw::error_or<void> copy_to_host(saw::data<Schema,encode::Sycl<Encode>>& sycl_data, saw::data<Schema,Encode>& host_data, sycl::queue& q){ + auto host_ptr = host_data.flat_data(); + auto sycl_ptr = sycl_data.flat_data(); + + static_assert(sizeof(std::decay_t<decltype(sycl_ptr)>) == sizeof(std::decay_t<decltype(host_ptr)>), "Unequal size"); + + SAW_ASSERT(host_data.flat_size() == sycl_data.flat_size()); + q.submit([&](acpp::sycl::handler& h){ + h.copy(sycl_ptr,host_ptr, host_data.flat_size().get()); + }).wait(); + return saw::make_void(); + } + + static saw::error_or<void> copy_to_device(saw::data<Schema,Encode>& host_data, saw::data<Schema,encode::Sycl<Encode>>& sycl_data, sycl::queue& q){ + + { + auto hm = host_data.meta(); + auto sm = sycl_data.meta(); + bool equ{true}; + for(uint64_t i{0u}; i < Dims; ++i){ + equ &= (hm.at({i}).get() == sm.at({i}).get()); + } + if(not equ){ + sycl_data.reset_to(hm); + } + } + + auto host_ptr = host_data.flat_data(); + auto sycl_ptr = sycl_data.flat_data(); + static_assert(sizeof(std::decay_t<decltype(sycl_ptr)>) == sizeof(std::decay_t<decltype(host_ptr)>), "Unequal size"); + + q.submit([&](acpp::sycl::handler& h){ + h.copy(host_ptr,sycl_ptr, host_data.flat_size().get()); + }).wait(); + + return saw::make_void(); + } + + static saw::error_or<void> malloc_on_device(saw::data<Schema,Encode>& host_data, saw::data<Schema,encode::Sycl<Encode>>& sycl_data, sycl::queue& q){ + sycl_data = {host_data.meta(),q}; + return saw::make_void(); + } }; template<typename Schema, typename Encode> -struct make_chunk_struct_view_helper; +struct make_view_helper; + +template<typename Sch, uint64_t Dims, typename Encode> +struct make_view_helper<sch::Array<Sch,Dims>, encode::Sycl<Encode>> { +public: +static saw::error_or<void> apply( + saw::data<sch::Array<Sch,Dims>, encode::Sycl<Encode>>& dat, + saw::data<sch::Ptr<sch::Array<Sch,Dims>>, encode::Sycl<Encode>>& dat_view +){ + dat_view = {dat}; + return saw::make_void(); +} +}; + +template<typename Sch, uint64_t... Dims, typename Encode> +struct make_view_helper<sch::FixedArray<Sch,Dims...>, encode::Sycl<Encode>> { +public: + static saw::error_or<void> apply( + saw::data<sch::FixedArray<Sch,Dims...>, encode::Sycl<Encode>>& dat, + saw::data<sch::Ptr<sch::FixedArray<Sch,Dims...>>, encode::Sycl<Encode>>& dat_view + ){ + dat_view = {dat}; + return saw::make_void(); + } +}; + +template<typename Sch, uint64_t Ghost, uint64_t... Dims, typename Encode> +struct make_view_helper<sch::Chunk<Sch,Ghost,Dims...>, encode::Sycl<Encode>> { +public: + static saw::error_or<void> apply( + saw::data<sch::Chunk<Sch,Ghost,Dims...>, encode::Sycl<Encode>>& dat, + saw::data<sch::Ptr<sch::Chunk<Sch,Ghost,Dims...>>, encode::Sycl<Encode>>& dat_view + ){ + dat_view = {dat}; + return saw::make_void(); + } +}; template<typename... Sch, saw::string_literal... Keys, typename Encode> -struct make_chunk_struct_view_helper<sch::Struct<sch::Member<Sch,Keys>...>, encode::Sycl<Encode>> { +struct make_view_helper<sch::Struct<sch::Member<Sch,Keys>...>, encode::Sycl<Encode>> { private: - template<uint64_t i> - static void apply_i( +template<uint64_t i> +static saw::error_or<void> apply_i( saw::data<sch::Struct<sch::Member<Sch,Keys>...>, encode::Sycl<Encode>>& dat, - saw::data<sch::Ptr<sch::Struct<sch::Member<Sch,Keys>...>>, encode::Sycl<Encode>>& dat_view) - { + saw::data<sch::Ptr<sch::Struct<sch::Member<Sch,Keys>...>>, encode::Sycl<Encode>>& dat_view + ){ if constexpr (i < sizeof...(Sch)){ using M = typename saw::parameter_pack_type<i,sch::Member<Sch,Keys>...>::type; - - dat_view.template get<M::KeyLiteral>() = {dat.template get<M::KeyLiteral>().flat_data()}; - apply_i<i+1u>(dat,dat_view); + auto eov = make_view_helper<typename M::ValueType,encode::Sycl<Encode>>::apply(dat.template get<M::KeyLiteral>(),dat_view.template get<M::KeyLiteral>()); + if(eov.is_error()){ + return eov; + } + + return apply_i<i+1u>(dat,dat_view); } + + return saw::make_void(); } public: - static auto apply(saw::data<sch::Struct<sch::Member<Sch,Keys>...>, encode::Sycl<Encode>>& dat){ - saw::data<sch::Ptr<sch::Struct<sch::Member<Sch,Keys>...>>, encode::Sycl<Encode>> str_view; +static saw::error_or<void> apply( + saw::data<sch::Struct<sch::Member<Sch,Keys>...>, encode::Sycl<Encode>>& dat, + saw::data<sch::Ptr<sch::Struct<sch::Member<Sch,Keys>...>>, encode::Sycl<Encode>>& dat_view +){ + return apply_i<0u>(dat,dat_view); +} +}; + +template<typename... Sch, typename Encode> +struct make_view_helper<sch::Tuple<Sch...>, encode::Sycl<Encode>> { +private: + template<uint64_t i> + static saw::error_or<void> apply_i( + saw::data<sch::Tuple<Sch...>, encode::Sycl<Encode>>& dat, + saw::data<sch::Ptr<sch::Tuple<Sch...>>, encode::Sycl<Encode>>& dat_view + ){ + if constexpr (i < sizeof...(Sch)){ + using M = typename saw::parameter_pack_type<i,Sch...>::type; + + auto eov = make_view_helper<M,encode::Sycl<Encode>>::apply(dat.template get<i>(), dat_view.template get<i>()); + if(eov.is_error()){ + return eov; + } - apply_i<0u>(dat,str_view); + return apply_i<i+1u>(dat,dat_view); + } - return str_view; + return saw::make_void(); + } +public: + static saw::error_or<void> apply( + saw::data<sch::Tuple<Sch...>, encode::Sycl<Encode>>& dat, + saw::data<sch::Ptr<sch::Tuple<Sch...>>, encode::Sycl<Encode>> dat_view + ){ + return apply_i<0u>(dat,dat_view); } }; } // Ptr => Ptr<Chunk<T,Ghost,Dims...>>> => Ptr<FixedArray<T,Dims+Ghost...> template<typename Schema,typename Encode> -auto make_chunk_struct_view(saw::data<Schema,Encode>& dat){ - return impl::make_chunk_struct_view_helper<Schema,Encode>::apply(dat); +auto make_view(saw::data<Schema,Encode>& dat){ + saw::data<sch::Ptr<Schema>,Encode> dat_view; + auto eov = impl::make_view_helper<Schema,Encode>::apply(dat,dat_view); + (void) eov; + + return dat_view; } class device final { @@ -433,6 +943,16 @@ public: return impl::sycl_copy_helper<Sch,Encode>::copy_to_host(sycl_data, host_data, q_); } + template<typename Sch, typename Encode> + saw::error_or<void> malloc_on_device( + saw::data<Sch,Encode>& host_data, + saw::data<Sch,encode::Sycl<Encode>>& sycl_data + ){ + auto eov = impl::sycl_copy_helper<Sch,Encode>::malloc_on_device(host_data, sycl_data, q_); + q_.wait(); + return eov; + } + auto& get_handle(){ return q_; } diff --git a/lib/sycl/tests/data.cpp b/lib/sycl/tests/data.cpp index 3073a22..4321a0d 100644 --- a/lib/sycl/tests/data.cpp +++ b/lib/sycl/tests/data.cpp @@ -2,6 +2,24 @@ #include "../c++/lbm.hpp" +namespace { + +namespace sch { +using namespace kel::lbm::sch; +using TestObjSchema = Tuple< + Member<FixedArray<UInt64,2u,2u>, "foo">, + Member<Array<Float32>, "bar">, + Member< + Array< + Struct< + Member<FixedArray<Float32,2u>,"pos"> + > + >, + "baz" + > +>; +} + SAW_TEST("Sycl Data Compilation"){ acpp::sycl::queue q; saw::data< @@ -19,3 +37,20 @@ SAW_TEST("Sycl Data Compilation"){ // test_f.at({}).set(1); // SAW_EXPECT(test_f.at({}).get() == 1, "Value check failed"); } + +SAW_TEST("Sycl Data Array of Struct"){ + acpp::sycl::queue q; + + saw::data<sch::Array<sch::Float64>, kel::lbm::encode::Sycl<saw::encode::Native>> a{{{2u}},q}; +} + +/* +SAW_TEST("Sycl Data Compilation for Particle Similacrum"){ + acpp::sycl::queue q; + + saw::data< + sch::TestObjSchema + > a; +} +*/ +} diff --git a/util/podman/norce_build_and_run.sh b/util/podman/norce_build_and_run.sh index 048d2c7..42d1f24 100755 --- a/util/podman/norce_build_and_run.sh +++ b/util/podman/norce_build_and_run.sh @@ -74,10 +74,10 @@ podman run --rm -it \ nix-store --verify --check-contents || true # Build - nix-build default.nix -A release.examples --out-link result + nix-build default.nix -A release.examples --out-link /tmp/result - # --- Interactive binary selection --- - BIN_DIR=./result/bin + # --- Interactive binary selection --- + BIN_DIR=/tmp/result/bin mapfile -t BINARIES < <(ls -1 \$BIN_DIR) if (( \${#BINARIES[@]} == 0 )); then echo 'No binaries found in result/bin' |
