From 92f5645809449f56c39c0e4c6c29045b8a4acea6 Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Mon, 16 Feb 2026 14:39:38 +0100 Subject: Dangling changes --- examples/poiseulle_3d_gpu/.nix/derivation.nix | 10 +- examples/poiseulle_3d_gpu/SConscript | 6 +- examples/poiseulle_3d_gpu/SConstruct | 4 +- examples/poiseulle_3d_gpu/poiseulle_3d_gpu.cpp | 381 ---------------------- examples/poiseulle_3d_gpu/sim.cpp | 422 +++++++++++++++++++++++++ examples/poiseulle_particles_2d_gpu/sim.cpp | 5 +- 6 files changed, 437 insertions(+), 391 deletions(-) delete mode 100644 examples/poiseulle_3d_gpu/poiseulle_3d_gpu.cpp create mode 100644 examples/poiseulle_3d_gpu/sim.cpp (limited to 'examples') diff --git a/examples/poiseulle_3d_gpu/.nix/derivation.nix b/examples/poiseulle_3d_gpu/.nix/derivation.nix index ee78fc8..74ea0ab 100644 --- a/examples/poiseulle_3d_gpu/.nix/derivation.nix +++ b/examples/poiseulle_3d_gpu/.nix/derivation.nix @@ -7,7 +7,7 @@ , pname , version , adaptive-cpp -, kel-lbm +, kel }: stdenv.mkDerivation { @@ -26,11 +26,13 @@ stdenv.mkDerivation { forstio.async forstio.codec forstio.codec-unit - forstio.remote + forstio.io + forstio.remote + forstio.remote-filesystem forstio.codec-json adaptive-cpp - kel-lbm.core -# kel-lbm.sycl + kel.lbm.core + kel.lbm.sycl ]; preferLocalBuild = true; diff --git a/examples/poiseulle_3d_gpu/SConscript b/examples/poiseulle_3d_gpu/SConscript index 2525367..7c8bbf1 100644 --- a/examples/poiseulle_3d_gpu/SConscript +++ b/examples/poiseulle_3d_gpu/SConscript @@ -22,12 +22,12 @@ env.headers += examples_env.headers; # Cavity2D examples_objects = []; -examples_env.add_source_files(examples_objects, ['poiseulle_3d_gpu.cpp'], shared=False); -examples_env.poiseulle_3d_gpu = examples_env.Program('#bin/poiseulle_3d_gpu', [examples_objects]); +examples_env.add_source_files(examples_objects, ['sim.cpp'], shared=False); +examples_env.poiseulle_2d_gpu = examples_env.Program('#bin/poiseulle_particles_3d_gpu', [examples_objects]); # Set Alias env.examples = [ - examples_env.poiseulle_3d_gpu + examples_env.poiseulle_2d_gpu ]; env.Alias('examples', env.examples); env.targets += ['examples']; diff --git a/examples/poiseulle_3d_gpu/SConstruct b/examples/poiseulle_3d_gpu/SConstruct index a7201cb..0611b67 100644 --- a/examples/poiseulle_3d_gpu/SConstruct +++ b/examples/poiseulle_3d_gpu/SConstruct @@ -57,7 +57,9 @@ env=Environment(ENV=os.environ, variables=env_vars, CPPPATH=[], '-Wextra' ], LIBS=[ - 'forstio-core' + 'forstio-core', + 'forstio-async', + 'forstio-io' ] ); env.__class__.add_source_files = add_kel_source_files diff --git a/examples/poiseulle_3d_gpu/poiseulle_3d_gpu.cpp b/examples/poiseulle_3d_gpu/poiseulle_3d_gpu.cpp deleted file mode 100644 index c221f42..0000000 --- a/examples/poiseulle_3d_gpu/poiseulle_3d_gpu.cpp +++ /dev/null @@ -1,381 +0,0 @@ -#include -#include - -#include - -template -using SyclHostAlloc = acpp::sycl::usm_allocator, acpp::sycl::usm::alloc::host>; - -namespace kel { -namespace lbm { -namespace sch { -using namespace saw::schema; - -/** - * Basic distribution function - * Base type - * D - * Q - * Scalar factor - * D factor - * Q factor - */ -using T = Float64; -using D2Q9 = Descriptor<2u,9u>; - -using DfCell = Cell; -using CellInfo = Cell; - -/** - * Basic type for simulation - */ -using CellStruct = Struct< - Member, - Member, - Member ->; - -using MacroStruct = Struct< - Member, "velocity">, - Member ->; - -} - -namespace cmpt { -template -struct PressureBoundaryRestrictedVelocityTo {}; -} - -template -struct component> { -private: - saw::data pressure_setting_; - saw::data rho_setting_; -public: - component(const saw::data& pressure_setting__): - pressure_setting_{pressure_setting__}, - rho_setting_{pressure_setting__ * df_info::inv_cs2} - {} - - template - void apply( - saw::data* field, - saw::data> index, - const saw::data>& meta, - uint64_t time_step - ) const { - - using dfi = df_info; - - auto flat_ind = flatten_index::apply(index,meta); - - bool is_even = ((time_step % 2u) == 0u); - auto& cell = field[flat_ind.get()]; - - auto& info = cell.template get<"info">(); - if(info({0u}).get() == 0u){ - return; - } - auto& dfs_old = (is_even) ? cell.template get<"dfs_old">() : cell.template get<"dfs">(); - - /** - * Sum all known DFs - */ - saw::data sum_df{0u}; - for(saw::data k{0u}; k < saw::data{Desc::Q}; ++k){ - auto c_k = dfi::directions[k.get()]; - auto flat_ind_n = flatten_index::apply({{index.at({0u})+c_k[0u], index.at({1u})+c_k[1u]}},meta); - auto& cell_n = field[flat_ind_n.get()]; - auto& info_n = cell_n.template get<"info">(); - auto info_n_val = info_n({0u}); - auto k_opp = dfi::opposite_index[k.get()]; - - if(info_n_val.get() > 0u){ - sum_df += dfs_old({k_opp}); - } - } - /** - * Get the sum of the unknown dfs and precalculate the direction - */ - constexpr int known_dir = East ? 1 : -1; - auto sum_unknown_dfs = (rho_setting_ - sum_df) * saw::data{known_dir}; - - for(saw::data k{0u}; k < saw::data{Desc::Q}; ++k){ - auto c_k = dfi::directions[k.get()]; - auto flat_ind_n = flatten_index::apply({{index.at({0u})+c_k[0u], index.at({1u})+c_k[1u]}},meta); - auto& cell_n = field[flat_ind_n.get()]; - auto& info_n = cell_n.template get<"info">(); - auto info_n_val = info_n({0u}); - - if(info_n_val.get() > 0u){ - sum_unknown_dfs += dfs_old({k}) * c_k[0u]; - } - } - - auto vel_x = sum_unknown_dfs / rho_setting_; - - if constexpr (East) { - dfs_old({2u}) = dfs_old({1u}) + saw::data{2.0 / 3.0} * rho_setting_ * vel_x; - dfs_old({6u}) = dfs_old({5u}) + saw::data{1.0 / 6.0} * rho_setting_ * vel_x + saw::data{0.5} * (dfs_old({4u}) - dfs_old({3u})); - dfs_old({8u}) = dfs_old({7u}) + saw::data{1.0 / 6.0} * rho_setting_ * vel_x + saw::data{0.5} * (dfs_old({3u}) - dfs_old({4u})); - }else if constexpr (not East){ - dfs_old({1u}) = dfs_old({2u}) - saw::data{2.0 / 3.0} * rho_setting_ * vel_x; - dfs_old({5u}) = dfs_old({6u}) - saw::data{1.0 / 6.0} * rho_setting_ * vel_x + saw::data{0.5} * (dfs_old({3u}) - dfs_old({4u})); - dfs_old({7u}) = dfs_old({8u}) - saw::data{1.0 / 6.0} * rho_setting_ * vel_x + saw::data{0.5} * (dfs_old({4u}) - dfs_old({3u})); - } - } -}; - -/** - * This is massively hacky and expects a lot of conditions - * Either this or mirrored along the horizontal line works - * - * 0 - 2 - 2 - * 0 - 3 - 1 - * 0 - 3 - 1 - * ......... - * 0 - 3 - 1 - * 0 - 2 - 2 - * - */ - -template -saw::error_or set_geometry( - saw::data* cells, - const saw::data>& meta, - acpp::sycl::queue& sycl_q -){ - using namespace kel::lbm; - - saw::data rho{1.0}; - saw::data> vel{{0.0,0.0}}; - auto eq = equilibrium(rho, vel); - - sycl_q.submit([&](acpp::sycl::handler& h){ - h.parallel_for(acpp::sycl::range<2>{meta.at({0}).get(), meta.at({1}).get()},[=](acpp::sycl::id<2> idx){ - size_t i = idx[0]; - size_t j = idx[1]; - size_t acc_id = j * meta.at({0u}).get() + i; - - auto& c = cells[acc_id]; - auto& info = c.template get<"info">()({0}); - auto& dfs = c.template get<"dfs">(); - auto& dfs_old = c.template get<"dfs_old">(); - - if(i >= 2u and j >= 2u and (i+2u) < meta.at({0u}).get() and (j+2u) < meta.at({1u}).get()){ - // Fluid - info.set({2u}); - }else if(((j+2u) == meta.at({1u}).get() or j == 1u) and (i>=1u and (i+1u)= 1 and (j+1 < meta.at({1u}).get()) ) ){ - // Left input - info.set({3u}); - }else if((i+2u) == meta.at({0u}).get() and (j >= 1 and (j+1) < meta.at({1u}).get() )){ - // Right output - info.set({4u}); - }else { - info.set({0u}); - } - for(saw::data k{0u}; k < saw::data{Desc::Q}; ++k){ - dfs(k) = eq.at(k); - dfs_old(k) = eq.at(k); - } - }); - }).wait(); - - return saw::make_void(); -} - -template -void step( - saw::data* cells, - saw::data* macro_cells, - const saw::data>& meta, - uint64_t time_step, - acpp::sycl::queue& sycl_q -){ - using namespace kel::lbm; - using dfi = df_info; - - constexpr saw::data frequency{1.0 / 0.5384}; - - bool is_even = ((time_step % 2u) == 0u); - /** - * 1. Relaxation parameter \tau - */ - /* - component coll{0.5384}; - component bb; - */ - component> inlet{1.1 * dfi::cs2}; - component> outlet{1.0 * dfi::cs2}; - - - // auto collision_ev = - sycl_q.submit([&](acpp::sycl::handler& h){ - - /// Collision - h.parallel_for(acpp::sycl::range<2>{meta.at({0}).get(), meta.at({1}).get()},[=](acpp::sycl::id idx){ - size_t i = idx[0]; - size_t j = idx[1]; - size_t acc_id = j * meta.at({0u}).get() + i; - - auto& c = cells[acc_id]; - auto& info = cells[acc_id].template get<"info">(); - - switch (info({0u}).get()) { - // Bounce Back - case 1u: { - auto& dfs_old = is_even ? c.template get<"dfs_old">() : c.template get<"dfs">(); - auto df_cpy = dfs_old.copy(); - - for(uint64_t k = 1u; k < Desc::Q; ++k){ - dfs_old({k}) = df_cpy({dfi::opposite_index.at(k)}); - } - - break; - } - // Collision - case 2u: { - // coll.apply(latt_acc, {i, j}, time_step); - auto& dfs_old = is_even ? c.template get<"dfs_old">() : c.template get<"dfs">(); - - auto& macro_c = macro_cells[acc_id]; - - saw::data& rho = macro_c.template get<"pressure">(); - saw::data>& vel = macro_c.template get<"velocity">(); - - compute_rho_u(dfs_old,rho,vel); - auto eq = equilibrium(rho,vel); - - for(uint64_t k = 0u; k < Desc::Q; ++k){ - dfs_old({k}) = dfs_old({k}) + frequency * (eq.at({k}) - dfs_old({k})); - } - break; - } - case 3u: { - inlet.apply(cells, {{i,j}}, meta, time_step); - break; - } - case 4u: { - outlet.apply(cells, {{i,j}}, meta, time_step); - break; - } - default: - // Do nothing - break; - } - }); - }).wait(); - - //auto stream_ev = - sycl_q.submit([&](acpp::sycl::handler& h){ - /// Stream - h.parallel_for(acpp::sycl::range<2>{meta.at({0}).get(), meta.at({1}).get()},[=](acpp::sycl::id idx){ - size_t i = idx[0]; - size_t j = idx[1]; - size_t acc_id = j * meta.at({0u}).get() + i; - - auto& c = cells[acc_id]; - auto& info = c.template get<"info">(); - auto& dfs_new = is_even ? c.template get<"dfs">() : c.template get<"dfs_old">(); - - if (info({0u}).get() > 0u) { - for (uint64_t k = 0u; k < Desc::Q; ++k) { - auto dir = dfi::directions[dfi::opposite_index[k]]; - size_t acc_old_id = (j+dir[1]) * meta.at({0u}).get() + (i+dir[0]); - - auto& dfs_old = is_even ? cells[acc_old_id].template get<"dfs_old">() : cells[acc_old_id].template get<"dfs">(); - auto& info_old = cells[acc_old_id].template get<"info">(); - - dfs_new({k}) = dfs_old({k}); - } - } - - }); - }).wait(); -} -} -} - -template -saw::error_or kel_main(int argc, char** argv){ - using namespace kel; - - using dfi = lbm::df_info; - - auto eo_lbm_dir = lbm::output_directory(); - if(eo_lbm_dir.is_error()){ - return std::move(eo_lbm_dir.get_error()); - } - auto& lbm_dir = eo_lbm_dir.get_value(); - auto out_dir = lbm_dir / "poiseulle_channel_2d_gpu"; - - // Create Dir TODO - - // - lbm::converter conv { - // delta_x - {{1.0}}, - // delta_t - {{1.0}} - }; - - uint64_t x_d = 512u; - uint64_t y_d = 128u; - uint64_t z_d = 128u; - - saw::data> meta{{x_d,y_d,z_d}}; - - acpp::sycl::queue sycl_q; - SyclHostAlloc sycl_host_alloc{sycl_q}; - // SyclDeviceAlloc sycl_dev_alloc{sycl_q}; - - std::vector, SyclHostAlloc> host_cells{x_d * y_d * z_d,sycl_host_alloc}; - - saw::data* cells = acpp::sycl::malloc_device>(x_d * y_d * z_d,sycl_q); - saw::data* macro_cells = acpp::sycl::malloc_device>(x_d * y_d * z_d,sycl_q); - { - auto eov = lbm::set_geometry(cells,meta,sycl_q); - if(eov.is_error()){ - return eov; - } - } - - - for(uint64_t i = 0u; i < 1024u*128u; ++i){ - lbm::step(cells,macro_cells,meta,i,sycl_q); - sycl_q.wait(); - if(i%4u == 0u){ - std::string vtk_f_name{"tmp/poiseulle_2d_gpu_"}; - vtk_f_name += std::to_string(i) + ".vtk"; - // write_vtk_file(vtk_f_name,host_cells); - sycl_q.memcpy(&host_cells[0u], macro_cells, x_d * y_d * z_d * sizeof(saw::data) ).wait(); - lbm::write_vtk_file(vtk_f_name, &host_cells[0], meta); - } - } - - sycl_q.wait(); - acpp::sycl::free(cells, sycl_q); - acpp::sycl::free(macro_cells, sycl_q); - sycl_q.wait(); - - return saw::make_void(); -} - -int main(int argc, char** argv){ - auto eov = kel_main(argc, argv); - if(eov.is_error()){ - auto& err = eov.get_error(); - std::cerr<<"[Error] "< 0u){ - std::cerr<<" - "< +#include +#include + +#include +#include +#include +#include + +namespace kel { +namespace lbm { + +constexpr uint64_t dim_x = 1024ul; +constexpr uint64_t dim_y = 512ul; +constexpr uint64_t dim_z = 512ul; +constexpr uint64_t particle_size = 128ul; + +namespace sch { +using namespace saw::schema; + +using InfoChunk = Chunk; + +template +using DfChunk = Chunk, 1u, dim_x, dim_y, dim_z>; + +template +using ScalarChunk = Chunk, 0u, dim_x, dim_y, dim_z>; + +template +using VectorChunk = Chunk, 0u, dim_x, dim_y, dim_z>; + +template +using ChunkStruct = Struct< + Member, + Member, "dfs">, + Member, "dfs_old">, + Member, "particle_N">, + Member, "particle_D"> +>; + +template +using VelChunk = Chunk, 0u, dim_x, dim_y, dim_z>; + +template +using RhoChunk = Chunk, 0u, dim_x, dim_y, dim_z>; + +template +using MacroStruct = Struct< + Member, "velocity">, + Member, "density">, + Member, "porosity"> +>; + +//template +//using ParticleArray = Array< +// Particle +//>; +} + +template +saw::error_or setup_initial_conditions( + saw::data>& fields, + saw::data>& macros, + saw::data, particle_size>>& particles +){ + auto& info_f = fields.template get<"info">(); + // Set everything as walls + iterator::apply( + [&](auto& index){ + info_f.at(index).set(1u); + }, + {}, + info_f.get_dims(), + {} + ); + // Fluid + iterator::apply( + [&](auto& index){ + info_f.at(index).set(2u); + }, + {}, + info_f.get_dims(), + {{1u,1u,1u}} + ); + + // Inflow + iterator::apply( + [&](auto& index){ + info_f.at(index).set(3u); + }, + {{0u,0u,0u}}, + {{1u,dim_y,dim_z}}, + {{0u,1u,1u}} + ); + + // Outflow + iterator::apply( + [&](auto& index){ + info_f.at(index).set(4u); + }, + {{dim_x-1u,0u,0u}}, + {{dim_x,dim_y,dim_z}}, + {{0u,1u,1u}} + ); + // + auto& df_f = fields.template get<"dfs_old">(); + auto& rho_f = macros.template get<"density">(); + auto& vel_f = macros.template get<"velocity">(); + auto& por_f = macros.template get<"porosity">(); + + iterator::apply( + [&](auto& index){ + auto& df = df_f.at(index); + auto& rho = rho_f.at(index); + por_f.at(index).at({}) = {1}; + rho.at({}) = {1}; + auto& vel = vel_f.at(index); + auto eq = equilibrium(rho,vel); + + df = eq; + }, + {},// 0-index + df_f.get_dims() + ); + + iterator::apply( + [&](auto& index){ + auto& df = df_f.at(index); + auto& rho = rho_f.at(index); + rho.at({}) = {1}; + auto& vel = vel_f.at(index); + vel.at({{0u}}) = 0.1; + auto eq = equilibrium(rho,vel); + + df = eq; + }, + {},// 0-index + df_f.get_dims(), + {{1u,1u,1u}} + ); + + for(saw::data i{0u}; i < saw::data{particle_size}; ++i){ + auto& part = particles.at(i); + } + + return saw::make_void(); +} + +template +saw::error_or step( + saw::data>,encode::Sycl>& fields, + saw::data>,encode::Sycl>& macros, + saw::data t_i, + device& dev +){ + auto& q = dev.get_handle(); + auto& info_f = fields.template get<"info">(); + + // auto coll_ev = + q.submit([&](acpp::sycl::handler& h){ + // Need nicer things to handle the flow. I see improvement here + component> collision{0.6}; + // component> collision{0.6}; + component> bb; + + saw::data> rho_b; + rho_b.at({}) = 1.0; + saw::data> vel_b; + vel_b.at({{0u}}) = 0.1; + + component> equi{rho_b,vel_b}; + + component,encode::Sycl> flow_in{ + [&](){ + uint64_t target_t_i = 256u; + if(t_i.get() < target_t_i){ + return 1.0 + (0.001 / target_t_i) * t_i.get(); + } + return 1.001; + }() + }; + component,encode::Sycl> flow_out{1.0}; + + h.parallel_for(acpp::sycl::range{dim_x,dim_y,dim_z}, [=](acpp::sycl::id idx){ + saw::data> index; + for(uint64_t i = 0u; i < Desc::D; ++i){ + index.at({{i}}).set(idx[i]); + } + + auto info = info_f.at(index); + + switch(info.get()){ + case 0u: + break; + case 1u: + bb.apply(fields,index,t_i); + break; + case 2u: + collision.apply(fields,macros,index,t_i); + break; + case 3u: + equi.apply(fields,index,t_i); + // flow_in.apply(fields,index,t_i); + collision.apply(fields,macros,index,t_i); + break; + case 4u: + equi.apply(fields,index,t_i); + // flow_out.apply(fields,index,t_i); + collision.apply(fields,macros,index,t_i); + break; + default: + break; + } + }); + }).wait(); + + q.submit([&](acpp::sycl::handler& h){ + component> stream; + + h.parallel_for(acpp::sycl::range{dim_x,dim_y,dim_z}, [=](acpp::sycl::id idx){ + saw::data> index; + for(uint64_t i = 0u; i < Desc::D; ++i){ + index.at({{i}}).set(idx[i]); + } + + auto info = info_f.at(index); + + if(info.get() > 0u){ + stream.apply(fields,index,t_i); + } + }); + }).wait(); + + // Step + /* + q.submit([&](acpp::sycl::handler& h){ + // h.depends_on(collision_ev); + }).wait(); + */ + + return saw::make_void(); +} +} +} + +template +saw::error_or lbm_main(int argc, char** argv){ + using namespace kel::lbm; + + using dfi = df_info; + + auto eo_lbm_dir = output_directory(); + if(eo_lbm_dir.is_error()){ + return std::move(eo_lbm_dir.get_error()); + } + auto& lbm_dir = eo_lbm_dir.get_value(); + + auto out_dir = lbm_dir / "poiseulle_particles_2d_gpu"; + + { + std::error_code ec; + std::filesystem::create_directories(out_dir,ec); + if(ec != std::errc{}){ + return saw::make_error("Could not create output directory"); + } + } + + converter conv { + // delta_x + {{1.0}}, + // delta_t + {{1.0}} + }; + + // saw::data> meta{{dim_x,dim_y}}; + auto lbm_data_ptr = saw::heap>>(); + auto lbm_macro_data_ptr = saw::heap>>(); + auto lbm_particle_data_ptr = saw::heap, particle_size>>>(); + + auto eo_aio = saw::setup_async_io(); + if(eo_aio.is_error()){ + return std::move(eo_aio.get_error()); + } + auto& aio = eo_aio.get_value(); + saw::wait_scope wait{aio.event_loop}; + + bool krun = true; + bool print_status = false; + aio.event_port.on_signal(saw::Signal::Terminate).then([&](){ + krun = false; + }).detach(); + aio.event_port.on_signal(saw::Signal::User1).then([&](){ + print_status = true; + }).detach(); + + device dev; + + auto& sycl_q = dev.get_handle(); + + sycl_q.wait(); + { + auto eov = setup_initial_conditions(*lbm_data_ptr,*lbm_macro_data_ptr,*lbm_particle_data_ptr); + if(eov.is_error()){ + return eov; + } + } + + saw::data, encode::Sycl> lbm_sycl_data{sycl_q}; + saw::data, encode::Sycl> lbm_sycl_macro_data{sycl_q}; + saw::data, particle_size>, encode::Sycl> lbm_sycl_particle_data{sycl_q}; + sycl_q.wait(); + + auto lsd_view = make_chunk_struct_view(lbm_sycl_data); + auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); + { + auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); + if(eov.is_error()){ + return eov; + } + } + { + auto eov = dev.copy_to_device(*lbm_macro_data_ptr,lbm_sycl_macro_data); + if(eov.is_error()){ + return eov; + } + } + { + auto eov = dev.copy_to_device(*lbm_particle_data_ptr,lbm_sycl_particle_data); + if(eov.is_error()){ + return eov; + } + } + sycl_q.wait(); + saw::data time_steps{2048ul}; + + for(saw::data i{0u}; i < time_steps and krun; ++i){ + { + { + std::string file_name = "t_"; + file_name += std::to_string(i.get()); + file_name += ".vtk"; + auto eov = write_vtk_file(out_dir/file_name, *lbm_macro_data_ptr); + if(eov.is_error()){ + return eov; + } + } + /*{ + std::string file_name = "p_"; + file_name += std::to_string(i.get()); + file_name += ".json"; + auto eov = saw::easy::encode_and_write_file,particle_size>,saw::encode::Json>(out_dir, *lbm_particle_data_ptr); + if(eov.is_error()){ + return eov; + } + }*/ + } + { + auto eov = step(lsd_view,lsdm_view,i,dev); + if(eov.is_error()){ + return eov; + } + } + { + auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr); + if(eov.is_error()){ + return eov; + } + } + { + auto eov = dev.copy_to_host(lbm_sycl_data,*lbm_data_ptr); + if(eov.is_error()){ + return eov; + } + } + + wait.poll(); + if(print_status){ + std::cout<<"Status: "<().get() * 100 / time_steps.get())<<"%"<::apply( + [&](auto& index){ + std::cout<().at(index).template cast_to().get()<<"\n"; + }, + {{0u,0u}}, + {{dim_x, dim_y}} + ); + */ + + sycl_q.wait(); + return saw::make_void(); +} + +using FloatT = kel::lbm::sch::Float32; + +int main(int argc, char** argv){ + auto eov = lbm_main(argc, argv); + if(eov.is_error()){ + auto& err = eov.get_error(); + std::cerr<<"[Error] "< 0u){ + std::cerr<<" - "< step( // auto coll_ev = q.submit([&](acpp::sycl::handler& h){ // Need nicer things to handle the flow. I see improvement here - component> collision{0.6}; + component> collision{0.6}; + // component> collision{0.6}; component> bb; saw::data> rho_b; @@ -329,7 +330,7 @@ saw::error_or lbm_main(int argc, char** argv){ } } sycl_q.wait(); - saw::data time_steps{256ul}; + saw::data time_steps{2048ul}; for(saw::data i{0u}; i < time_steps and krun; ++i){ { -- cgit v1.2.3