#include #include #include #include #include #include #include #include namespace saw { namespace encode { template struct Sycl { }; } template class data, encode::Sycl> { public: using Schema = schema::Array; private: using SyclKelAllocator = acpp::sycl::usm_allocator, acpp::sycl::usm::alloc::shared>; std::vector, SyclKelAllocator> data_; data size_; public: data(const data& host_data__): data_{&host_data__.at({0u}),host_data__.size().get()}, size_{host_data__.size()} {} auto& get_handle() { return data_; } const auto& get_handle() const { return data_; } data internal_size() const { return size_; } data* internal_data() { return &data_[0u]; } const data* internal_data() const { return data_.data(); } }; } namespace kel { namespace lbm { namespace sch { using namespace saw::schema; /** * Basic distribution function * Base type * D * Q * Scalar factor * D factor * Q factor */ using T = Float32; using D2Q9 = Descriptor<2u,9u>; template using DfCell = Cell; template using CellInfo = Cell; /** * Basic type for simulation */ template using CellStruct = CellFieldStruct< Desc, Struct< Member>, "dfs">, Member>, "dfs_old">, Member>, "info"> > >; template using MacroStruct = Struct< Member, "velocity">, Member >; using CavityFieldD2Q9 = CellStruct; } /* template class df_cell_view; */ /** * Minor helper for the AA-Pull Pattern, so I can use only one lattice * * Am I sure I want to use AA this way? * Esoteric Twist technically reduces the needed memory access footprint */ /* template class df_cell_view, Encode> { public: using Schema = sch::Cell; private: std::array::type>*, QN> view_; public: df_cell_view(const std::array::type>*, QN>& view): view_{view} {} }; */ namespace cmpt { struct MovingWall {}; } /** * Full-Way moving wall Bounce back, something is not right here. * Technically it should reflect properly. */ template class component { public: std::array::type, Desc::D> lid_vel; public: void apply( saw::data>& dfs ){ using dfi = df_info; // Technically use .copy() /* auto dfs_cpy = dfs; for(uint64_t i = 0u; i < Desc::Q; ++i){ dfs({dfi::opposite_index.at(i)}) = dfs_cpy({i}) - 2.0 * dfi::weights[i] * 1.0 * ( lid_vel[0] * dfi::directions[i][0] + lid_vel[1] * dfi::directions[i][1]) * dfi::inv_cs2; } */ } }; constexpr size_t dim_x = 128; constexpr size_t dim_y = 128; template void set_geometry( saw::data>* info_field ){ using namespace kel::lbm; using namespace acpp; saw::data> meta{{dim_x,dim_y}}; /** * Set ghost */ iterate_over([&](const saw::data>& index){ size_t i = index.at({0u}).get() * dim_x + index.at({1u}).get(); auto& info = info_field[i]; info({0u}).set(0u); }, {{0u,0u}}, meta); /** * Set wall */ iterate_over([&](const saw::data>& index){ size_t i = index.at({0u}).get() * dim_x + index.at({1u}).get(); auto& info = info_field[i]; info({0u}).set(1u); }, {{0u,0u}}, meta, {{1u,1u}}); /** * Set fluid */ iterate_over([&](const saw::data>& index){ size_t i = index.at({0u}).get() * dim_x + index.at({1u}).get(); auto& info = info_field[i]; info({0u}).set(2u); }, {{0u,0u}}, meta, {{2u,2u}}); /** * Set top lid */ iterate_over([&](const saw::data>& index){ size_t i = index.at({0u}).get() * dim_x + index.at({1u}).get(); auto& info = info_field[i]; info({0u}).set(3u); }, {{0u,1u}}, {{meta.at({0u}), 2u}}, {{2u,0u}}); } template void set_initial_conditions( saw::data>* info_field, saw::data>* dfs_field, saw::data>* dfs_old_field ){ using namespace kel::lbm; using namespace acpp; saw::data rho{1.0}; saw::data> vel{{0.0,0.0}}; auto eq = equilibrium(rho, vel); saw::data> meta{{dim_x,dim_y}}; /** * Set distribution */ iterate_over([&](const saw::data>& index){ size_t i = index.at({0u}).get() * dim_x + index.at({1u}).get(); auto& dfs = dfs_field[i]; auto& dfs_old = dfs_old_field[i]; for(saw::data k = 0; k < saw::data{Desc::Q}; ++k){ dfs(k) = eq.at(k); dfs_old(k) = eq.at(k); } }, {{0u,0u}}, meta); } template void lbm_step( saw::data>* info_r, saw::data>* dfs_r, saw::data>* dfs_r_old, bool is_even, uint64_t time_step, acpp::sycl::queue& sycl_q ){ using namespace kel::lbm; using namespace acpp; using dfi = df_info; //component coll{0.59}; constexpr saw::data frequency{1.0 / 0.59}; //component bb; component bb_lid; bb_lid.lid_vel = {0.1, 0.0}; // Submit collision kernel sycl_q.submit([&](sycl::handler& cgh) { // Accessor for latt with read/write cgh.parallel_for( sycl::range<2>{dim_x, dim_y}, [=](sycl::id<2> idx) { size_t i = idx[0]; size_t j = idx[1]; size_t acc_id = i * dim_x + j; // Read cell info auto& info = info_r[acc_id]; switch (info({0u}).get()) { case 1u: { // bb.apply(latt_acc, {i, j}, time_step); auto& dfs_old = is_even ? dfs_r_old[acc_id] : dfs_r[acc_id]; auto df_cpy = dfs_old.copy(); for(uint64_t i = 1u; i < Desc::Q; ++i){ dfs_old({i}) = df_cpy({dfi::opposite_index.at(i)}); } break; } case 2u: { // coll.apply(latt_acc, {i, j}, time_step); auto& dfs_old = is_even ? dfs_r_old[acc_id] : dfs_r[acc_id]; saw::data rho; saw::data> vel; compute_rho_u(dfs_old,rho,vel); auto eq = equilibrium(rho,vel); for(uint64_t i = 0u; i < Desc::Q; ++i){ dfs_old({i}) = dfs_old({i}) + frequency * (eq.at(i) - dfs_old({i})); } break; } default: // Do nothing break; } }); }); // Submit streaming kernel sycl_q.submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl::range<2>{dim_x - 2, dim_y - 2}, [=](sycl::id<2> idx) { size_t i = idx[0] + 1; // avoid boundary size_t j = idx[1] + 1; size_t acc_id = i * dim_x + j; auto& dfs_new = is_even ? dfs_r[acc_id] : dfs_r_old[acc_id]; auto& info = info_r[acc_id]; if (info({0u}).get() > 0u && info({0u}).get() != 3u) { for (uint64_t k = 0u; k < Desc::Q; ++k) { auto dir = dfi::directions[dfi::opposite_index[k]]; // auto& cell_dir_old = latt_acc({i + dir[0], j + dir[1]}); // size_t acc_old_id = (i+dir[0]) * dim_x + (j+dir[1]); auto& dfs_old = is_even ? dfs_r_old[acc_old_id] : dfs_r[acc_old_id]; auto& info_old = info_r[acc_old_id]; if (info_old({0}).get() == 3u) { auto& dfs_old_loc = is_even ? dfs_r_old[acc_id] : dfs_r[acc_id]; dfs_new({k}) = dfs_old_loc({dfi::opposite_index.at(k)}) - 2.0 * dfi::inv_cs2 * dfi::weights.at(k) * 1.0 * (bb_lid.lid_vel[0] * dir[0] + bb_lid.lid_vel[1] * dir[1]); } else { dfs_new({k}) = dfs_old({k}); } } } }); }); } } } int main(){ using namespace kel::lbm; using namespace acpp; saw::data> meta{{dim_x, dim_y}}; constexpr size_t dim_size = dim_x * dim_y; converter conv{ {0.1}, {0.1} }; print_lbm_meta(conv, {1e-3}); auto eo_lbm_dir = output_directory(); if(eo_lbm_dir.is_error()){ return -1; } auto& lbm_dir = eo_lbm_dir.get_value(); auto out_dir = lbm_dir / "cavity_gpu_2d"; acpp::sycl::queue sycl_q{acpp::sycl::default_selector_v, acpp::sycl::property::queue::in_order{}}; constexpr size_t num_cells = dim_x * dim_y; // Allocate USM shared memory for your main data auto* info = sycl::malloc_shared>>(num_cells, sycl_q); auto* dfs = sycl::malloc_shared>>(num_cells, sycl_q); auto* dfs_old = sycl::malloc_shared>>(num_cells, sycl_q); /** * Set meta information describing what this cell is */ set_geometry(info); /** * */ set_initial_conditions(info,dfs,dfs_old); /** * Timeloop */ uint64_t lattice_steps = 512000u; bool even_step = true; uint64_t print_every = 256u; uint64_t file_no = 0u; saw::data,sch::D2Q9::D>> macros{meta}; std::cout<<"Start"<(info, dfs, dfs_old, even_step, i, sycl_q); if(i % 1024u == 0u){ sycl_q.wait(); iterate_over([&](const saw::data>& index){ size_t j = index.at({0u}).get() * dim_x + index.at({1u}).get(); auto dfs_field = even_step ? dfs_old : dfs; auto& rho = macros.at(index).template get<"pressure">(); auto& vel = macros.at(index).template get<"velocity">(); compute_rho_u(dfs_field[j],rho,vel); }, {{0u,0u}}, meta); std::string vtk_f_name{"tmp/cavity_2d_gpu_"}; vtk_f_name += std::to_string(i) + ".vtk"; write_vtk_file(vtk_f_name, macros); } even_step = not even_step; } auto stop = std::chrono::steady_clock::now(); std::cout<