From 92f5645809449f56c39c0e4c6c29045b8a4acea6 Mon Sep 17 00:00:00 2001
From: "Claudius \"keldu\" Holeksa" <mail@keldu.de>
Date: Mon, 16 Feb 2026 14:39:38 +0100
Subject: Dangling changes

---
 examples/poiseulle_3d_gpu/.nix/derivation.nix  |  10 +-
 examples/poiseulle_3d_gpu/SConscript           |   6 +-
 examples/poiseulle_3d_gpu/SConstruct           |   4 +-
 examples/poiseulle_3d_gpu/poiseulle_3d_gpu.cpp | 381 ----------------------
 examples/poiseulle_3d_gpu/sim.cpp              | 422 +++++++++++++++++++++++++
 examples/poiseulle_particles_2d_gpu/sim.cpp    |   5 +-
 6 files changed, 437 insertions(+), 391 deletions(-)
 delete mode 100644 examples/poiseulle_3d_gpu/poiseulle_3d_gpu.cpp
 create mode 100644 examples/poiseulle_3d_gpu/sim.cpp

(limited to 'examples')
diff --git a/examples/poiseulle_3d_gpu/.nix/derivation.nix b/examples/poiseulle_3d_gpu/.nix/derivation.nix
index ee78fc8..74ea0ab 100644
--- a/examples/poiseulle_3d_gpu/.nix/derivation.nix
+++ b/examples/poiseulle_3d_gpu/.nix/derivation.nix
@@ -7,7 +7,7 @@
 , pname
 , version
 , adaptive-cpp
-, kel-lbm
+, kel
 }:
 
 stdenv.mkDerivation {
@@ -26,11 +26,13 @@ stdenv.mkDerivation {
 		forstio.async
 		forstio.codec
 		forstio.codec-unit
-		forstio.remote
+		forstio.io
+    forstio.remote
+    forstio.remote-filesystem
 		forstio.codec-json
 		adaptive-cpp
-		kel-lbm.core
-#		kel-lbm.sycl
+		kel.lbm.core
+		kel.lbm.sycl
 	];
 
 	preferLocalBuild = true;
diff --git a/examples/poiseulle_3d_gpu/SConscript b/examples/poiseulle_3d_gpu/SConscript
index 2525367..7c8bbf1 100644
--- a/examples/poiseulle_3d_gpu/SConscript
+++ b/examples/poiseulle_3d_gpu/SConscript
@@ -22,12 +22,12 @@ env.headers += examples_env.headers;
 
 # Cavity2D
 examples_objects = [];
-examples_env.add_source_files(examples_objects, ['poiseulle_3d_gpu.cpp'], shared=False);
-examples_env.poiseulle_3d_gpu = examples_env.Program('#bin/poiseulle_3d_gpu', [examples_objects]);
+examples_env.add_source_files(examples_objects, ['sim.cpp'], shared=False);
+examples_env.poiseulle_2d_gpu = examples_env.Program('#bin/poiseulle_particles_3d_gpu', [examples_objects]);
 
 # Set Alias
 env.examples = [
-	examples_env.poiseulle_3d_gpu
+	examples_env.poiseulle_2d_gpu
 ];
 env.Alias('examples', env.examples);
 env.targets += ['examples'];
diff --git a/examples/poiseulle_3d_gpu/SConstruct b/examples/poiseulle_3d_gpu/SConstruct
index a7201cb..0611b67 100644
--- a/examples/poiseulle_3d_gpu/SConstruct
+++ b/examples/poiseulle_3d_gpu/SConstruct
@@ -57,7 +57,9 @@ env=Environment(ENV=os.environ, variables=env_vars, CPPPATH=[],
         '-Wextra'
     ],
     LIBS=[
-        'forstio-core'
+        'forstio-core',
+        'forstio-async',
+        'forstio-io'
     ]
 );
 env.__class__.add_source_files = add_kel_source_files
diff --git a/examples/poiseulle_3d_gpu/poiseulle_3d_gpu.cpp b/examples/poiseulle_3d_gpu/poiseulle_3d_gpu.cpp
deleted file mode 100644
index c221f42..0000000
--- a/examples/poiseulle_3d_gpu/poiseulle_3d_gpu.cpp
+++ /dev/null
@@ -1,381 +0,0 @@
-#include <kel/lbm/lbm.hpp>
-#include <AdaptiveCpp/sycl/sycl.hpp>
-
-#include <forstio/codec/data.hpp>
-
-template<typename T>
-using SyclHostAlloc = acpp::sycl::usm_allocator<saw::data<T>, acpp::sycl::usm::alloc::host>;
-
-namespace kel {
-namespace lbm {
-namespace sch {
-using namespace saw::schema;
-
-/**
- * Basic distribution function
- * Base type
- * D
- * Q
- * Scalar factor
- * D factor
- * Q factor
- */
-using T = Float64;
-using D2Q9 = Descriptor<2u,9u>;
-
-using DfCell = Cell<T, D2Q9, 0u, 0u, 1u>;
-using CellInfo = Cell<UInt8, D2Q9, 1u, 0u, 0u>;
-
-/**
- * Basic type for simulation
- */
-using CellStruct = Struct<
-	Member<DfCell, "dfs">,
-	Member<DfCell, "dfs_old">,
-	Member<CellInfo, "info">
->;
-
-using MacroStruct = Struct<
-	Member<FixedArray<Float64,D2Q9::D>, "velocity">,
-	Member<Float64, "pressure">
->;
-
-}
-
-namespace cmpt {
-template<bool East>
-struct PressureBoundaryRestrictedVelocityTo {};
-}
-
-template<typename FP,typename Desc, bool East>
-struct component<FP,Desc, cmpt::PressureBoundaryRestrictedVelocityTo<East>> {
-private:
-	saw::data<FP> pressure_setting_;
-	saw::data<FP> rho_setting_;
-public:
-	component(const saw::data<FP>& pressure_setting__):
-		pressure_setting_{pressure_setting__},
-		rho_setting_{pressure_setting__ * df_info<FP,Desc>::inv_cs2}
-	{}
-
-	template<typename CellFieldSchema>
-	void apply(
-			saw::data<CellFieldSchema>* field,
-			saw::data<sch::FixedArray<sch::UInt64,Desc::D>> index,
-			const saw::data<sch::FixedArray<sch::UInt64,Desc::D>>& meta,
-			uint64_t time_step
-		) const {
-
-		using dfi = df_info<FP,Desc>;
-
-		auto flat_ind = flatten_index<sch::UInt64,Desc::D>::apply(index,meta);
-
-		bool is_even = ((time_step % 2u) == 0u);
-		auto& cell = field[flat_ind.get()];
-
-		auto& info = cell.template get<"info">();
-		if(info({0u}).get() == 0u){
-			return;
-		}
-		auto& dfs_old = (is_even) ? cell.template get<"dfs_old">() : cell.template get<"dfs">();
-
-		/**
-		 * Sum all known DFs
-		 */
-		saw::data<FP> sum_df{0u};
-		for(saw::data<sch::UInt64> k{0u}; k < saw::data<sch::UInt64>{Desc::Q}; ++k){
-			auto c_k = dfi::directions[k.get()];
-			auto flat_ind_n = flatten_index<sch::UInt64,Desc::D>::apply({{index.at({0u})+c_k[0u], index.at({1u})+c_k[1u]}},meta);
-			auto& cell_n = field[flat_ind_n.get()];
-			auto& info_n = cell_n.template get<"info">();
-			auto info_n_val = info_n({0u});
-			auto k_opp = dfi::opposite_index[k.get()];
-
-			if(info_n_val.get() > 0u){
-				sum_df += dfs_old({k_opp});
-			}
-		}
-		/**
-		 * Get the sum of the unknown dfs and precalculate the direction
-		 */
-		constexpr int known_dir = East ? 1 : -1;
-		auto sum_unknown_dfs = (rho_setting_ - sum_df) * saw::data<FP>{known_dir};
-
-		for(saw::data<sch::UInt64> k{0u}; k < saw::data<sch::UInt64>{Desc::Q}; ++k){
-			auto c_k = dfi::directions[k.get()];
-			auto flat_ind_n = flatten_index<sch::UInt64,Desc::D>::apply({{index.at({0u})+c_k[0u], index.at({1u})+c_k[1u]}},meta);
-			auto& cell_n = field[flat_ind_n.get()];
-			auto& info_n = cell_n.template get<"info">();
-			auto info_n_val = info_n({0u});
-
-			if(info_n_val.get() > 0u){
-				sum_unknown_dfs += dfs_old({k}) * c_k[0u];
-			}
-		}
-
-		auto vel_x = sum_unknown_dfs / rho_setting_;
-
-		if constexpr (East) {
-			dfs_old({2u}) = dfs_old({1u}) + saw::data<FP>{2.0 / 3.0} * rho_setting_ * vel_x;
-			dfs_old({6u}) = dfs_old({5u}) + saw::data<FP>{1.0 / 6.0} * rho_setting_ * vel_x + saw::data<FP>{0.5} * (dfs_old({4u}) - dfs_old({3u}));
-			dfs_old({8u}) = dfs_old({7u}) + saw::data<FP>{1.0 / 6.0} * rho_setting_ * vel_x + saw::data<FP>{0.5} * (dfs_old({3u}) - dfs_old({4u}));
-		}else if constexpr (not East){
-			dfs_old({1u}) = dfs_old({2u}) - saw::data<FP>{2.0 / 3.0} * rho_setting_ * vel_x;
-			dfs_old({5u}) = dfs_old({6u}) - saw::data<FP>{1.0 / 6.0} * rho_setting_ * vel_x + saw::data<FP>{0.5} * (dfs_old({3u}) - dfs_old({4u}));
-			dfs_old({7u}) = dfs_old({8u}) - saw::data<FP>{1.0 / 6.0} * rho_setting_ * vel_x + saw::data<FP>{0.5} * (dfs_old({4u}) - dfs_old({3u}));
-		}
-	}
-};
-
-/**
- * This is massively hacky and expects a lot of conditions
- * Either this or mirrored along the horizontal line works
- *
- * 0 - 2 - 2
- * 0 - 3 - 1
- * 0 - 3 - 1
- * .........
- * 0 - 3 - 1
- * 0 - 2 - 2
- *
- */
-
-template<typename Desc>
-saw::error_or<void> set_geometry(
-	saw::data<sch::CellStruct>* cells,
-	const saw::data<sch::FixedArray<sch::UInt64,Desc::D>>& meta,
-	acpp::sycl::queue& sycl_q
-){
-	using namespace kel::lbm;
-	
-	saw::data<sch::T> rho{1.0};
-	saw::data<sch::FixedArray<sch::T,Desc::D>> vel{{0.0,0.0}};
-	auto eq = equilibrium<sch::T,Desc>(rho, vel);
-
-	sycl_q.submit([&](acpp::sycl::handler& h){
-		h.parallel_for(acpp::sycl::range<2>{meta.at({0}).get(), meta.at({1}).get()},[=](acpp::sycl::id<2> idx){
-			size_t i = idx[0];
-			size_t j = idx[1];
-			size_t acc_id = j * meta.at({0u}).get() + i;
-
-			auto& c = cells[acc_id];
-			auto& info = c.template get<"info">()({0});
-			auto& dfs = c.template get<"dfs">();
-			auto& dfs_old = c.template get<"dfs_old">();
-
-			if(i >= 2u and j >= 2u and (i+2u) < meta.at({0u}).get() and (j+2u) < meta.at({1u}).get()){
-				// Fluid
-				info.set({2u});
-			}else if(((j+2u) == meta.at({1u}).get() or j == 1u) and (i>=1u and (i+1u)<meta.at({0u}).get() )){
-				// Wall
-				info.set({1u});
-			}else if((i==1u) and (j >= 1 and (j+1 < meta.at({1u}).get()) ) ){
-				// Left input
-				info.set({3u});
-			}else if((i+2u) == meta.at({0u}).get() and (j >= 1 and (j+1) < meta.at({1u}).get() )){
-				// Right output
-				info.set({4u});
-			}else {
-				info.set({0u});
-			}
-			for(saw::data<sch::UInt64> k{0u}; k < saw::data<sch::UInt64>{Desc::Q}; ++k){
-				dfs(k) = eq.at(k);
-				dfs_old(k) = eq.at(k);
-			}
-		});
-	}).wait();
-
-	return saw::make_void();
-}
-
-template<typename Desc>
-void step(
-	saw::data<sch::CellStruct>* cells,
-	saw::data<sch::MacroStruct>* macro_cells,
-	const saw::data<sch::FixedArray<sch::UInt64,Desc::D>>& meta,
-	uint64_t time_step,
-	acpp::sycl::queue& sycl_q
-){
-	using namespace kel::lbm;
-	using dfi = df_info<sch::T,Desc>;
-	
-	constexpr saw::data<sch::T> frequency{1.0 / 0.5384};
-
-	bool is_even = ((time_step % 2u) == 0u);
-  /**
-	* 1. Relaxation parameter \tau
-	*/
-  /*
-	component<sch::T, sch::D2Q9, cmpt::BGK> coll{0.5384};
-	component<sch::T, sch::D2Q9, cmpt::BounceBack> bb;
-	*/
-	component<sch::T, Desc, cmpt::PressureBoundaryRestrictedVelocityTo<true>> inlet{1.1 * dfi::cs2};
-	component<sch::T, Desc, cmpt::PressureBoundaryRestrictedVelocityTo<false>> outlet{1.0 * dfi::cs2};
-
-			
-	// auto collision_ev = 
-	sycl_q.submit([&](acpp::sycl::handler& h){
-		
-		/// Collision
-		h.parallel_for(acpp::sycl::range<2>{meta.at({0}).get(), meta.at({1}).get()},[=](acpp::sycl::id<Desc::D> idx){
-			size_t i = idx[0];
-			size_t j = idx[1];
-			size_t acc_id = j * meta.at({0u}).get() + i;
-			
-			auto& c = cells[acc_id];
-			auto& info = cells[acc_id].template get<"info">();
-
-			switch (info({0u}).get()) {
-				// Bounce Back
-			 case 1u: {
-				auto& dfs_old = is_even ? c.template get<"dfs_old">() : c.template get<"dfs">();
-				auto df_cpy = dfs_old.copy();
-
-				for(uint64_t k = 1u; k < Desc::Q; ++k){
-					dfs_old({k}) = df_cpy({dfi::opposite_index.at(k)});
-				}
-
-				break;
-			 }
-			 // Collision
-			 case 2u: {
-				// coll.apply(latt_acc, {i, j}, time_step);
-				auto& dfs_old = is_even ? c.template get<"dfs_old">() : c.template get<"dfs">();
-
-				auto& macro_c = macro_cells[acc_id];
-
-				saw::data<sch::T>& rho = macro_c.template get<"pressure">();
-				saw::data<sch::FixedArray<sch::T,Desc::D>>& vel = macro_c.template get<"velocity">();
-
-				compute_rho_u<sch::T,Desc>(dfs_old,rho,vel);
-				auto eq = equilibrium<sch::T,Desc>(rho,vel);
-
-				for(uint64_t k = 0u; k < Desc::Q; ++k){
-					dfs_old({k}) = dfs_old({k}) + frequency * (eq.at({k}) - dfs_old({k}));
-				}
-				break;
-			}
-			case 3u: {
-				inlet.apply(cells, {{i,j}}, meta, time_step);
-				break;
-			}
-			case 4u: {
-				outlet.apply(cells, {{i,j}}, meta, time_step);
-				break;
-			}
-			default:
-				// Do nothing
-				break;
-			}
-		});
-	}).wait();
-	
-	//auto stream_ev = 
-	sycl_q.submit([&](acpp::sycl::handler& h){
-		/// Stream
-		h.parallel_for(acpp::sycl::range<2>{meta.at({0}).get(), meta.at({1}).get()},[=](acpp::sycl::id<Desc::D> idx){
-			size_t i = idx[0];
-			size_t j = idx[1];
-			size_t acc_id = j * meta.at({0u}).get() + i;
-
-			auto& c = cells[acc_id];
-			auto& info = c.template get<"info">();
-			auto& dfs_new = is_even ? c.template get<"dfs">() : c.template get<"dfs_old">();
-
-			if (info({0u}).get() > 0u) {
-				for (uint64_t k = 0u; k < Desc::Q; ++k) {
-					auto dir = dfi::directions[dfi::opposite_index[k]];
-					size_t acc_old_id = (j+dir[1]) * meta.at({0u}).get() + (i+dir[0]);
-
-					auto& dfs_old = is_even ? cells[acc_old_id].template get<"dfs_old">() : cells[acc_old_id].template get<"dfs">();
-					auto& info_old = cells[acc_old_id].template get<"info">();
-
-					dfs_new({k}) = dfs_old({k});
-				}
-			}
-
-		});
-	}).wait();
-}
-}
-}
-
-template<typename T, typename Desc>
-saw::error_or<void> kel_main(int argc, char** argv){
-	using namespace kel;
-
-	using dfi = lbm::df_info<T,Desc>;
-
-	auto eo_lbm_dir = lbm::output_directory();
-	if(eo_lbm_dir.is_error()){
-		return std::move(eo_lbm_dir.get_error());
-	}
-	auto& lbm_dir = eo_lbm_dir.get_value();
-	auto out_dir = lbm_dir / "poiseulle_channel_2d_gpu";
-
-	// Create Dir TODO
-
-	// 
-	lbm::converter<lbm::sch::Float64> conv {
-		// delta_x
-		{{1.0}},
-		// delta_t
-		{{1.0}}
-	};
-
-	uint64_t x_d = 512u;
-	uint64_t y_d = 128u;
-	uint64_t z_d = 128u;
-
-	saw::data<lbm::sch::FixedArray<lbm::sch::UInt64,Desc::D>> meta{{x_d,y_d,z_d}};
-
-	acpp::sycl::queue sycl_q;
-	SyclHostAlloc<lbm::sch::MacroStruct> sycl_host_alloc{sycl_q};
-	// SyclDeviceAlloc<lbm::sch::CellStruct> sycl_dev_alloc{sycl_q};
-
-	std::vector<saw::data<lbm::sch::MacroStruct>, SyclHostAlloc<lbm::sch::MacroStruct>> host_cells{x_d * y_d * z_d,sycl_host_alloc};
-
-	saw::data<lbm::sch::CellStruct>* cells = acpp::sycl::malloc_device<saw::data<lbm::sch::CellStruct>>(x_d * y_d * z_d,sycl_q);
-	saw::data<lbm::sch::MacroStruct>* macro_cells = acpp::sycl::malloc_device<saw::data<lbm::sch::MacroStruct>>(x_d * y_d * z_d,sycl_q);
-	{
-		auto eov = lbm::set_geometry<Desc>(cells,meta,sycl_q);
-		if(eov.is_error()){
-			return eov;
-		}
-	}
-
-
-	for(uint64_t i = 0u; i < 1024u*128u; ++i){
-		lbm::step<Desc>(cells,macro_cells,meta,i,sycl_q);
-		sycl_q.wait();
-		if(i%4u == 0u){
-			std::string vtk_f_name{"tmp/poiseulle_2d_gpu_"};
-			vtk_f_name += std::to_string(i) + ".vtk";
-			// write_vtk_file(vtk_f_name,host_cells);
-			sycl_q.memcpy(&host_cells[0u], macro_cells, x_d * y_d * z_d * sizeof(saw::data<lbm::sch::MacroStruct>) ).wait();
-			lbm::write_vtk_file<lbm::sch::MacroStruct,Desc::D>(vtk_f_name, &host_cells[0], meta);
-		}
-	}
-
-	sycl_q.wait();
-	acpp::sycl::free(cells, sycl_q);
-	acpp::sycl::free(macro_cells, sycl_q);
-	sycl_q.wait();
-
-	return saw::make_void();
-}
-
-int main(int argc, char** argv){
-	auto eov = kel_main<kel::lbm::sch::T,kel::lbm::sch::D3Q27>(argc, argv);
-	if(eov.is_error()){
-		auto& err = eov.get_error();
-		std::cerr<<"[Error] "<<err.get_category();
-		auto err_msg = err.get_message();
-		if(err_msg.size() > 0u){
-			std::cerr<<" - "<<err_msg;
-		}
-		std::cerr<<std::endl;
-		return err.get_id();
-	}
-	return 0;
-}
diff --git a/examples/poiseulle_3d_gpu/sim.cpp b/examples/poiseulle_3d_gpu/sim.cpp
new file mode 100644
index 0000000..e3305d6
--- /dev/null
+++ b/examples/poiseulle_3d_gpu/sim.cpp
@@ -0,0 +1,422 @@
+#include <kel/lbm/sycl/lbm.hpp>
+#include <kel/lbm/lbm.hpp>
+#include <kel/lbm/particle.hpp>
+
+#include <forstio/io/io.hpp>
+#include <forstio/remote/filesystem/easy.hpp>
+#include <forstio/codec/json/json.hpp>
+#include <forstio/codec/simple.hpp>
+
+namespace kel {
+namespace lbm {
+
+constexpr uint64_t dim_x = 1024ul;
+constexpr uint64_t dim_y = 512ul;
+constexpr uint64_t dim_z = 512ul;
+constexpr uint64_t particle_size = 128ul;
+
+namespace sch {
+using namespace saw::schema;
+
+using InfoChunk = Chunk<UInt8, 0u, dim_x, dim_y>;
+
+template<typename T, typename Desc>
+using DfChunk = Chunk<FixedArray<T,Desc::Q>, 1u, dim_x, dim_y, dim_z>;
+
+template<typename T, typename Desc>
+using ScalarChunk = Chunk<Scalar<T>, 0u, dim_x, dim_y, dim_z>;
+
+template<typename T, typename Desc>
+using VectorChunk = Chunk<Vector<T,Desc::D>, 0u, dim_x, dim_y, dim_z>;
+
+template<typename T, typename Desc>
+using ChunkStruct = Struct<
+	Member<InfoChunk, "info">,
+	Member<DfChunk<T,Desc>, "dfs">,
+	Member<DfChunk<T,Desc>, "dfs_old">,
+	Member<VectorChunk<T,Desc>, "particle_N">,
+	Member<ScalarChunk<T,Desc>, "particle_D">
+>;
+
+template<typename T, typename Desc>
+using VelChunk = Chunk<Vector<T,Desc::D>, 0u, dim_x, dim_y, dim_z>;
+
+template<typename T>
+using RhoChunk = Chunk<Scalar<T>, 0u, dim_x, dim_y, dim_z>;
+
+template<typename T, typename Desc>
+using MacroStruct = Struct<
+	Member<VelChunk<T,Desc>, "velocity">,
+	Member<RhoChunk<T>, "density">,
+	Member<ScalarChunk<T,Desc>, "porosity">
+>;
+
+//template<typename T, typename Desc>
+//using ParticleArray = Array<
+//	Particle<T,Desc::D>
+//>;
+}
+
+template<typename T, typename Desc>
+saw::error_or<void> setup_initial_conditions(
+		saw::data<sch::ChunkStruct<T,Desc>>& fields,
+		saw::data<sch::MacroStruct<T,Desc>>& macros,
+		saw::data<sch::FixedArray<sch::Particle<T,Desc::D>, particle_size>>& particles
+){
+	auto& info_f = fields.template get<"info">();
+	// Set everything as walls
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			info_f.at(index).set(1u);
+		},
+		{},
+		info_f.get_dims(),
+		{}
+	);
+	// Fluid
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			info_f.at(index).set(2u);
+		},
+		{},
+		info_f.get_dims(),
+		{{1u,1u,1u}}
+	);
+	
+	// Inflow
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			info_f.at(index).set(3u);
+		},
+		{{0u,0u,0u}},
+		{{1u,dim_y,dim_z}},
+		{{0u,1u,1u}}
+	);
+	
+	// Outflow
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			info_f.at(index).set(4u);
+		},
+		{{dim_x-1u,0u,0u}},
+		{{dim_x,dim_y,dim_z}},
+		{{0u,1u,1u}}
+	);
+	//
+	auto& df_f = fields.template get<"dfs_old">();
+	auto& rho_f = macros.template get<"density">();
+	auto& vel_f = macros.template get<"velocity">();
+	auto& por_f = macros.template get<"porosity">();
+	
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			auto& df = df_f.at(index);
+			auto& rho = rho_f.at(index);
+			por_f.at(index).at({}) = {1};
+			rho.at({}) = {1};
+			auto& vel = vel_f.at(index);
+			auto eq = equilibrium<T,Desc>(rho,vel);
+
+			df = eq;
+		},
+		{},// 0-index
+		df_f.get_dims()
+	);
+
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			auto& df = df_f.at(index);
+			auto& rho = rho_f.at(index);
+			rho.at({}) = {1};
+			auto& vel = vel_f.at(index);
+			vel.at({{0u}}) = 0.1;
+			auto eq = equilibrium<T,Desc>(rho,vel);
+
+			df = eq;
+		},
+		{},// 0-index
+		df_f.get_dims(),
+		{{1u,1u,1u}}
+	);
+
+	for(saw::data<sch::UInt64> i{0u}; i < saw::data<sch::UInt64>{particle_size}; ++i){
+		auto& part = particles.at(i);
+	}
+
+	return saw::make_void();
+}
+
+template<typename T, typename Desc>
+saw::error_or<void> step(
+		saw::data<sch::Ptr<sch::ChunkStruct<T,Desc>>,encode::Sycl<saw::encode::Native>>& fields,
+		saw::data<sch::Ptr<sch::MacroStruct<T,Desc>>,encode::Sycl<saw::encode::Native>>& macros,
+		saw::data<sch::UInt64> t_i,
+		device& dev
+){
+	auto& q = dev.get_handle();
+	auto& info_f = fields.template get<"info">();
+
+	// auto coll_ev = 
+	q.submit([&](acpp::sycl::handler& h){
+		// Need nicer things to handle the flow. I see improvement here
+		component<T,Desc,cmpt::BGK, encode::Sycl<saw::encode::Native>> collision{0.6};
+		// component<T,Desc,cmpt::HLBM,encode::Sycl<saw::encode::Native>> collision{0.6};
+		component<T,Desc,cmpt::BounceBack,encode::Sycl<saw::encode::Native>> bb;
+
+		saw::data<sch::Scalar<T>> rho_b;
+		rho_b.at({}) = 1.0;
+		saw::data<sch::Vector<T,Desc::D>> vel_b;
+		vel_b.at({{0u}}) = 0.1;
+
+		component<T,Desc,cmpt::Equilibrium,encode::Sycl<saw::encode::Native>> equi{rho_b,vel_b};
+
+		component<T,Desc,cmpt::ZouHeHorizontal<true>,encode::Sycl<saw::encode::Native>> flow_in{
+			[&](){
+				uint64_t target_t_i = 256u;
+				if(t_i.get() < target_t_i){
+					return 1.0 + (0.001 / target_t_i) * t_i.get();
+				}
+				return 1.001;
+			}()
+		};
+		component<T,Desc,cmpt::ZouHeHorizontal<false>,encode::Sycl<saw::encode::Native>> flow_out{1.0};
+
+		h.parallel_for(acpp::sycl::range<Desc::D>{dim_x,dim_y,dim_z}, [=](acpp::sycl::id<Desc::D> idx){
+			saw::data<sch::FixedArray<sch::UInt64,Desc::D>> index;
+			for(uint64_t i = 0u; i < Desc::D; ++i){
+				index.at({{i}}).set(idx[i]);
+			}
+
+			auto info = info_f.at(index);
+			
+			switch(info.get()){
+				case 0u:
+				break;
+				case 1u:
+					bb.apply(fields,index,t_i);
+				break;
+				case 2u:
+					collision.apply(fields,macros,index,t_i);
+				break;
+				case 3u:
+					equi.apply(fields,index,t_i);
+					// flow_in.apply(fields,index,t_i);
+					collision.apply(fields,macros,index,t_i);
+				break;
+				case 4u:
+					equi.apply(fields,index,t_i);
+					// flow_out.apply(fields,index,t_i);
+					collision.apply(fields,macros,index,t_i);
+				break;
+				default:
+				break;
+			}
+		});
+	}).wait();
+
+	q.submit([&](acpp::sycl::handler& h){
+		component<T,Desc,cmpt::Stream,encode::Sycl<saw::encode::Native>> stream;
+
+		h.parallel_for(acpp::sycl::range<Desc::D>{dim_x,dim_y,dim_z}, [=](acpp::sycl::id<Desc::D> idx){
+			saw::data<sch::FixedArray<sch::UInt64,Desc::D>> index;
+			for(uint64_t i = 0u; i < Desc::D; ++i){
+				index.at({{i}}).set(idx[i]);
+			}
+			
+			auto info = info_f.at(index);
+			
+			if(info.get() > 0u){
+				stream.apply(fields,index,t_i);
+			}
+		});
+	}).wait();
+
+	// Step
+	/*
+	q.submit([&](acpp::sycl::handler& h){
+		// h.depends_on(collision_ev);
+	}).wait();
+	*/
+
+	return saw::make_void();
+}
+}
+}
+
+template<typename T, typename Desc>
+saw::error_or<void> lbm_main(int argc, char** argv){
+	using namespace kel::lbm;
+
+	using dfi = df_info<T,Desc>;
+
+	auto eo_lbm_dir = output_directory();
+	if(eo_lbm_dir.is_error()){
+		return std::move(eo_lbm_dir.get_error());
+	}
+	auto& lbm_dir = eo_lbm_dir.get_value();
+
+	auto out_dir = lbm_dir / "poiseulle_particles_2d_gpu";
+
+	{
+		std::error_code ec;
+		std::filesystem::create_directories(out_dir,ec);
+		if(ec != std::errc{}){
+			return saw::make_error<saw::err::critical>("Could not create output directory");
+		}
+	}
+
+	converter<sch::Float64> conv {
+		// delta_x
+		{{1.0}},
+		// delta_t
+		{{1.0}}
+	};
+
+	// saw::data<sch::FixedArray<sch::UInt64,Desc::D>> meta{{dim_x,dim_y}};
+	auto lbm_data_ptr = saw::heap<saw::data<sch::ChunkStruct<T,Desc>>>();
+	auto lbm_macro_data_ptr = saw::heap<saw::data<sch::MacroStruct<T,Desc>>>();
+	auto lbm_particle_data_ptr = saw::heap<saw::data<sch::FixedArray<sch::Particle<T,Desc::D>, particle_size>>>();
+
+	auto eo_aio = saw::setup_async_io();
+	if(eo_aio.is_error()){
+		return std::move(eo_aio.get_error());
+	}
+	auto& aio = eo_aio.get_value();
+	saw::wait_scope wait{aio.event_loop};
+
+	bool krun = true;
+	bool print_status = false;
+	aio.event_port.on_signal(saw::Signal::Terminate).then([&](){
+		krun = false;
+	}).detach();
+	aio.event_port.on_signal(saw::Signal::User1).then([&](){
+		print_status = true;
+	}).detach();
+
+	device dev;
+
+	auto& sycl_q = dev.get_handle();
+
+	sycl_q.wait();
+	{
+		auto eov = setup_initial_conditions<T,Desc>(*lbm_data_ptr,*lbm_macro_data_ptr,*lbm_particle_data_ptr);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+
+	saw::data<sch::ChunkStruct<T,Desc>, encode::Sycl<saw::encode::Native>> lbm_sycl_data{sycl_q};
+	saw::data<sch::MacroStruct<T,Desc>, encode::Sycl<saw::encode::Native>> lbm_sycl_macro_data{sycl_q};
+	saw::data<sch::FixedArray<sch::Particle<T,Desc::D>, particle_size>, encode::Sycl<saw::encode::Native>> lbm_sycl_particle_data{sycl_q};
+	sycl_q.wait();
+
+	auto lsd_view = make_chunk_struct_view(lbm_sycl_data);
+	auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data);
+	{
+		auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+	{
+		auto eov = dev.copy_to_device(*lbm_macro_data_ptr,lbm_sycl_macro_data);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+	{
+		auto eov = dev.copy_to_device(*lbm_particle_data_ptr,lbm_sycl_particle_data);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+	sycl_q.wait();
+	saw::data<sch::UInt64> time_steps{2048ul};
+
+	for(saw::data<sch::UInt64> i{0u}; i < time_steps and krun; ++i){
+		{
+			{
+				std::string file_name = "t_";
+				file_name += std::to_string(i.get());
+				file_name += ".vtk";
+				auto eov = write_vtk_file(out_dir/file_name, *lbm_macro_data_ptr);
+				if(eov.is_error()){
+					return eov;
+				}
+			}
+			/*{
+				std::string file_name = "p_";
+				file_name += std::to_string(i.get());
+				file_name += ".json";
+				auto eov = saw::easy::encode_and_write_file<sch::FixedArray<sch::Particle<T,Desc::D>,particle_size>,saw::encode::Json>(out_dir, *lbm_particle_data_ptr);
+				if(eov.is_error()){
+					return eov;
+				}
+			}*/
+		}
+		{
+			auto eov = step<T,Desc>(lsd_view,lsdm_view,i,dev);
+			if(eov.is_error()){
+				return eov;
+			}
+		}
+		{
+			auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr);
+			if(eov.is_error()){
+				return eov;
+			}
+		}
+		{
+			auto eov = dev.copy_to_host(lbm_sycl_data,*lbm_data_ptr);
+			if(eov.is_error()){
+				return eov;
+			}
+		}
+
+		wait.poll();
+		if(print_status){
+			std::cout<<"Status: "<<i.get()<<" of "<<time_steps.get()<<" - "<<(i.template cast_to<sch::Float64>().get() * 100 / time_steps.get())<<"%"<<std::endl;
+			print_status = false;
+		}
+	}
+	sycl_q.wait();
+	{
+		std::string file_name = "t_";
+		file_name += std::to_string(time_steps.get());
+		file_name += ".vtk";
+		auto eov = write_vtk_file(out_dir/file_name, *lbm_macro_data_ptr);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+
+	/*
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			std::cout<<index.at({0u}).get()<<" "<<index.at({1u}).get()<<" "<<lbm_data.get<"info">().at(index).template cast_to<sch::UInt16>().get()<<"\n";
+		},
+		{{0u,0u}},
+		{{dim_x, dim_y}}
+	);
+	*/
+
+	sycl_q.wait();
+	return saw::make_void();
+}
+
+using FloatT = kel::lbm::sch::Float32;
+
+int main(int argc, char** argv){
+	auto eov = lbm_main<FloatT,kel::lbm::sch::D3Q27>(argc, argv);
+	if(eov.is_error()){
+		auto& err = eov.get_error();
+		std::cerr<<"[Error] "<<err.get_category();
+		auto err_msg = err.get_message();
+		if(err_msg.size() > 0u){
+			std::cerr<<" - "<<err_msg;
+		}
+		std::cerr<<std::endl;
+		return err.get_id();
+	}
+	return 0;
+}
diff --git a/examples/poiseulle_particles_2d_gpu/sim.cpp b/examples/poiseulle_particles_2d_gpu/sim.cpp
index 6d5452e..887c476 100644
--- a/examples/poiseulle_particles_2d_gpu/sim.cpp
+++ b/examples/poiseulle_particles_2d_gpu/sim.cpp
@@ -158,7 +158,8 @@ saw::error_or<void> step(
 	// auto coll_ev = 
 	q.submit([&](acpp::sycl::handler& h){
 		// Need nicer things to handle the flow. I see improvement here
-		component<T,Desc,cmpt::HLBM,encode::Sycl<saw::encode::Native>> collision{0.6};
+		component<T,Desc,cmpt::BGK, encode::Sycl<saw::encode::Native>> collision{0.6};
+		// component<T,Desc,cmpt::HLBM,encode::Sycl<saw::encode::Native>> collision{0.6};
 		component<T,Desc,cmpt::BounceBack,encode::Sycl<saw::encode::Native>> bb;
 
 		saw::data<sch::Scalar<T>> rho_b;
@@ -329,7 +330,7 @@ saw::error_or<void> lbm_main(int argc, char** argv){
 		}
 	}
 	sycl_q.wait();
-	saw::data<sch::UInt64> time_steps{256ul};
+	saw::data<sch::UInt64> time_steps{2048ul};
 
 	for(saw::data<sch::UInt64> i{0u}; i < time_steps and krun; ++i){
 		{
-- 
cgit v1.2.3