Merge branch 'dev'

author: Claudius "keldu" Holeksa <mail@keldu.de> 2026-06-02 20:20:48 +0200
committer: Claudius "keldu" Holeksa <mail@keldu.de> 2026-06-02 20:20:48 +0200
commit: 5ea4875b96bfacd4c5f0125c9e7b64b70f0ccfb9 (patch)
tree: 96b1625e2559e227e2f12802796450d64ab4ce45
parent: cf4132d9a02271847e774035c4a49ff9158ba289 (diff)
parent: da25b3a1e7776a810d3bda5af3f363cf3e986cae (diff)
download: libs-lbm-5ea4875b96bfacd4c5f0125c9e7b64b70f0ccfb9.tar.gz
18 files changed, 885 insertions, 85 deletions
diff --git a/README.md b/README.md
index 313eb8c..02e8b63 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
 # Lattice-Boltzmann-Method library  
 
 Small disclaimer. The commits are a bit chaotic.  
+It's intended for myself mostly. I should still have feature branches.  
diff --git a/default.nix b/default.nix
index 4ae4b71..d233d90 100644
--- a/default.nix
+++ b/default.nix
@@ -40,38 +40,13 @@ let
 		};
 	};
 
-	sci_tools = let
-		scitoolsSrc = stdenv.mkDerivation {
-			name = "scitools-src";
-
-		 src = builtins.fetchurl {
-			url = "https://git.keldu.de/apps-science_tools/snapshot/master.tar.gz";
-			sha256 = "e91c18fef798dd7b3afbd1615c2e320b90a74aa2d7ef726801a76e3f7f77ae81";
-		 };
-
-		 phases = [ "unpackPhase" "installPhase" ];
-
-		 unpackPhase = ''
-			mkdir source
-			tar -xzf "$src" -C source --strip-components=1
-		 '';
-
-		 installPhase = ''
-			cp -r source $out
-		 '';
-	  };
-	in
-		(import "${scitoolsSrc}/default.nix" {
-			inherit stdenv clang-tools forstio;
-		});
-
 	forstio = let
 		forstioSrc = stdenv.mkDerivation {
 			name = "forstio-src";
 
 		 src = builtins.fetchurl {
 			url = "https://git.keldu.de/forstio-forstio/snapshot/master.tar.gz";
-			sha256 = "sha256:17zsz10lj5dgqw3fmassgqlhbwgpd7zznbq8cl0sw1v816w3ca8z";
+			sha256 = "sha256:15iqzmymza47jjx4wpc19mbg3zzwmkabpssf5y968f566n0fnb9a";
 		 };
 
 		 phases = [ "unpackPhase" "installPhase" ];
@@ -212,7 +187,7 @@ in rec {
 				examples.poiseulle_particles_2d_psm_gpu
 				examples.poiseulle_particles_2d_hlbm_gpu
 				examples.poiseulle_particles_2d_fplbm_gpu
-				examples.poiseulle_3d_gpu
+				examples.poiseulle_particles_2d_ibm_gpu
 			];
 		};
 	};
diff --git a/examples/poiseulle_particles_2d_gpu/.nix/derivation.nix b/examples/poiseulle_particles_2d_gpu/.nix/derivation.nix
new file mode 100644
index 0000000..d78ff47
--- /dev/null
+++ b/examples/poiseulle_particles_2d_gpu/.nix/derivation.nix
@@ -0,0 +1,45 @@
+{ lib
+, stdenv
+, scons
+, clang-tools
+, forstio
+, python3
+, pname
+, version
+, adaptive-cpp
+, kel
+, slip ? false
+, particle_coupling ? "fplbm"
+}:
+
+let
+	slip_txt = if slip then "slip" else "noslip";
+in stdenv.mkDerivation {
+	pname = "${pname}-examples-poiseulle_2d_gpu_${particle_coupling}_${slip_txt}";
+	inherit version;
+	src = ./..;
+
+	nativeBuildInputs = [
+		scons
+		clang-tools
+		python3
+	];
+
+	buildInputs = [
+		forstio.core
+		forstio.async
+		forstio.codec
+		forstio.codec-unit
+		forstio.io
+    forstio.remote
+    forstio.remote-filesystem
+		forstio.codec-json
+		adaptive-cpp
+		kel.lbm.core
+		kel.lbm.sycl
+	];
+
+	preferLocalBuild = true;
+
+	outputs = [ "out" "dev" ];
+}
diff --git a/examples/poiseulle_particles_2d_gpu/SConscript b/examples/poiseulle_particles_2d_gpu/SConscript
new file mode 100644
index 0000000..4483d58
--- /dev/null
+++ b/examples/poiseulle_particles_2d_gpu/SConscript
@@ -0,0 +1,34 @@
+#!/bin/false
+
+import os
+import os.path
+import glob
+
+
+Import('env')
+
+dir_path = Dir('.').abspath
+
+# Environment for base library
+examples_env = env.Clone();
+examples_env['CXX'] = 'syclcc-clang';
+examples_env['CXXFLAGS'] += ['-O3'];
+
+examples_env.sources = sorted(glob.glob(dir_path + "/*.cpp"))
+examples_env.headers = sorted(glob.glob(dir_path + "/*.hpp"))
+
+env.sources += examples_env.sources;
+env.headers += examples_env.headers;
+
+# Poiseulle particles 2D (HLBM, GPU)
+examples_objects = [];
+examples_env.add_source_files(examples_objects, ['sim.cpp'], shared=False);
+examples_env.poiseulle_2d_gpu = examples_env.Program('#bin/poiseulle_particles_2d_hlbm_gpu', [examples_objects]);
+
+# Set Alias
+env.examples = [
+	examples_env.poiseulle_2d_gpu
+];
+env.Alias('examples', env.examples);
+env.targets += ['examples'];
+env.Install('$prefix/bin/', env.examples);
diff --git a/examples/poiseulle_particles_2d_gpu/SConstruct b/examples/poiseulle_particles_2d_gpu/SConstruct
new file mode 100644
index 0000000..0611b67
--- /dev/null
+++ b/examples/poiseulle_particles_2d_gpu/SConstruct
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import os.path
+import glob
+import re
+
+
+if sys.version_info < (3,):
+    def isbasestring(s):
+        return isinstance(s,basestring)
+else:
+    def isbasestring(s):
+        return isinstance(s, (str,bytes))
+
+def add_kel_source_files(self, sources, filetype, lib_env=None, shared=False, target_post=""):
+
+    if isbasestring(filetype):
+        dir_path = self.Dir('.').abspath
+        filetype = sorted(glob.glob(dir_path+"/"+filetype))
+
+    for path in filetype:
+        target_name = re.sub( r'(.*?)(\.cpp|\.c\+\+)', r'\1' + target_post, path )
+        if shared:
+            target_name+='.os'
+            sources.append( self.SharedObject( target=target_name, source=path ) )
+        else:
+            target_name+='.o'
+            sources.append( self.StaticObject( target=target_name, source=path ) )
+    pass
+
+def isAbsolutePath(key, dirname, env):
+	assert os.path.isabs(dirname), "%r must have absolute path syntax" % (key,)
+
+env_vars = Variables(
+	args=ARGUMENTS
+)
+
+env_vars.Add('prefix',
+	help='Installation target location of build results and headers',
+	default='/usr/local/',
+	validator=isAbsolutePath
+)
+
+env_vars.Add('build_examples',
+	help='If examples should be built',
+	default="true"
+)
+
+env=Environment(ENV=os.environ, variables=env_vars, CPPPATH=[],
+    CPPDEFINES=['SAW_UNIX'],
+    CXXFLAGS=[
+        '-std=c++20',
+        '-g',
+        '-Wall',
+        '-Wextra'
+    ],
+    LIBS=[
+        'forstio-core',
+        'forstio-async',
+        'forstio-io'
+    ]
+);
+env.__class__.add_source_files = add_kel_source_files
+env.Tool('compilation_db');
+env.cdb = env.CompilationDatabase('compile_commands.json');
+
+env.objects = [];
+env.sources = [];
+env.headers = [];
+env.targets = [];
+
+Export('env')
+SConscript('SConscript')
+
+env.Alias('cdb', env.cdb);
+env.Alias('all', [env.targets]);
+env.Default('all');
+
+env.Alias('install', '$prefix')
diff --git a/examples/poiseulle_particles_2d_gpu/sim.cpp b/examples/poiseulle_particles_2d_gpu/sim.cpp
new file mode 100644
index 0000000..9375078
--- /dev/null
+++ b/examples/poiseulle_particles_2d_gpu/sim.cpp
@@ -0,0 +1,423 @@
+#include <kel/lbm/sycl/lbm.hpp>
+#include <kel/lbm/lbm.hpp>
+#include <kel/lbm/particle.hpp>
+
+#include <forstio/io/io.hpp>
+#include <forstio/remote/filesystem/easy.hpp>
+#include <forstio/codec/json/json.hpp>
+#include <forstio/codec/simple.hpp>
+
+namespace kel {
+namespace lbm {
+
+constexpr uint64_t dim_y = 256ul;
+constexpr uint64_t dim_x = dim_y * 20ul;
+
+constexpr uint64_t particle_amount = 1ul;
+
+namespace sch {
+using namespace saw::schema;
+
+using InfoChunk = Chunk<UInt8, 0u, dim_x, dim_y>;
+
+template<typename T, typename Desc>
+using DfChunk = Chunk<FixedArray<T,Desc::Q>, 1u, dim_x, dim_y>;
+
+template<typename T, typename Desc>
+using ScalarChunk = Chunk<Scalar<T>, 0u, dim_x, dim_y>;
+
+template<typename T, typename Desc>
+using VectorChunk = Chunk<Vector<T,Desc::D>, 0u, dim_x, dim_y>;
+
+template<typename T, typename Desc>
+using ChunkStruct = Struct<
+	Member<InfoChunk, "info">,
+	Member<DfChunk<T,Desc>, "dfs">,
+	Member<DfChunk<T,Desc>, "dfs_old">,
+	Member<VectorChunk<T,Desc>, "particle_N">,
+	Member<ScalarChunk<T,Desc>, "particle_D">
+>;
+
+template<typename T, typename Desc>
+using VelChunk = Chunk<Vector<T,Desc::D>, 0u, dim_x, dim_y>;
+
+template<typename T>
+using RhoChunk = Chunk<Scalar<T>, 0u, dim_x, dim_y>;
+
+template<typename T, typename Desc>
+using MacroStruct = Struct<
+	Member<VelChunk<T,Desc>, "velocity">,
+	Member<RhoChunk<T>, "density">,
+	Member<ScalarChunk<T,Desc>, "porosity">
+>;
+
+//template<typename T, typename Desc>
+//using ParticleArray = Array<
+//	Particle<T,Desc::D>
+//>;
+}
+
+template<typename T, typename Desc>
+saw::error_or<void> setup_initial_conditions(
+		saw::data<sch::ChunkStruct<T,Desc>>& fields,
+		saw::data<sch::MacroStruct<T,Desc>>& macros
+){
+	auto& info_f = fields.template get<"info">();
+	auto& porous_f = macros.template get<"porosity">();
+	// Set everything as walls
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			info_f.at(index).set(1u);
+		},
+		{},
+		info_f.get_dims(),
+		{}
+	);
+	// Fluid
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			info_f.at(index).set(2u);
+		},
+		{},
+		info_f.get_dims(),
+		{{1u,1u}}
+	);
+	
+	// Inflow
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			info_f.at(index).set(3u);
+		},
+		{{0u,0u}},
+		{{1u,dim_y}},
+		{{0u,1u}}
+	);
+	
+	// Outflow
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			info_f.at(index).set(4u);
+		},
+		{{dim_x-1u,0u}},
+		{{dim_x, dim_y}},
+		{{0u,1u}}
+	);
+	//
+	auto& df_f = fields.template get<"dfs_old">();
+	auto& rho_f = macros.template get<"density">();
+	auto& vel_f = macros.template get<"velocity">();
+	auto& por_f = macros.template get<"porosity">();
+	
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			auto& df = df_f.at(index);
+			auto& rho = rho_f.at(index);
+			por_f.at(index).at({}) = {1};
+			rho.at({}) = {1};
+			auto& vel = vel_f.at(index);
+			auto eq = equilibrium<T,Desc>(rho,vel);
+
+			df = eq;
+		},
+		{},// 0-index
+		df_f.get_dims()
+	);
+
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			auto& df = df_f.at(index);
+			auto& rho = rho_f.at(index);
+			rho.at({}) = {1};
+			auto& vel = vel_f.at(index);
+			if(info_f.at(index).get() == 2u){
+				vel.at({{0u}}) = 0.0;
+			}
+			auto eq = equilibrium<T,Desc>(rho,vel);
+
+			df = eq;
+		},
+		{},// 0-index
+		df_f.get_dims(),
+		{{1u,1u}}
+	);
+	
+	iterator<Desc::D>::apply(
+		[&](auto& index){
+			saw::data<sch::Vector<T,Desc::D>> middle, ind_vec;
+			middle.at({{0u}}) = dim_x * 0.25;
+			middle.at({{1u}}) = dim_y * 0.5;
+
+			ind_vec.at({{0u}}) = index.at({{0u}}).template cast_to<T>();
+			ind_vec.at({{1u}}) = index.at({{1u}}).template cast_to<T>();
+
+			auto dist = middle - ind_vec;
+			auto dist_2 = saw::math::dot(dist,dist);
+			if(dist_2.at({}).get() < dim_y*dim_y*0.01){
+				porous_f.at(index).at({}) = 0.0;
+			}
+		},
+		{},// 0-index
+		df_f.get_dims()
+	);
+
+	return saw::make_void();
+}
+
+template<typename T, typename Desc>
+saw::error_or<void> step(
+		saw::data<sch::Ptr<sch::ChunkStruct<T,Desc>>,encode::Sycl<saw::encode::Native>>& fields,
+		saw::data<sch::Ptr<sch::MacroStruct<T,Desc>>,encode::Sycl<saw::encode::Native>>& macros,
+		saw::data<sch::UInt64> t_i,
+		device& dev
+){
+	auto& q = dev.get_handle();
+	auto& info_f = fields.template get<"info">();
+	auto& porous_f = macros.template get<"porosity">();
+
+	// auto coll_ev = 
+	q.submit([&](acpp::sycl::handler& h){
+		component<T,Desc,cmpt::Hlbm,encode::Sycl<saw::encode::Native>> collision{0.65};
+		component<T,Desc,cmpt::BounceBack,encode::Sycl<saw::encode::Native>> bb;
+		component<T,Desc,cmpt::AntiBounceBack<0u>,encode::Sycl<saw::encode::Native>> abb;
+
+		saw::data<sch::Scalar<T>> rho_b;
+		rho_b.at({}) = 1.0;
+		saw::data<sch::Vector<T,Desc::D>> vel_b;
+		vel_b.at({{0u}}) = 0.015;
+
+		component<T,Desc,cmpt::Equilibrium,encode::Sycl<saw::encode::Native>> equi{rho_b,vel_b};
+
+		component<T,Desc,cmpt::ZouHeHorizontal<true>,encode::Sycl<saw::encode::Native>> flow_in{
+			[&](){
+				uint64_t target_t_i = 64u;
+				if(t_i.get() < target_t_i){
+					return 1.0 + (0.0002 / target_t_i) * t_i.get();
+				}
+				return 1.0002;
+			}()
+		};
+		component<T,Desc,cmpt::ZouHeHorizontal<false>,encode::Sycl<saw::encode::Native>> flow_out{1.0};
+
+
+		h.parallel_for(acpp::sycl::range<Desc::D>{dim_x,dim_y}, [=](acpp::sycl::id<Desc::D> idx){
+			saw::data<sch::FixedArray<sch::UInt64,Desc::D>> index;
+			for(uint64_t i = 0u; i < Desc::D; ++i){
+				index.at({{i}}).set(idx[i]);
+			}
+
+			auto info = info_f.at(index);
+			
+			switch(info.get()){
+				case 0u:
+				break;
+				case 1u:
+					bb.apply(fields,index,t_i);
+				break;
+				case 2u:
+					collision.apply(fields,macros,index,t_i);
+					break;
+				case 3u:
+					flow_in.apply(fields,index,t_i);
+					// equi.apply(fields,index,t_i);
+					collision.apply(fields,macros,index,t_i);
+					break;
+				case 4u:
+					flow_out.apply(fields,index,t_i);
+					// equi.apply(fields,index,t_i);
+					collision.apply(fields,macros,index,t_i);
+				break;
+				default:
+				break;
+			}
+		});
+	}).wait();
+
+	
+	// Step
+	/*
+	q.submit([&](acpp::sycl::handler& h){
+		// h.depends_on(collision_ev);
+	}).wait();
+	*/
+
+	return saw::make_void();
+}
+}
+}
+
+template<typename T, typename Desc>
+saw::error_or<void> lbm_main(int argc, char** argv){
+	using namespace kel::lbm;
+
+	using dfi = df_info<T,Desc>;
+
+	auto eo_lbm_dir = output_directory();
+	if(eo_lbm_dir.is_error()){
+		return std::move(eo_lbm_dir.get_error());
+	}
+	auto& lbm_dir = eo_lbm_dir.get_value();
+
+	auto out_dir = lbm_dir / "poiseulle_particles_2d_hlbm_gpu";
+
+	{
+		std::error_code ec;
+		std::filesystem::create_directories(out_dir,ec);
+		if(ec != std::errc{}){
+			return saw::make_error<saw::err::critical>("Could not create output directory");
+		}
+	}
+
+	converter<T> conv {
+		// delta_x
+		{{1.0}},
+		// delta_t
+		{{1.0}}
+	};
+	
+	print_lbm_meta<T,Desc>(conv,{0.1},{1e-4},{0.4 * dim_y});
+
+	// saw::data<sch::FixedArray<sch::UInt64,Desc::D>> meta{{dim_x,dim_y}};
+	auto lbm_data_ptr = saw::heap<saw::data<sch::ChunkStruct<T,Desc>>>();
+	auto lbm_macro_data_ptr = saw::heap<saw::data<sch::MacroStruct<T,Desc>>>();
+	
+	std::cout<<"Estimated Bytes: "<<memory_estimate<sch::ChunkStruct<T,Desc>,sch::MacroStruct<T,Desc>>().get()<<std::endl;
+
+	auto eo_aio = saw::setup_async_io();
+	if(eo_aio.is_error()){
+		return std::move(eo_aio.get_error());
+	}
+	auto& aio = eo_aio.get_value();
+	saw::wait_scope wait{aio.event_loop};
+
+	bool krun = true;
+	bool print_status = false;
+	aio.event_port.on_signal(saw::Signal::Terminate).then([&](){
+		krun = false;
+	}).detach();
+	aio.event_port.on_signal(saw::Signal::User1).then([&](){
+		print_status = true;
+	}).detach();
+
+	device dev;
+
+	auto& sycl_q = dev.get_handle();
+
+	sycl_q.wait();
+	{
+		auto eov = setup_initial_conditions<T,Desc>(*lbm_data_ptr,*lbm_macro_data_ptr);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+	{
+		auto eov = write_vtk_file(out_dir,"initial_state",0u,*lbm_data_ptr);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+
+	saw::data<sch::ChunkStruct<T,Desc>, encode::Sycl<saw::encode::Native>> lbm_sycl_data{sycl_q};
+	saw::data<sch::MacroStruct<T,Desc>, encode::Sycl<saw::encode::Native>> lbm_sycl_macro_data{sycl_q};
+	sycl_q.wait();
+
+	{
+		auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+	{
+		auto eov = dev.copy_to_device(*lbm_macro_data_ptr,lbm_sycl_macro_data);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+	sycl_q.wait();
+	auto lsd_view = make_view(lbm_sycl_data);
+	auto lsdm_view = make_view(lbm_sycl_macro_data);
+	saw::data<sch::UInt64> time_steps{16u*4096ul};
+	auto& info_f = lsd_view.template get<"info">();
+
+	for(saw::data<sch::UInt64> i{0u}; i < time_steps and krun; ++i){
+		// BC + Collision
+		{
+			auto eov = step<T,Desc>(lsd_view,lsdm_view,i,dev);
+			if(eov.is_error()){
+				return eov;
+			}
+		}
+		sycl_q.wait();
+		if(i.get() % 32u == 0u){
+			{
+				auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr);
+				if(eov.is_error()){
+					return eov;
+				}
+			}
+			{
+				auto eov = write_csv_file(out_dir,"m",i.get(), *lbm_macro_data_ptr);
+				if(eov.is_error()){
+					return eov;
+				}
+			}
+		}
+		// Stream
+		sycl_q.submit([&](acpp::sycl::handler& h){
+			component<T,Desc,cmpt::Stream,encode::Sycl<saw::encode::Native>> stream;
+
+			h.parallel_for(acpp::sycl::range<Desc::D>{dim_x,dim_y}, [=](acpp::sycl::id<Desc::D> idx){
+				saw::data<sch::FixedArray<sch::UInt64,Desc::D>> index;
+				for(uint64_t i = 0u; i < Desc::D; ++i){
+					index.at({{i}}).set(idx[i]);
+				}
+				
+				auto info = info_f.at(index);
+				
+				if(info.get() > 0u){
+					stream.apply(lsd_view,index,i);
+				}
+			});
+		}).wait();
+		wait.poll();
+		if(print_status){
+			std::cout<<"Status: "<<i.get()<<" of "<<time_steps.get()<<" - "<<(i.template cast_to<sch::Float64>().get() * 100 / time_steps.get())<<"%"<<std::endl;
+			print_status = false;
+		}
+		print_progress_bar(i.get(), time_steps.get()-1u);
+	}
+
+	// After Loop
+	sycl_q.wait();
+	{
+		auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+	{
+		auto eov = write_vtk_file(out_dir,"m",time_steps.get(), *lbm_macro_data_ptr);
+		if(eov.is_error()){
+			return eov;
+		}
+	}
+
+	sycl_q.wait();
+	return saw::make_void();
+}
+
+using FloatT = kel::lbm::sch::Float32;
+
+int main(int argc, char** argv){
+	auto eov = lbm_main<FloatT,kel::lbm::sch::D2Q9>(argc, argv);
+	if(eov.is_error()){
+		auto& err = eov.get_error();
+		std::cerr<<"[Error] "<<err.get_category();
+		auto err_msg = err.get_message();
+		if(err_msg.size() > 0u){
+			std::cerr<<" - "<<err_msg;
+		}
+		std::cerr<<std::endl;
+		return err.get_id();
+	}
+	return 0;
+}
diff --git a/examples/poiseulle_particles_2d_ibm_gpu/sim.cpp b/examples/poiseulle_particles_2d_ibm_gpu/sim.cpp
index e68d7da..e1bd3ba 100644
--- a/examples/poiseulle_particles_2d_ibm_gpu/sim.cpp
+++ b/examples/poiseulle_particles_2d_ibm_gpu/sim.cpp
@@ -1,7 +1,7 @@
 #include <kel/lbm/sycl/lbm.hpp>
 #include <kel/lbm/lbm.hpp>
 #include <kel/lbm/particle.hpp>
-#include <kel/lbm/math/n_linear.hpp>
+#include <kel/lbm/math/math.hpp>
 
 #include <forstio/io/io.hpp>
 #include <forstio/remote/filesystem/easy.hpp>
@@ -114,6 +114,7 @@ saw::error_or<void> setup_initial_conditions(
 	auto& df_f = fields.template get<"dfs_old">();
 	auto& rho_f = macros.template get<"density">();
 	auto& vel_f = macros.template get<"velocity">();
+	auto& force_f = macros.template get<"force">();
 	
 	iterator<Desc::D>::apply(
 		[&](auto& index){
@@ -135,6 +136,9 @@ saw::error_or<void> setup_initial_conditions(
 			auto& rho = rho_f.at(index);
 			rho.at({}) = {1};
 			auto& vel = vel_f.at(index);
+			auto& force = force_f.at(index);
+			force = {};
+
 			if(info_f.at(index).get() == 2u){
 				vel.at({{0u}}) = 0.0;
 			}
@@ -195,15 +199,79 @@ saw::error_or<void> step(
 ){
 	auto& q = dev.get_handle();
 	auto& info_f = fields.template get<"info">();
+	auto& force_f = macros.template get<"force">();
+
+	q.submit([&](acpp::sycl::handler& h){
+		h.parallel_for(acpp::sycl::range<Desc::D>{dim_x,dim_y}, [=](acpp::sycl::id<Desc::D> idx){
+			saw::data<sch::FixedArray<sch::UInt64,Desc::D>> index;
+			for(uint64_t i = 0u; i < Desc::D; ++i){
+				index.at({{i}}).set(idx[i]);
+			}
+
+			auto& force = force_f.at(index);
+
+			for(uint64_t i{0u}; i < Desc::D; ++i){
+				force.at({{i}}) = 0.0;
+			}
+		});
+	}).wait();
+			
+	q.submit([&](acpp::sycl::handler& h){
+		h.parallel_for(acpp::sycl::range<1u>{1u}, [=](acpp::sycl::id<1u> idx){
+			auto& vel_f = macros.template get<"velocity">();
+			auto& dense_f = macros.template get<"density">();
+
+			auto& ps = particles;
+			auto& mask = ps.template get<"mask">();
+			auto& mask_step = ps.template get<"mask_step">().at({});
+			auto& p_dense = ps.template get<"density">().at({});
+			auto& com = ps.template get<"center_of_mass">();
+
+			auto& parts = ps.template get<"particles">();
+
+			auto& p_i = parts.at({{idx[0u]}});
+
+			auto& p_i_rb = p_i.template get<"rigid_body">();
+			/// 0. Iterate over mask and calculate position in LBM grid
+			/// In this case it's simple since I'm too lazy to do scaling and rotation
+			/// Technically scale => rotate => translate
+			/// Here it's only translate
+			auto& p_i_rb_pos = p_i_rb.template get<"position">();
+
+			iterator<Desc::D>::apply([&](const auto& index){
+				/// Calculate the shift from the mask
+				saw::data<sch::Vector<T,Desc::D>> index_shift;
+				for(uint64_t i = 0u; i < Desc::D; ++i){
+					index_shift.at({{i}}) = index.at({i}).template cast_to<T>() - com.at({}).at({{i}});
+					// Scale to LBM Grid
+					index_shift.at({{i}}) = index_shift.at({{i}}) * mask_step.at({});
+				}
+
+				// Shift our pos into the index
+				auto p_i_rb_pos_ind = p_i_rb_pos + index_shift;
+							
+				/// Calculate force pickup from neighbouring u_vel cells
+				// auto inter_vel_fluid = n_linear_interpolate(vel_f,p_i_rb_pos_ind);
+				auto inter_vel_fluid = n_closest_read(vel_f,p_i_rb_pos_ind);
+
+				// Technically TODO to use moment
+				auto inter_moment_fluid = inter_vel_fluid;
+
+				// Technically Particles can have more timesteps than the fluid
+				auto force_response = -inter_moment_fluid;
+
+				/// Distribute force to fluid
+				// n_linear_spread(force_f,p_i_rb_pos_ind, force_response);
+				n_closest_add(force_f,p_i_rb_pos_ind, force_response);
+
+			}, {}, mask.meta());
+		});
+	}).wait();
 
 	// auto coll_ev = 
 	q.submit([&](acpp::sycl::handler& h){
 		// Need nicer things to handle the flow. I see improvement here
-		saw::data<sch::Vector<T,Desc::D>> f;
-		f.at({{0u}}) = 0.0;
-		f.at({{1u}}) = -1.0;
-
-		component<T,Desc,cmpt::BGKGuo, encode::Sycl<saw::encode::Native>> collision{0.65,f};
+		component<T,Desc,cmpt::BGKGuo, encode::Sycl<saw::encode::Native>> collision{0.65};
 		component<T,Desc,cmpt::BounceBack,encode::Sycl<saw::encode::Native>> bb;
 		component<T,Desc,cmpt::AntiBounceBack<0u>,encode::Sycl<saw::encode::Native>> abb;
 
@@ -233,7 +301,7 @@ saw::error_or<void> step(
 			}
 
 			auto info = info_f.at(index);
-			
+						
 			switch(info.get()){
 				case 0u:
 				break;
@@ -256,6 +324,8 @@ saw::error_or<void> step(
 				break;
 			}
 		});
+
+		
 	}).wait();
 
 	
@@ -265,44 +335,6 @@ saw::error_or<void> step(
 		// h.depends_on(collision_ev);
 	}).wait();
 	*/
-	q.submit([&](acpp::sycl::handler& h){
-		h.parallel_for(acpp::sycl::range<1u>{1u}, [=](acpp::sycl::id<1u> idx){
-			auto& vel = macros.template get<"velocity">();
-
-			auto& ps = particles;
-			auto& mask = ps.template get<"mask">();
-			auto& dense = ps.template get<"density">().at({});
-			auto& com = ps.template get<"center_of_mass">();
-
-			auto& parts = ps.template get<"particles">();
-
-			auto& p_i = parts.at({{0u}});
-
-			auto& p_i_rb = p_i.template get<"rigid_body">();
-			/// 0. Iterate over mask and calculate position in LBM grid
-			/// In this case it's simple since I'm too lazy to do scaling and rotation
-			/// Technically scale => rotate => translate
-			/// Here it's only translate
-			auto& p_i_rb_pos = p_i_rb.template get<"position">();
-
-			iterator<Desc::D>::apply([&](const auto& index){
-				/// Calculate the shift from the mask
-				saw::data<sch::Vector<T,Desc::D>> index_shift;
-				for(uint64_t i = 0u; i < Desc::D; ++i){
-					index_shift.at({{i}}) = index.at({i}).template cast_to<T>() - com.at({{i}});
-				}
-
-
-				/// TODO 1. Calculate force pickup from neigbouring u_vel cells
-				auto inter_vel = n_linear_interpolate(vel,index_shift);
-
-				/// TODO 3. Distribute force to fluid
-
-
-			}, {}, mask.meta());
-		});
-	}).wait();
-
 	return saw::make_void();
 }
 }
@@ -476,8 +508,21 @@ saw::error_or<void> lbm_main(int argc, char** argv){
 			});
 		}).wait();
 		wait.poll();
+
+		// PRINT STATUS ON SIGUSR1
 		if(print_status){
-			std::cout<<"Status: "<<i.get()<<" of "<<time_steps.get()<<" - "<<(i.template cast_to<sch::Float64>().get() * 100 / time_steps.get())<<"%"<<std::endl;
+			{
+				auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr);
+				if(eov.is_error()){
+					return eov;
+				}
+			}
+			{
+				auto eov = write_vtk_file(out_dir,"m",i.get(), *lbm_macro_data_ptr);
+				if(eov.is_error()){
+					return eov;
+				}
+			}
 			print_status = false;
 		}
 		print_progress_bar(i.get(), time_steps.get()-1u);
diff --git a/examples/settling_cubes_2d_ibm_gpu/sim.cpp b/examples/settling_cubes_2d_ibm_gpu/sim.cpp
index 9fdea8c..33712b5 100644
--- a/examples/settling_cubes_2d_ibm_gpu/sim.cpp
+++ b/examples/settling_cubes_2d_ibm_gpu/sim.cpp
@@ -91,7 +91,7 @@ saw::error_or<void> setup_initial_conditions(
 		{{1u,1u}}
 	);
 	//
-	auto& df_f = fields.template get<"dfs_old">();
+	auto& df_f = fields.template get<"dfs_old">();
 	auto& rho_f = macros.template get<"density">();
 	auto& vel_f = macros.template get<"velocity">();
 	
diff --git a/lib/core/c++/abstract/data.hpp b/lib/core/c++/abstract/data.hpp
index 0075718..ed23268 100644
--- a/lib/core/c++/abstract/data.hpp
+++ b/lib/core/c++/abstract/data.hpp
@@ -48,4 +48,5 @@ template<typename Sch>
 struct schema {
 	using Type = Sch;
 };
+
 }
diff --git a/lib/core/c++/chunk.hpp b/lib/core/c++/chunk.hpp
index a1f2451..0f92437 100644
--- a/lib/core/c++/chunk.hpp
+++ b/lib/core/c++/chunk.hpp
@@ -25,6 +25,7 @@ struct chunk_schema_type_helper<Sch, Ghost, saw::tmpl_value_group<uint64_t>, saw
 template<typename Sch, uint64_t Ghost, uint64_t... Sides>
 struct Chunk {
 	using InnerSchema = typename impl::chunk_schema_type_helper<Sch, Ghost, saw::tmpl_value_group<uint64_t,Sides...>>::Schema;
+	using StoredValueSchema = Sch;
 };
 
 // Not needed for now
diff --git a/lib/core/c++/collision.hpp b/lib/core/c++/collision.hpp
index 9c76c1a..023f61f 100644
--- a/lib/core/c++/collision.hpp
+++ b/lib/core/c++/collision.hpp
@@ -146,7 +146,8 @@ public:
 
 		saw::data<sch::Scalar<T>> half;
 		half.at({}).set(0.5);
-		saw::data<sch::Vector<T,Descriptor::D>> vel = vel_f.at(index) + total_force * ( half / rho );
+		auto& vel = vel_f.at(index);
+		vel = vel + total_force * ( half / rho );
 
 		compute_rho_u<T,Descriptor>(dfs_old_f.at(index),rho,vel);
 		auto eq = equilibrium<T,Descriptor>(rho,vel);
diff --git a/lib/core/c++/hlbm.hpp b/lib/core/c++/hlbm.hpp
index 196de73..7590cc2 100644
--- a/lib/core/c++/hlbm.hpp
+++ b/lib/core/c++/hlbm.hpp
@@ -7,10 +7,39 @@
 namespace kel {
 namespace lbm {
 namespace cmpt {
+struct HlbmInit {}; 
 struct Hlbm {};
 struct HlbmParticle {};
 }
 
+template<typename T, typename Descriptor, typename Encode>
+class component<T, Descriptor, cmpt::HlbmInit, Encode> final {
+private:
+	typename saw::native_data_type<T>::type relaxation_;
+	saw::data<T> frequency_;
+public:
+	component(typename saw::native_data_type<T>::type relaxation__):
+		relaxation_{relaxation__},
+		frequency_{typename saw::native_data_type<T>::type(1) / relaxation_}
+	{}
+
+	template<typename CellFieldSchema, typename MacroFieldSchema>
+	void apply(const saw::data<CellFieldSchema, Encode>& field, const saw::data<MacroFieldSchema,Encode>& macros, saw::data<sch::FixedArray<sch::UInt64,Descriptor::D>> index, saw::data<sch::UInt64> time_step) const {
+		auto& porosity_f = macros.template get<"porosity">();
+		auto& particle_N_f = field.template get<"particle_N">();
+		auto& particle_D_f = field.template get<"particle_D">();
+
+		auto& por = porosity_f.at(index);
+		por = {};
+
+		auto& pnf = particle_N_f.at(index);
+		pnf = {};
+
+		auto& pnd = particle_D_f.at(index);
+		pnd = {};
+	}
+};
+
 /**
  * HLBM collision operator for LBM
  */
@@ -61,8 +90,9 @@ public:
 			dfs_old_f.at(index).at({i}) = dfs_old_f.at(index).at({i}) + frequency_ * (eq.at(i) - dfs_old_f.at(index).at({i}));
 		}
 
-		// porosity.at({}) = 1.0;
+		porosity.at({}) = 1.0;
 		D.at({}) = 0.0;
+		N = {};
 	}
 };
 
@@ -80,6 +110,7 @@ public:
 
 		/// Iterate over the grid bounds
 		// auto& grid = p.template get<"grid">();
+		
 
 	}
 };
diff --git a/lib/core/c++/lbm.hpp b/lib/core/c++/lbm.hpp
index fbad908..b34ec10 100644
--- a/lib/core/c++/lbm.hpp
+++ b/lib/core/c++/lbm.hpp
@@ -23,6 +23,8 @@
 #include "write_vtk.hpp"
 #include "util.hpp"
 
+#include "math/math.hpp"
+
 #include <forstio/codec/unit/unit_print.hpp>
 #include <iostream>
 
diff --git a/lib/core/c++/math/math.hpp b/lib/core/c++/math/math.hpp
new file mode 100644
index 0000000..3920bec
--- /dev/null
+++ b/lib/core/c++/math/math.hpp
@@ -0,0 +1,4 @@
+#pragma once
+
+#include "n_linear.hpp"
+#include "n_closest.hpp"
diff --git a/lib/core/c++/math/n_closest.hpp b/lib/core/c++/math/n_closest.hpp
new file mode 100644
index 0000000..ac0fe2f
--- /dev/null
+++ b/lib/core/c++/math/n_closest.hpp
@@ -0,0 +1,54 @@
+#pragma once
+
+#include "../common.hpp"
+#include "../iterator.hpp"
+
+namespace kel {
+namespace lbm {
+
+/// Read the cell of `f` closest to the fractional index `frac_ind`: each component gets +0.5, negatives become 0, then cast_to<UInt64>.
+/// Returns a copy of the stored value. NOTE(review): no upper-bound clamp is applied here; confirm callers keep frac_ind inside the field.
+template<typename FieldSchema, typename Encode, typename T, uint64_t D>
+saw::data<typename FieldSchema::StoredValueSchema> n_closest_read(const saw::data<sch::Ptr<FieldSchema>,Encode>& f, const saw::data<sch::Vector<T,D>>& frac_ind){
+	auto shift_frac_ind = frac_ind;
+	for(uint64_t i{0u}; i < D; ++i){
+		shift_frac_ind.at({{i}}) = shift_frac_ind.at({{i}})  + saw::data<T>{0.5};
+		if(shift_frac_ind.at({{i}}).get() < 0){
+			shift_frac_ind.at({{i}}) = {};
+		}
+	}
+
+	saw::data<sch::FixedArray<sch::UInt64,D>> shift_ind;
+	for(uint64_t i{0u}; i < D; ++i){
+		// Cast the shifted/clamped value; casting the raw frac_ind would discard the +0.5 shift and the negative guard.
+		shift_ind.at({i}) = shift_frac_ind.at({{i}}).template cast_to<sch::UInt64>();
+	}
+	return f.at(shift_ind);
+}
+
+/// Add `val` onto the cell of `f` closest to `frac_ind` (+0.5 per component, negatives become 0, cast_to<UInt64>, clamped to the last cell per f.meta()).
+template<typename FieldSchema, typename Encode, typename T, uint64_t D>
+void n_closest_add(const saw::data<sch::Ptr<FieldSchema>,Encode>& f, const saw::data<sch::Vector<T,D>>& frac_ind, const saw::data<typename FieldSchema::StoredValueSchema>& val){
+	auto shift_frac_ind = frac_ind;
+	for(uint64_t i{0u}; i < D; ++i){
+		shift_frac_ind.at({{i}}) = shift_frac_ind.at({{i}})  + saw::data<T>{0.5};
+		if(shift_frac_ind.at({{i}}).get() < 0){
+			shift_frac_ind.at({{i}}) = {};
+		}
+	}
+
+	auto f_meta = f.meta();
+	saw::data<sch::FixedArray<sch::UInt64,D>> shift_ind;
+	for(uint64_t i{0u}; i < D; ++i){
+		shift_ind.at({i}) = shift_frac_ind.at({{i}}).template cast_to<sch::UInt64>();
+		// Clamp only indices at or beyond the extent; the previous `<` test redirected every valid index to the last cell.
+		if(!(shift_ind.at({i}) < f_meta.at({i}))){
+			shift_ind.at({i}) = f_meta.at({i}) - 1u;
+		}
+	}
+	auto& f_i = f.at(shift_ind);
+	f_i = f_i + val;
+}
+
+}
+}
diff --git a/lib/core/c++/math/round.hpp b/lib/core/c++/math/round.hpp
new file mode 100644
index 0000000..d3a2586
--- /dev/null
+++ b/lib/core/c++/math/round.hpp
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "../common.hpp"
+
+namespace kel {
+namespace lbm {
+
+template<typename T, uint64_t D>
+saw::data<sch::FixedArray<sch::UInt64,D>> round_to_unsigned(const saw::data<sch::Vector<T,D>>& inp){
+	saw::data<sch::FixedArray<sch::UInt64,D>> rv;
+				
+	auto zero = static_cast<saw::native_data_type<T>::type>(0);
+	auto half = static_cast<saw::native_data_type<T>::type>(0.5);
+
+	for(uint64_t i{0u}; i < D; ++i){
+		auto val = inp.at({{i}}).get()+half;
+		val = std::max(zero,val);
+
+		rv.at({i}).set(static_cast<uint64_t>(val));
+	}
+
+	return rv;
+}
+
+}
+}
diff --git a/lib/core/c++/particle/particle.hpp b/lib/core/c++/particle/particle.hpp
index 938131b..fec2eca 100644
--- a/lib/core/c++/particle/particle.hpp
+++ b/lib/core/c++/particle/particle.hpp
@@ -51,26 +51,33 @@ using Particle = Struct<
 	// Member<Array<Float64,D>, "mask">, 
 >;
 
-template<typename T, uint64_t D, typename CollisionType = ParticleCollisionSpheroid<T,2.0f>>
+template<typename T, uint64_t D, typename CollisionType = ParticleCollisionSpheroid<T>>
 using ParticleGroup = Struct<
 	Member<Array<T,D>, "mask">,
+	Member<FixedArray<Scalar<T>,1u>, "mask_step">,
 	Member<FixedArray<Scalar<T>,1u>, "density">,
-	Member<Vector<T,D>, "center_of_mass">,
-	Member<Scalar<T>, "total_mass">,
-	Member<Array<Particle<T,D>>, "particles">
+	Member<FixedArray<Vector<T,D>,1u>, "center_of_mass">,
+	Member<FixedArray<Scalar<T>,1u>, "total_mass">,
+	Member<Array<Particle<T,D>,1u>, "particles">
 >;
 }
 
+
+
 template<typename T, uint64_t D, typename saw::native_data_type<T>::type radius>
 saw::data<sch::ParticleGroup<T,D, sch::ParticleCollisionSpheroid<T,radius>>> create_spheroid_particle_group(
 		saw::data<sch::Scalar<T>> density_p,
 		const saw::data<sch::UInt64>& mask_resolution
 ){
-	saw::data<sch::ParticleGroup<T,D,sch::ParticleCollisionSpheroid<T,2.0f>>> part;
+	saw::data<sch::ParticleGroup<T,D,sch::ParticleCollisionSpheroid<T,radius>>> part;
 
 	auto& mask = part.template get<"mask">();
 	auto& density = part.template get<"density">().at({{0u}});
 
+	auto& total_mass = part.template get<"total_mass">().at({{0u}});
+	// Paranoia
+	total_mass.at({}) = {};
+
 	static_assert(D >= 1u and D <= 3u, "Dimensions only supported for Dim 1,2 & 3.");
 	density = density_p;
 
@@ -78,14 +85,16 @@ saw::data<sch::ParticleGroup<T,D, sch::ParticleCollisionSpheroid<T,radius>>> cre
 	for(uint64_t i = 0u; i < D; ++i){
 		mask_dims.at({i}) = mask_resolution;
 	}
-	saw::data<sch::Scalar<T>> mask_step;
 	saw::data<T> rad_d{radius};
 	saw::data<T> dia_d = rad_d * 2;
-	mask_step.at({}) = dia_d / mask_resolution.template cast_to<T>();
 
 	mask = {mask_dims};
 
-	auto& com = part.template get<"center_of_mass">();
+	auto& mask_step = part.template get<"mask_step">().at({{0u}});
+	mask_step.at({}) = dia_d / mask_resolution.template cast_to<T>();
+
+	auto& com = part.template get<"center_of_mass">().at({{0u}});
+	// Paranoia
 	for(uint64_t i = 0u; i < D; ++i){
 		com.at({{i}}) = {};
 	}
@@ -97,14 +106,27 @@ saw::data<sch::ParticleGroup<T,D, sch::ParticleCollisionSpheroid<T,radius>>> cre
 
 	saw::data<sch::Vector<T,D>> center;
 	for(uint64_t i = 0u; i < D; ++i){
-		com.at({{i}}) = ;
+		center.at({{i}}).set(radius);
 	}
 
 	iterator<D>::apply([&](const auto& index){
 		++ele_ctr;
 
+		saw::data<sch::Vector<T,D>> offset_index = saw::math::vectorize_data(index).template cast_to<T>() - center;
+
+		auto& dpi = mask.at(index);
+
+		for(uint64_t i = 0u; i < D; ++i){
+			com.at({{i}}) = com.at({{i}}) + index.at({i}).template cast_to<T>() * dpi;
+		}
+
+		total_mass.at({}) = total_mass.at({}) + dpi;
+
 	},{},mask_dims);
 
+	for(uint64_t i = 0u; i < D; ++i){
+		com.at({{i}}) = com.at({{i}}) / total_mass.at({});
+	}
 	return part;
 }
 
diff --git a/lib/core/c++/particle/porosity.hpp b/lib/core/c++/particle/porosity.hpp
new file mode 100644
index 0000000..aa1ce5b
--- /dev/null
+++ b/lib/core/c++/particle/porosity.hpp
@@ -0,0 +1,54 @@
+#pragma once
+
+#include "particle.hpp"
+#include "../math/n_closest.hpp"
+
+namespace kel {
+namespace lbm {
+// Generic porosity lookup for a particle group; still a stub that returns a default-constructed scalar.
+template<typename T, uint64_t D, typename Coll>
+class particle_porosity {
+public:
+	// part_group: group providing "mask" and "particles"; p_i: particle index; lbm_pos: queried position.
+	saw::data<sch::Scalar<T>> calculate(const saw::data<sch::ParticleGroup<T,D,Coll>>& part_group, uint64_t p_i, const saw::data<sch::Vector<T,D>>& lbm_pos){
+		[[maybe_unused]] auto& mask = part_group.template get<"mask">();
+
+		auto& particles = part_group.template get<"particles">();
+		auto& part_i = particles.at({p_i});
+		auto& part_i_rb = part_i.template get<"rigid_body">();
+		auto& pirb = part_i_rb.template get<"position">();
+
+		// Offset of the queried position from the particle position; lbm_pos is const, so keep a local copy.
+		[[maybe_unused]] const auto dist = lbm_pos - pirb;
+		// TODO: map dist onto a mask index and read the porosity there (unfinished note: "index 0 is at").
+		return {};
+	}
+};
+
+
+// Spheroid specialization: returns 1 when lbm_pos lies strictly inside the radius, 0 otherwise.
+template<typename T, uint64_t D, typename saw::native_data_type<T>::type radius>
+class particle_porosity<T, D, sch::ParticleCollisionSpheroid<T,radius>> final {
+public:
+	// lbm_pos is used directly as the offset that gets squared; the particle argument and i are not read.
+	saw::data<sch::Scalar<T>> calculate(const saw::data<sch::Particle<T,D>>&, uint64_t i, const saw::data<sch::Vector<T,D>>& lbm_pos){
+		// Squared distance, accumulated over every dimension starting from an explicit zero.
+		saw::data<sch::Scalar<T>> dps_2;
+		dps_2.at({}) = {};
+		for(uint64_t d{0u}; d < D; ++d){
+			auto& dps_d = lbm_pos.at({{d}});
+			dps_2.at({}) = dps_2.at({}) + dps_d * dps_d;
+		}
+
+		saw::data<sch::Scalar<T>> rad_2;
+		rad_2.at({}).set(radius*radius);
+
+		// Compare squared values so no square root is needed; the outside case is set explicitly.
+		saw::data<sch::Scalar<T>> inside;
+		inside.at({}).set(dps_2.at({}).get() < rad_2.at({}).get() ? 1 : 0);
+		return inside;
+	}
+};
+
+}
+}
author	Claudius "keldu" Holeksa <mail@keldu.de>	2026-06-02 20:20:48 +0200
committer	Claudius "keldu" Holeksa <mail@keldu.de>	2026-06-02 20:20:48 +0200
commit	5ea4875b96bfacd4c5f0125c9e7b64b70f0ccfb9 (patch)
tree	96b1625e2559e227e2f12802796450d64ab4ce45
parent	cf4132d9a02271847e774035c4a49ff9158ba289 (diff)
parent	da25b3a1e7776a810d3bda5af3f363cf3e986cae (diff)
download	libs-lbm-5ea4875b96bfacd4c5f0125c9e7b64b70f0ccfb9.tar.gz