#include "mixed_precision.hpp"
/**
 * Builds the three request handlers returned by listen_mixed_precision.
 *
 * The returned object is brace-initialised from three lambdas. Each lambda
 * receives the input data `in` and a SYCL queue `cmd`, submits one command
 * group to that queue and stores the event returned by `submit` in one of
 * the caller-provided event references. Each lambda then returns
 * `saw::void_t{}`.
 *
 * Every kernel is launched over `cl::sycl::range<1>(in.size())`. A work-item
 * loads its element, applies `foo = foo + foo * 1.7342345` once per loop
 * iteration (`arithmetic_intensity` iterations in total) and stores the
 * result back into the same element.
 *
 * @param mixed_ev             assigned the event of the first handler
 * @param float64_ev           assigned the event of the second handler
 * @param float32_ev           assigned the event of the third handler
 * @param arithmetic_intensity number of loop iterations per work-item
 * @return the object holding the three handlers
 *
 * Lifetime: the handlers capture with `[&]`, so the three events and
 * `arithmetic_intensity` must stay alive for as long as the returned
 * handlers can be invoked.
 *
 * Synchronisation: no wait on the submitted work is issued in this function;
 * the stored events are the only handle to the submitted command groups.
 *
 * NOTE(review): several template argument lists look truncated in this copy
 * of the file (return type, `saw::data`, `saw::error_or`, `access`). Confirm
 * the element types and the access mode against the original source.
 */
saw::interface, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev,
	cl::sycl::event& float64_ev,
	cl::sycl::event& float32_ev,
	uint64_t& arithmetic_intensity){
	return {
		/**
		 * Mixed handler: its event is stored in mixed_ev.
		 */
		[&](saw::data>& in, cl::sycl::queue* cmd) -> saw::error_or {
			// Element count, read on the host before submission.
			uint64_t in_size = in.size();
			mixed_ev = cmd->submit([&](cl::sycl::handler& h){
				auto acc_buff = in.template access(h);
				// The kernel captures by copy, so the current value of
				// arithmetic_intensity is copied into the kernel here.
				h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){
					// Work on a local copy of this work-item element.
					saw::data foo = {acc_buff[0u].at(it[0u]).get()};
					for(uint64_t i = 0; i < arithmetic_intensity; ++i){
						// This variant compares the raw value from get()
						// against an unsuffixed literal.
						// The store below is overwritten by the store after
						// the loop. Presumably the branch only exists to keep
						// the compiler from folding the loop; TODO confirm.
						if( foo.get() == 1.1e12 ){
							acc_buff[0u].at(it[0u]) = 0.f;
						}
						foo = foo + foo * saw::data{1.7342345};
					}
					// Final result replaces the input element.
					acc_buff[0u].at(it[0u]) = foo;
				});
			});
			return saw::void_t{};
		},
		// Second handler: same kernel shape, event stored in float64_ev.
		// The guard compares foo against a saw::data built from an
		// unsuffixed literal.
		[&](saw::data>& in, cl::sycl::queue* cmd) -> saw::error_or {
			uint64_t in_size = in.size();
			float64_ev = cmd->submit([&](cl::sycl::handler& h){
				auto acc_buff = in.template access(h);
				h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){
					saw::data foo = {acc_buff[0u].at(it[0u]).get()};
					for(uint64_t i = 0; i < arithmetic_intensity; ++i){
						// Guard store is overwritten after the loop, as in
						// the first handler.
						if( foo == saw::data{1.1e12} ){
							acc_buff[0u].at(it[0u]) = 0.f;
						}
						foo = foo +foo * saw::data{1.7342345};
					}
					acc_buff[0u].at(it[0u]) = foo;
				});
			});
			return saw::void_t{};
		},
		// Third handler: same kernel shape, event stored in float32_ev.
		// Both literals carry the f suffix, so they are float literals.
		[&](saw::data>& in, cl::sycl::queue* cmd) -> saw::error_or {
			uint64_t in_size = in.size();
			float32_ev = cmd->submit([&](cl::sycl::handler& h){
				auto acc_buff = in.template access(h);
				h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){
					saw::data foo = {acc_buff[0u].at(it[0u]).get()};
					for(uint64_t i = 0; i < arithmetic_intensity; ++i){
						// Guard store is overwritten after the loop, as in
						// the first handler.
						if( foo == saw::data{1.1e12f} ){
							acc_buff[0u].at(it[0u]) = 0.f;
						}
						foo = foo + foo * saw::data{1.7342345f};
					}
					acc_buff[0u].at(it[0u]) = foo;
				});
			});
			return saw::void_t{};
		}
	};
}