summaryrefslogtreecommitdiff
path: root/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp
blob: 83705f6727af0079d7d6c75c2e5c783c98aec7fd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include "mixed_precision.hpp"

/**
 * Builds the handler set for sch::MixedPrecisionBenchmarkInterface.
 *
 * Three handlers are returned, in this order: mixed, float64, float32.
 * Each handler submits exactly one parallel_for to the queue it is given,
 * stores the event returned by submit() in the matching out-reference and
 * then returns saw::void_t{}; no handler has an error path.
 *
 * Per work-item the kernels load one element, run `arithmetic_intensity`
 * iterations of `v = v + v * 1.7342345` and write the result back to the
 * same element.
 *
 * All handlers capture the four parameters by reference, so the referenced
 * objects must still be alive whenever a handler of the returned interface
 * is invoked.
 *
 * @param mixed_ev             receives the event of the mixed kernel
 * @param float64_ev           receives the event of the Float64 kernel
 * @param float32_ev           receives the event of the Float32 kernel
 * @param arithmetic_intensity loop trip count; copied into the kernel
 *                             closure ([=]) when a handler submits its kernel
 * @return interface object holding the three handlers
 */
saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Sycl<saw::encode::Native>, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev, uint64_t& arithmetic_intensity){
	return {
		/**
		 * Mixed: elements come from a MixedArray, the running value is
		 * held as Float64.
		 */
		[&](saw::data<sch::MixedArray, saw::encode::Sycl<saw::encode::Native>>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
			const uint64_t element_count = in.size();

			mixed_ev = cmd->submit([&](cl::sycl::handler& cgh){
				auto accessor = in.template access<cl::sycl::access::mode::read_write>(cgh);

				cgh.parallel_for(cl::sycl::range<1>(element_count), [=] (cl::sycl::id<1> item){
					const auto idx = item[0u];

					saw::data<sch::Float64> value = {accessor[0u].at(idx).get()};
					for(uint64_t step = 0; step < arithmetic_intensity; ++step){
						// NOTE(review): presumably a guard that keeps the loop
						// from being optimised away - confirm.
						if( value.get() == 1.1e12 ){
							accessor[0u].at(idx) = 0.f;
						}
						value = value + value * saw::data<sch::Float64>{1.7342345};
					}
					accessor[0u].at(idx) = value;
				});
			});
			return saw::void_t{};
		},
		/**
		 * Float64: elements and running value are both Float64.
		 */
		[&](saw::data<sch::Float64Array, saw::encode::Sycl<saw::encode::Native>>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
			const uint64_t element_count = in.size();

			float64_ev = cmd->submit([&](cl::sycl::handler& cgh){
				auto accessor = in.template access<cl::sycl::access::mode::read_write>(cgh);

				cgh.parallel_for(cl::sycl::range<1>(element_count), [=] (cl::sycl::id<1> item){
					const auto idx = item[0u];

					saw::data<sch::Float64> value = {accessor[0u].at(idx).get()};
					for(uint64_t step = 0; step < arithmetic_intensity; ++step){
						// NOTE(review): presumably a guard that keeps the loop
						// from being optimised away - confirm.
						if( value == saw::data<sch::Float64>{1.1e12} ){
							accessor[0u].at(idx) = 0.f;
						}
						value = value + value * saw::data<sch::Float64>{1.7342345};
					}
					accessor[0u].at(idx) = value;
				});
			});
			return saw::void_t{};
		},
		/**
		 * Float32: elements and running value are both Float32.
		 */
		[&](saw::data<sch::Float32Array, saw::encode::Sycl<saw::encode::Native>>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
			const uint64_t element_count = in.size();

			float32_ev = cmd->submit([&](cl::sycl::handler& cgh){
				auto accessor = in.template access<cl::sycl::access::mode::read_write>(cgh);

				cgh.parallel_for(cl::sycl::range<1>(element_count), [=] (cl::sycl::id<1> item){
					const auto idx = item[0u];

					saw::data<sch::Float32> value = {accessor[0u].at(idx).get()};
					for(uint64_t step = 0; step < arithmetic_intensity; ++step){
						// NOTE(review): presumably a guard that keeps the loop
						// from being optimised away - confirm.
						if( value == saw::data<sch::Float32>{1.1e12f} ){
							accessor[0u].at(idx) = 0.f;
						}
						value = value + value * saw::data<sch::Float32>{1.7342345f};
					}
					accessor[0u].at(idx) = value;
				});
			});
			return saw::void_t{};
		}
	};
}