summaryrefslogtreecommitdiff
path: root/modules/remote-sycl/benchmarks/mixed_precision_alternative.cpp
blob: 4afb29e9a6218b795764f4179cb772a2a8adbe77 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include "../c++/data.hpp"

#include <cstdint>
#include <iostream>
#include <random>

// Shorthand scope: a using-directive makes the names of saw::schema
// reachable through the `sch::` qualifier used in the rest of this file.
namespace sch { using namespace saw::schema; }

template<typename T>
void inner_work(){
	std::random_device r;
	std::default_random_engine e1{r()};
	std::uniform_real_distribution<> dis{-3.0,-1.0};

	acpp::sycl::queue sycl_q;
	acpp::sycl::event ev;
	
	auto time_eval = [](uint64_t & current_min_time, acpp::sycl::event& evt){
		auto end = evt.get_profiling_info<acpp::sycl::info::event_profiling::command_end>();
		auto start = evt.get_profiling_info<acpp::sycl::info::event_profiling::command_start>();

		uint64_t curr_time = (end-start);
		current_min_time = std::min(curr_time, current_min_time);
	};

	constexpr uint64_t arithmetic_intensity = 1024ul;

	/**
	 * Warmup
	 */
	std::cout<<"Warming up ..."<<std::endl;
	for(uint64_t test_size = 1ul; test_size < max_test_size; test_size *= 2ul){
		data<sch::Array<T>, encode::Sycl<encode::Native>> dat{{{test_size}},sycl_q};
		data<sch::Ref<sch::Array<T>>, encode::Sycl<encode::Native>> dat_ref{dat};
		auto dat_ptr = dat_ref.get_internal_data();

		for(uint64_t i = 0; i < test_size; ++i){
			double gen_num = dis(e1);
			dat.at({{i}}) = {static_cast<double>(gen_num)};
		}
		
		sycl_q.parallel_for([=](acpp::sycl::id<1> idx){
			data<T::InterfaceSchema> foo = {dat_ptr[idx[0u]].get()};
			for(uint64_t i = 0; i < arithmetic_intensity; ++i){
				if( foo.get() == 1.1e12 ){
					dat_ptr[idx[0u]] = {};
				}
				foo = foo + foo * saw::data<T::InterfaceSchema>{1.7342345};
			}
			dat_ptr[idx[0u]] = foo;
		}).wait();
	}
}

/**
 * Entry point: runs inner_work once for each of the three element schemas
 * Float32, Float64 and MixedPrecision<Float64,Float32>, in that order.
 *
 * @return 0 (implicit) once all three runs have finished.
 */
int main(){
	inner_work<sch::Float32>();
	inner_work<sch::Float64>();
	inner_work<sch::MixedPrecision<sch::Float64, sch::Float32>>();
}