blob: 4afb29e9a6218b795764f4179cb772a2a8adbe77 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
#include "../c++/data.hpp"

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <random>
// Shorthand namespace: names declared in saw::schema can be written as sch::<name>.
namespace sch { using namespace saw::schema; }
template<typename T>
void inner_work(){
std::random_device r;
std::default_random_engine e1{r()};
std::uniform_real_distribution<> dis{-3.0,-1.0};
acpp::sycl::queue sycl_q;
acpp::sycl::event ev;
auto time_eval = [](uint64_t & current_min_time, acpp::sycl::event& evt){
auto end = evt.get_profiling_info<acpp::sycl::info::event_profiling::command_end>();
auto start = evt.get_profiling_info<acpp::sycl::info::event_profiling::command_start>();
uint64_t curr_time = (end-start);
current_min_time = std::min(curr_time, current_min_time);
};
constexpr uint64_t arithmetic_intensity = 1024ul;
/**
* Warmup
*/
std::cout<<"Warming up ..."<<std::endl;
for(uint64_t test_size = 1ul; test_size < max_test_size; test_size *= 2ul){
data<sch::Array<T>, encode::Sycl<encode::Native>> dat{{{test_size}},sycl_q};
data<sch::Ref<sch::Array<T>>, encode::Sycl<encode::Native>> dat_ref{dat};
auto dat_ptr = dat_ref.get_internal_data();
for(uint64_t i = 0; i < test_size; ++i){
double gen_num = dis(e1);
dat.at({{i}}) = {static_cast<double>(gen_num)};
}
sycl_q.parallel_for([=](acpp::sycl::id<1> idx){
data<T::InterfaceSchema> foo = {dat_ptr[idx[0u]].get()};
for(uint64_t i = 0; i < arithmetic_intensity; ++i){
if( foo.get() == 1.1e12 ){
dat_ptr[idx[0u]] = {};
}
foo = foo + foo * saw::data<T::InterfaceSchema>{1.7342345};
}
dat_ptr[idx[0u]] = foo;
}).wait();
}
}
int main(){
using namespace saw;
inner_work<sch::Float32>();
inner_work<sch::Float64>();
inner_work<sch::MixedPrecision<sch::Float64,sch::Float32>>();
return 0;
}
|