#include "./mixed_precision.hpp" #include #include int main(int argc, char** argv){ using namespace saw; uint64_t start_test_size = 1024ul * 1024ul; if(argc <= 0 || argc >= 256){ std::cerr<<"Argument size being weird. Got "< args; args.resize(static_cast(argc)); for(uint64_t i = 0; i < args.size(); ++i){ args.at(i) = {argv[i]}; } if(args.size() > 1){ auto& str = args.at(1); auto ec = std::from_chars(str.data(), str.data() + str.size(), start_test_size); if(ec.ec != std::errc{}){ std::cerr<<"Start size is not an int."< 2){ auto& str = args.at(2); auto ec = std::from_chars(str.data(), str.data() + str.size(), max_test_size); if(ec.ec != std::errc{}){ std::cerr<<"Stop size is not an int."< max_test_size){ std::cerr<<"Invalid arguments. Stop size is smaller than Start size."< 3){ auto& str = args.at(3); auto ec = std::from_chars(str.data(), str.data() + str.size(), runs); if(ec.ec != std::errc{}){ std::cerr<<"Run size is not an int."< 4){ auto& str = args.at(4); auto ec = std::from_chars(str.data(), str.data() + str.size(), arithmetic_intensity); if(ec.ec != std::errc{}){ std::cerr<<"Arithmetic intensity is not an int."< dis{-3.0,-1.0}; saw::event_loop loop; saw::wait_scope wait{loop}; remote rmt; own> rmt_addr{}; rmt.resolve_address().then([&](auto addr){ rmt_addr = std::move(addr); }).detach(); wait.poll(); if(!rmt_addr){ return -1; } cl::sycl::event mixed_ev; cl::sycl::event float32_ev; cl::sycl::event float64_ev; auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev, arithmetic_intensity); data mixed_host_data; data float64_host_data; data float32_host_data; auto time_eval = [](uint64_t & current_min_time, cl::sycl::event& ev){ auto end = ev.get_profiling_info(); auto start = ev.get_profiling_info(); uint64_t curr_time = (end-start); current_min_time = std::min(curr_time, current_min_time); }; auto our_device = share>(); auto& device = *our_device; /** * Warmup */ std::cout<<"Warming up ..."<(gen_num); float64_host_data.at(i) = static_cast(gen_num); float32_host_data.at(i) = static_cast(gen_num); } data> mixed_device_data{mixed_host_data}; data> float64_device_data{float64_host_data}; data> float32_device_data{float32_host_data}; sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle())); sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle())); sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle())); device.get_handle().wait(); } std::cout<<"Benchmark starting ..."; /** * Benchmark */ std::stringstream sstr; for(uint64_t test_size = start_test_size; test_size <= max_test_size; test_size *= 2ul){ uint64_t time_mixed = std::numeric_limits::max(); uint64_t time_float64 = std::numeric_limits::max(); uint64_t time_float32 = std::numeric_limits::max(); for(uint64_t runs_i = 0u; runs_i < runs; ++runs_i){ (std::cout<<'.').flush(); data mixed_host_data; data float64_host_data; data float32_host_data; mixed_host_data = {test_size}; float64_host_data = {test_size}; float32_host_data = {test_size}; for(uint64_t i = 0; i < test_size; ++i){ double gen_num = dis(e1); mixed_host_data.at(i) = static_cast(gen_num); float64_host_data.at(i) = static_cast(gen_num); float32_host_data.at(i) = static_cast(gen_num); } data> mixed_device_data{mixed_host_data}; data> float64_device_data{float64_host_data}; data> float32_device_data{float32_host_data}; sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle())); device.get_handle().wait(); time_eval(time_mixed, mixed_ev); sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle())); device.get_handle().wait(); time_eval(time_float64, float64_ev); sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle())); device.get_handle().wait(); time_eval(time_float32, float32_ev); } sstr<