17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
20 "Including non-public Kokkos header files is not allowed.");
22 #ifndef KOKKOS_KOKKOS_TUNERS_HPP
23 #define KOKKOS_KOKKOS_TUNERS_HPP
25 #include <Kokkos_Macros.hpp>
26 #include <Kokkos_Core_fwd.hpp>
27 #include <Kokkos_ExecPolicy.hpp>
28 #include <KokkosExp_MDRangePolicy.hpp>
29 #include <impl/Kokkos_Profiling_Interface.hpp>
42 namespace Experimental {
// Forward declarations of the tool-interface functions the tuners in this
// header call. Only the signatures are visible here.
// NOTE(review): later code spells these as
// Kokkos::Tools::Experimental::<name>, so that is presumably the enclosing
// namespace -- confirm against the namespace openers above.

// Describes tuning candidates given `size` int64_t values starting at `data`
// (presumably a discrete set, going by the name -- confirm).
45 SetOrRange make_candidate_set(
size_t size, int64_t* data);
// Checked by the tune() members further down before they request tuned
// values.
46 bool have_tuning_tool();
// Registers a named output variable; the callers in this file keep the
// returned size_t as the variable's id.
47 size_t declare_output_type(
const std::string&,
48 Kokkos::Tools::Experimental::VariableInfo);
// Called in this file as (context id, number of values, pointer to values).
// Callers read the values back after the call, so the array is in/out.
49 void request_output_values(
size_t,
size_t,
50 Kokkos::Tools::Experimental::VariableValue*);
// Overloads wrapping an int64_t or a double; callers in this file pass a
// variable id as the first argument.
51 VariableValue make_variable_value(
size_t, int64_t);
52 VariableValue make_variable_value(
size_t,
double);
// Range-style candidates for double and int64_t bounds.
// NOTE(review): openLower/openUpper presumably mark the matching bound as
// exclusive -- confirm with the definition.
53 SetOrRange make_candidate_range(
double lower,
double upper,
double step,
54 bool openLower,
bool openUpper);
55 SetOrRange make_candidate_range(int64_t lower, int64_t upper, int64_t step,
56 bool openLower,
bool openUpper);
// Context handling: the begin() members in this file obtain a fresh id and
// pass it to begin_context(); the matching end() members pass the same id to
// end_context().
57 size_t get_new_context_id();
58 void begin_context(
size_t context_id);
59 void end_context(
size_t context_id);
/// Forward declaration of the node type used to store a nested search space.
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode;

/// Interior node of a value hierarchy.
///
/// `root_values[i]` is the i-th value along this dimension and
/// `sub_values[i]` is the container describing the remaining dimensions for
/// that value. The two vectors are filled independently by the caller;
/// nothing here enforces that they stay the same length.
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode {
  std::vector<ValueType> root_values;
  std::vector<ContainedType> sub_values;

  /// Appends a value for this dimension.
  /// Not `noexcept`: push_back may allocate, and a throwing allocation inside
  /// a noexcept function would call std::terminate instead of propagating.
  void add_root_value(const ValueType& in) { root_values.push_back(in); }

  /// Appends the sub-container that belongs to the most recent root value.
  void add_sub_container(const ContainedType& in) { sub_values.push_back(in); }

  /// Returns the root value at `index`. Unchecked: `index` must be smaller
  /// than `root_values.size()`.
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }

  /// Returns the sub-container at `index`. Unchecked: `index` must be smaller
  /// than `sub_values.size()`.
  const ContainedType& get_sub_value(const size_t index) const {
    return sub_values[index];
  }
};

/// Leaf node of a value hierarchy: a plain list of values with no nested
/// containers.
template <typename ValueType>
struct ValueHierarchyNode<ValueType, void> {
  std::vector<ValueType> root_values;

  /// Takes ownership of the given values.
  explicit ValueHierarchyNode(std::vector<ValueType> rv)
      : root_values(std::move(rv)) {}

  /// Appends a value. Not `noexcept` for the same reason as in the primary
  /// template: push_back may throw on allocation failure.
  void add_root_value(const ValueType& in) { root_values.push_back(in); }

  /// Returns the value at `index`. Unchecked: `index` must be smaller than
  /// `root_values.size()`.
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }
};
// MapTypeConverter: compile-time mapping from a nested std::map / std::vector
// description type to the matching ValueHierarchyNode type. The primary
// template is only declared; the two specializations below supply `type`.
104 template <
class NestedMap>
105 struct MapTypeConverter;
// Innermost level: a std::vector<T> becomes a leaf node, i.e.
// ValueHierarchyNode<T, void>.
109 struct MapTypeConverter<std::vector<T>> {
110 using type = ValueHierarchyNode<T, void>;
// Outer level: a std::map<K, V> becomes a node whose root values are of type
// K and whose sub-containers are the converted form of V (recursive).
114 template <
class K,
class V>
115 struct MapTypeConverter<std::map<K, V>> {
116 using type = ValueHierarchyNode<K, typename MapTypeConverter<V>::type>;
// ValueHierarchyConstructor: builds the ValueHierarchyNode tree (the type
// named by MapTypeConverter) from a nested std::map / std::vector value.
// The primary template is only declared.
124 template <
class NestedMap>
125 struct ValueHierarchyConstructor;
// Innermost level: the leaf node is constructed directly from a copy of the
// input vector.
130 struct ValueHierarchyConstructor<std::vector<T>> {
131 using return_type =
typename MapTypeConverter<std::vector<T>>::type;
132 static return_type build(
const std::vector<T>& in) {
return return_type{in}; }
// Outer level: for every (key, mapped) entry, the key becomes a root value
// and the recursively built mapped value becomes the sub-container appended
// alongside it. Entries are visited in the map's iteration order.
137 template <
class K,
class V>
138 struct ValueHierarchyConstructor<std::map<K, V>> {
139 using return_type =
typename MapTypeConverter<std::map<K, V>>::type;
140 static return_type build(
const std::map<K, V>& in) {
141 return_type node_to_build;
142 for (
auto& entry : in) {
143 node_to_build.add_root_value(entry.first);
144 node_to_build.add_sub_container(
145 ValueHierarchyConstructor<V>::build(entry.second));
147 return node_to_build;
// get_space_dimensionality: compile-time nesting depth of a
// std::map<..., std::map<..., std::vector<...>>> description type.
// The primary template is only declared.
159 template <
class InspectForDepth>
160 struct get_space_dimensionality;
// A std::vector is the innermost level and counts as one dimension.
164 struct get_space_dimensionality<std::vector<T>> {
165 static constexpr
int value = 1;
// Each std::map layer adds one dimension on top of its mapped type.
170 template <
class K,
class V>
171 struct get_space_dimensionality<std::map<K, V>> {
172 static constexpr
int value = 1 + get_space_dimensionality<V>::value;
// n_dimensional_sparse_structure<T, N>: names the nested container type with
// N levels whose keys/values are all of type T.
175 template <
class T,
int N>
176 struct n_dimensional_sparse_structure;
// One level: a plain std::vector<T>.
179 struct n_dimensional_sparse_structure<T, 1> {
180 using type = std::vector<T>;
// N levels: a std::map keyed by T whose mapped type is the (N - 1)-level
// structure. There is no visible guard for N < 1.
183 template <
class T,
int N>
184 struct n_dimensional_sparse_structure {
186 std::map<T,
typename n_dimensional_sparse_structure<T, N - 1>::type>;
196 template <
class Container>
197 struct DimensionValueExtractor;
200 template <
class RootType,
class Subtype>
201 struct DimensionValueExtractor<ValueHierarchyNode<RootType, Subtype>> {
202 static RootType
get(
const ValueHierarchyNode<RootType, Subtype>& dimension,
203 double fraction_to_traverse) {
204 size_t index = dimension.root_values.size() * fraction_to_traverse;
205 return dimension.get_root_value(index);
216 template <
class HierarchyNode,
class... InterpolationIndices>
217 struct GetMultidimensionalPoint;
219 template <
class ValueType>
220 struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, void>, double> {
221 using node_type = ValueHierarchyNode<ValueType, void>;
222 using return_type = std::tuple<ValueType>;
223 static return_type build(
const node_type& in,
double index) {
224 return std::make_tuple(DimensionValueExtractor<node_type>::get(in, index));
230 template <
class ValueType,
class Subtype,
class... Indices>
231 struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, Subtype>, double,
233 using node_type = ValueHierarchyNode<ValueType, Subtype>;
235 typename GetMultidimensionalPoint<Subtype, Indices...>::return_type;
236 using return_type = decltype(std::tuple_cat(
237 std::declval<std::tuple<ValueType>>(), std::declval<sub_tuple>()));
238 static return_type build(
const node_type& in,
double fraction_to_traverse,
239 Indices... indices) {
240 size_t index = in.sub_values.size() * fraction_to_traverse;
241 auto dimension_value = std::make_tuple(
242 DimensionValueExtractor<node_type>::get(in, fraction_to_traverse));
243 return std::tuple_cat(dimension_value,
244 GetMultidimensionalPoint<Subtype, Indices...>::build(
245 in.get_sub_value(index), indices...));
// get_point_helper: unpacks an indexable collection of variable values into
// individual double coordinates and forwards them to
// GetMultidimensionalPoint<...>::build.
//   in      - the stored hierarchy to look the point up in
//   indices - collection accessed with std::get<Is>; the `.value.double_value`
//             member of each element is used as the coordinate
//   (third parameter) - index sequence selecting which elements to unpack
// Returns whatever helper::build returns for those coordinates.
249 template <
typename PointType,
class ArrayType,
size_t... Is>
250 auto get_point_helper(
const PointType& in,
const ArrayType& indices,
251 std::index_sequence<Is...>) {
// One template argument per coordinate, each being the type of
// `.value.double_value`.
252 using helper = GetMultidimensionalPoint<
254 decltype(std::get<Is>(std::declval<ArrayType>()).value.double_value)...>;
255 return helper::build(in, std::get<Is>(indices).value.double_value...);
// GetPoint: dispatch helper used by get_point() (called there as
// GetPoint<PointType, ArrayType>::build). The visible specialization handles
// a std::array of Kokkos::Tools::Experimental::VariableValue of size
// ArraySize.
// NOTE(review): the struct header lines are not visible in this view --
// confirm the primary template is declaration-only.
258 template <
typename Po
intType,
typename ArrayType>
261 template <
typename Po
intType,
size_t ArraySize>
264 std::array<Kokkos::Tools::Experimental::VariableValue, ArraySize>> {
265 using index_set_type =
266 std::array<Kokkos::Tools::Experimental::VariableValue, ArraySize>;
// Expands the array into ArraySize coordinates via an index sequence and
// delegates to get_point_helper.
267 static auto build(
const PointType& in,
const index_set_type& indices) {
268 return get_point_helper(in, indices, std::make_index_sequence<ArraySize>{});
// get_point: convenience entry point that forwards to
// GetPoint<PointType, ArrayType>::build(point, indices) and returns its
// result unchanged.
272 template <
typename Po
intType,
typename ArrayType>
273 auto get_point(
const PointType& point,
const ArrayType& indices) {
274 return GetPoint<PointType, ArrayType>::build(point, indices);
// MultidimensionalSparseTuningProblem: wraps a nested map/vector description
// of a search space (Container<TemplateArguments...>) and exposes it to a
// tuning tool as one double-valued variable per dimension. A requested set of
// values is turned back into a concrete point via Impl::get_point.
279 template <
template <
class...>
class Container,
size_t MaxDimensionSize = 100,
280 class... TemplateArguments>
281 class MultidimensionalSparseTuningProblem {
283 using ProblemSpaceInput = Container<TemplateArguments...>;
// Number of nested levels in the input description.
284 static constexpr
int space_dimensionality =
285 Impl::get_space_dimensionality<ProblemSpaceInput>::value;
286 static constexpr
size_t max_space_dimension_size = MaxDimensionSize;
// Bounds of the range declared for every dimension's variable.
287 static constexpr
double tuning_min = 0.0;
288 static constexpr
double tuning_max = 0.999;
// Step of the declared range: the range is divided by the maximum dimension
// size.
293 double tuning_step = tuning_max / max_space_dimension_size;
// Hierarchy-node form of the input description, and the builder producing it.
295 using StoredProblemSpace =
296 typename Impl::MapTypeConverter<ProblemSpaceInput>::type;
297 using HierarchyConstructor =
298 typename Impl::ValueHierarchyConstructor<Container<TemplateArguments...>>;
// One VariableValue per dimension.
300 using ValueArray = std::array<Kokkos::Tools::Experimental::VariableValue,
301 space_dimensionality>;
// Helper aliases for extend(): a problem with one extra outer dimension
// keyed by `Key`.
302 template <
class Key,
class Value>
303 using extended_map = std::map<Key, Value>;
304 template <
typename Key>
305 using extended_problem =
306 MultidimensionalSparseTuningProblem<extended_map, MaxDimensionSize, Key,
308 template <
typename Key,
typename Value>
309 using ExtendedProblemSpace =
310 typename Impl::MapTypeConverter<extended_map<Key, Value>>::type;
// extend: returns a new problem with an additional outermost axis named
// `axis_name`. Every value of `new_tuning_axis` gets a copy of the current
// space as its sub-container, and `axis_name` is placed in front of the
// existing variable names.
312 template <
typename Key>
313 auto extend(
const std::string& axis_name,
314 const std::vector<Key>& new_tuning_axis)
const
315 -> extended_problem<Key> {
316 ExtendedProblemSpace<Key, ProblemSpaceInput> extended_space;
317 for (
auto& key : new_tuning_axis) {
318 extended_space.add_root_value(key);
319 extended_space.add_sub_container(m_space);
321 std::vector<std::string> extended_names;
322 extended_names.reserve(m_variable_names.size() + 1);
323 extended_names.push_back(axis_name);
324 extended_names.insert(extended_names.end(), m_variable_names.begin(),
325 m_variable_names.end());
326 return extended_problem<Key>(extended_space, extended_names);
// Stored search space, the tool-assigned id of each dimension's variable, and
// the names those variables were declared under.
330 StoredProblemSpace m_space;
331 std::array<size_t, space_dimensionality> variable_ids;
332 std::vector<std::string> m_variable_names;
336 MultidimensionalSparseTuningProblem() =
default;
// Constructs from an already converted space. Declares one output variable
// per name: type double, interval category, with the range
// (tuning_min, tuning_max, tuning_step) as candidates; both `true` arguments
// go to the openLower/openUpper parameters of make_candidate_range.
// `names.size()` is asserted to equal the dimensionality.
338 MultidimensionalSparseTuningProblem(StoredProblemSpace space,
339 const std::vector<std::string>& names)
340 : m_space(std::move(space)), m_variable_names(names) {
341 assert(names.size() == space_dimensionality);
342 for (
unsigned long x = 0; x < names.size(); ++x) {
344 info.type = Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
345 info.category = Kokkos::Tools::Experimental::StatisticalCategory::
346 kokkos_value_interval;
348 Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
349 info.candidates = Kokkos::Tools::Experimental::make_candidate_range(
350 tuning_min, tuning_max, tuning_step,
true,
true);
351 variable_ids[x] = declare_output_type(names[x], info);
// Constructs from the nested map/vector description by converting it with
// HierarchyConstructor::build and delegating to the constructor above.
355 MultidimensionalSparseTuningProblem(ProblemSpaceInput space,
356 const std::vector<std::string>& names)
357 : MultidimensionalSparseTuningProblem(HierarchyConstructor::build(space),
// get_point: looks up the point for explicitly given coordinates. Each
// coordinate is cast to double and wrapped in a VariableValue with id 0.
// NOTE(review): this member is not const, while ExtendableTunerMixin calls
// get_point through a const reference -- confirm whether it should be const.
360 template <
typename... Coordinates>
361 auto get_point(Coordinates... coordinates) {
362 using ArrayType = std::array<Kokkos::Tools::Experimental::VariableValue,
363 sizeof...(coordinates)>;
364 return Impl::get_point(
365 m_space, ArrayType({Kokkos::Tools::Experimental::make_variable_value(
366 0, static_cast<double>(coordinates))...}));
// begin (header line not visible in this view): takes a fresh context id,
// seeds every dimension's value with 0.0, opens the context, requests output
// values for all dimensions, and returns the point those values select.
370 context = Kokkos::Tools::Experimental::get_new_context_id();
372 for (
int x = 0; x < space_dimensionality; ++x) {
373 values[x] = Kokkos::Tools::Experimental::make_variable_value(
374 variable_ids[x], 0.0);
376 begin_context(context);
377 request_output_values(context, space_dimensionality, values.data());
378 return Impl::get_point(m_space, values);
// end: closes the context opened by the last begin().
381 auto end() { end_context(context); }
// ExtendableTunerMixin: CRTP-style base. `Tuner` is the derived class and
// must provide get_tuner(); the members below forward to the object it
// returns.
384 template <
typename Tuner>
385 struct ExtendableTunerMixin {
// combine: returns sub_tuner.extend(axis_name, new_axis), i.e. the derived
// tuner's underlying problem with one additional axis.
386 template <
typename Key>
387 auto combine(
const std::string& axis_name,
388 const std::vector<Key>& new_axis)
const {
389 const auto& sub_tuner =
static_cast<const Tuner*
>(
this)->get_tuner();
390 return sub_tuner.extend(axis_name, new_axis);
// get_point: forwards the coordinates to the underlying tuner's get_point.
// NOTE(review): sub_tuner is bound as a const reference, while
// MultidimensionalSparseTuningProblem::get_point is declared without const in
// this file -- instantiating this member with that tuner type looks like it
// would not compile; confirm.
393 template <
typename... Coordinates>
394 auto get_point(Coordinates... coordinates) {
395 const auto& sub_tuner =
static_cast<const Tuner*
>(
this)->get_tuner();
396 return sub_tuner.get_point(coordinates...);
400 ExtendableTunerMixin() =
default;
// make_multidimensional_sparse_tuning_problem: factory that deduces Container
// and TemplateArguments from `in` and constructs the matching
// MultidimensionalSparseTuningProblem. MaxDimensionSize comes first in the
// template parameter list so callers can set it explicitly (e.g. <20>) while
// the rest is deduced; it defaults to 100.
//   in    - nested map/vector description of the search space
//   names - one variable name per dimension
404 template <
size_t MaxDimensionSize = 100,
template <
class...>
class Container,
405 class... TemplateArguments>
406 auto make_multidimensional_sparse_tuning_problem(
407 const Container<TemplateArguments...>& in, std::vector<std::string> names) {
408 return MultidimensionalSparseTuningProblem<Container, MaxDimensionSize,
409 TemplateArguments...>(in, names);
// TeamSizeTuner: tunes the (vector length, team size) pair of a team policy.
// The search space maps each allowed vector length to the team sizes allowed
// for it, and is handed to a two-dimensional sparse tuning problem.
412 class TeamSizeTuner :
public ExtendableTunerMixin<TeamSizeTuner> {
// vector length -> allowed team sizes
414 using SpaceDescription = std::map<int64_t, std::vector<int64_t>>;
// Type returned by the factory for this space with MaxDimensionSize = 20.
415 using TunerType = decltype(make_multidimensional_sparse_tuning_problem<20>(
416 std::declval<SpaceDescription>(),
417 std::declval<std::vector<std::string>>()));
421 TeamSizeTuner() =
default;
422 TeamSizeTuner& operator=(
const TeamSizeTuner& other) =
default;
423 TeamSizeTuner(
const TeamSizeTuner& other) =
default;
424 TeamSizeTuner& operator=(TeamSizeTuner&& other) =
default;
425 TeamSizeTuner(TeamSizeTuner&& other) =
default;
// Builds the search space for the given policy/functor/tag. `calc` supplies
// get_max_team_size(policy, functor, tag). Works on a copy of the incoming
// policy.
// NOTE(review): several lines of this constructor are not visible in this
// view; the comments below cover only what is shown.
426 template <
typename ViableConfigurationCalculator,
typename Functor,
427 typename TagType,
typename... Properties>
428 TeamSizeTuner(
const std::string& name,
430 const Functor& functor,
const TagType& tag,
431 ViableConfigurationCalculator calc) {
433 PolicyType policy(policy_in);
// Remember the caller's vector length; the copy is given length 1 if the
// stored value is below 1.
434 auto initial_vector_length = policy.impl_vector_length();
435 if (initial_vector_length < 1) {
436 policy.impl_set_vector_length(1);
463 SpaceDescription space_description;
465 auto max_vector_length = PolicyType::vector_length_max();
466 std::vector<int64_t> allowed_vector_lengths;
// With an automatic vector length, try max, max/2, ... down to 1 and keep
// each length for which the team size is automatic or the policy's team size
// does not exceed the computed maximum.
468 if (policy.impl_auto_vector_length()) {
469 for (
int vector_length = max_vector_length; vector_length >= 1;
470 vector_length /= 2) {
471 policy.impl_set_vector_length(vector_length);
484 auto max_team_size = calc.get_max_team_size(policy, functor, tag);
485 if ((policy.impl_auto_team_size()) ||
486 (policy.team_size() <= max_team_size)) {
487 allowed_vector_lengths.push_back(vector_length);
// Otherwise the policy's current vector length is the only candidate.
491 allowed_vector_lengths.push_back(policy.impl_vector_length());
// For every allowed vector length collect the team sizes: max, max/2, ...
// down to 1 when the team size is automatic, else just the policy's team
// size.
494 for (
const auto vector_length : allowed_vector_lengths) {
495 std::vector<int64_t> allowed_team_sizes;
496 policy.impl_set_vector_length(vector_length);
497 auto max_team_size = calc.get_max_team_size(policy, functor, tag);
498 if (policy.impl_auto_team_size()) {
500 for (
int team_size = max_team_size; team_size >= 1; team_size /= 2) {
501 allowed_team_sizes.push_back(team_size);
504 allowed_team_sizes.push_back(policy.team_size());
506 space_description[vector_length] = allowed_team_sizes;
// Dimension 0 is the vector length, dimension 1 the team size.
508 tuner = make_multidimensional_sparse_tuning_problem<20>(
509 space_description, {std::string(name +
"_vector_length"),
510 std::string(name +
"_team_size")});
511 policy.impl_set_vector_length(initial_vector_length);
// tune (header not visible in this view): when a tuning tool is present,
// fetches a configuration and applies element 1 as team size and element 0
// as vector length, but only if the vector length is positive.
514 template <
typename... Properties>
517 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
518 auto configuration = tuner.begin();
519 auto team_size = std::get<1>(configuration);
520 auto vector_length = std::get<0>(configuration);
521 if (vector_length > 0) {
522 policy.impl_set_team_size(team_size);
523 policy.impl_set_vector_length(vector_length);
// NOTE(review): the body guarded by this check is not visible here;
// presumably it ends the tuning context -- confirm.
529 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
// Returns a copy of the underlying tuning problem.
534 TunerType get_tuner()
const {
return tuner; }
// tuning_type_for<T>: maps a C++ bound type to the matching
// Kokkos::Tools::Experimental::ValueType enumerator (`value`) and provides an
// accessor that reads the corresponding member out of a VariableValue.
// The primary template is only declared.
538 struct tuning_type_for;
// double -> kokkos_value_double, read from value.double_value
541 struct tuning_type_for<double> {
542 static constexpr Kokkos::Tools::Experimental::ValueType value =
543 Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
545 const Kokkos::Tools::Experimental::VariableValue& value_struct) {
546 return value_struct.value.double_value;
// int64_t -> kokkos_value_int64, read from value.int_value
550 struct tuning_type_for<int64_t> {
551 static constexpr Kokkos::Tools::Experimental::ValueType value =
552 Kokkos::Tools::Experimental::ValueType::kokkos_value_int64;
554 const Kokkos::Tools::Experimental::VariableValue& value_struct) {
555 return value_struct.value.int_value;
// SingleDimensionalRangeTuner<Bound>: tunes a single value of type Bound over
// a range. Bound must have an Impl::tuning_type_for specialization (double
// and int64_t are the ones visible in this file).
559 template <
class Bound>
560 class SingleDimensionalRangeTuner {
563 using tuning_util = Impl::tuning_type_for<Bound>;
568 SingleDimensionalRangeTuner() =
default;
// Declares one output variable called `name` with the given statistical
// category and a candidate range built from (lower, upper, step); both
// `false` arguments go to the openLower/openUpper parameters of
// make_candidate_range. `default_val` is remembered and used to seed the
// value in begin(). `step` defaults to 0.
569 SingleDimensionalRangeTuner(
570 const std::string& name,
571 Kokkos::Tools::Experimental::StatisticalCategory category,
572 Bound default_val, Bound lower, Bound upper, Bound step = (Bound)0) {
573 default_value = default_val;
574 Kokkos::Tools::Experimental::VariableInfo info;
575 info.category = category;
576 info.candidates = make_candidate_range(
577 static_cast<Bound>(lower), static_cast<Bound>(upper),
578 static_cast<Bound>(step),
false,
false);
580 Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
581 info.type = tuning_util::value;
582 id = Kokkos::Tools::Experimental::declare_output_type(name, info);
// begin (header not visible in this view): opens a fresh context, seeds the
// value with default_value, requests one output value and returns it as
// read by tuning_util::get.
586 context = Kokkos::Tools::Experimental::get_new_context_id();
587 Kokkos::Tools::Experimental::begin_context(context);
589 Kokkos::Tools::Experimental::make_variable_value(
id, default_value);
590 Kokkos::Tools::Experimental::request_output_values(context, 1,
592 return tuning_util::get(tuned_value);
// end: closes the context opened by the last begin().
595 void end() { Kokkos::Tools::Experimental::end_context(context); }
// with_tuned_value: takes a callable by reference.
// NOTE(review): the body is not visible here; presumably it calls `func`
// with the value from begin() and then calls end() -- confirm.
597 template <
typename Functor>
598 void with_tuned_value(Functor& func) {
// RangePolicyOccupancyTuner: tunes the desired occupancy of a policy using a
// single int64_t range tuner.
604 class RangePolicyOccupancyTuner {
606 using TunerType = SingleDimensionalRangeTuner<int64_t>;
610 RangePolicyOccupancyTuner() =
default;
// Only `name` is used by the visible part of this constructor; the functor,
// tag and calculator parameters are unnamed and therefore unused.
// NOTE(review): the range arguments passed to TunerType are not visible in
// this view.
611 template <
typename ViableConfigurationCalculator,
typename Functor,
612 typename TagType,
typename... Properties>
613 RangePolicyOccupancyTuner(
const std::string& name,
615 const Functor&,
const TagType&,
616 ViableConfigurationCalculator)
617 : tuner(TunerType(name,
618 Kokkos::Tools::Experimental::StatisticalCategory::
// tune (header not visible in this view): when a tuning tool is present,
// requests an occupancy value and applies it to the policy as a
// DesiredOccupancy, narrowing the int64_t to int.
622 template <
typename... Properties>
625 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
626 auto occupancy = tuner.begin();
627 policy.impl_set_desired_occupancy(
628 Kokkos::Experimental::DesiredOccupancy{
static_cast<int>(occupancy)});
// NOTE(review): the body guarded by this check is not visible here;
// presumably it ends the tuning context -- confirm.
633 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
// Returns a copy of the underlying range tuner.
638 TunerType get_tuner()
const {
return tuner; }
// fill_tile (vector overload): innermost level of the tile-size search
// space. Iterates x = 1, 2, 4, ... while x < tile_size.
// NOTE(review): the loop body is not visible in this view; presumably each x
// is appended to `cont` -- confirm. For tile_size <= 1 the loop does not run
// at all.
643 template <
typename T>
644 void fill_tile(std::vector<T>& cont,
int tile_size) {
645 for (
int x = 1; x < tile_size; x *= 2) {
// fill_tile (map overload): for every power of two x below tile_size,
// creates (or reuses) the entry cont[x] and fills it recursively with the
// remaining budget tile_size / x.
649 template <
typename T,
typename Mapped>
650 void fill_tile(std::map<T, Mapped>& cont,
int tile_size) {
651 for (
int x = 1; x < tile_size; x *= 2) {
652 fill_tile(cont[x], tile_size / x);
// MDRangeTuner<MDRangeRank>: tunes the tile sizes of an MDRangePolicy of the
// given rank, using one tuning dimension per rank.
657 template <
int MDRangeRank>
658 struct MDRangeTuner :
public ExtendableTunerMixin<MDRangeTuner<MDRangeRank>> {
660 static constexpr
int rank = MDRangeRank;
// Passed as MaxDimensionSize to the tuning-problem factory.
661 static constexpr
int max_slices = 15;
// Nested map/vector of int with `rank` levels.
662 using SpaceDescription =
663 typename Impl::n_dimensional_sparse_structure<int, rank>::type;
665 decltype(make_multidimensional_sparse_tuning_problem<max_slices>(
666 std::declval<SpaceDescription>(),
667 std::declval<std::vector<std::string>>()));
671 MDRangeTuner() =
default;
// Builds the search space: asks `calc` for the maximum tile-size product for
// this policy/functor/tag, fills the nested description from it with
// Impl::fill_tile, and names the dimensions "<name>_tile_size_<i>".
672 template <
typename Functor,
typename TagType,
typename Calculator,
673 typename... Properties>
674 MDRangeTuner(
const std::string& name,
675 const Kokkos::MDRangePolicy<Properties...>& policy,
676 const Functor& functor,
const TagType& tag, Calculator calc) {
677 SpaceDescription desc;
679 calc.get_mdrange_max_tile_size_product(policy, functor, tag);
680 Impl::fill_tile(desc, max_tile_size);
681 std::vector<std::string> feature_names;
682 for (
int x = 0; x < rank; ++x) {
683 feature_names.push_back(name +
"_tile_size_" + std::to_string(x));
685 tuner = make_multidimensional_sparse_tuning_problem<max_slices>(
686 desc, feature_names);
// set_policy_tile: expands the tuple into a braced list of its elements and
// hands it to policy.impl_change_tile_size.
688 template <
typename Policy,
typename Tuple,
size_t... Indices>
689 void set_policy_tile(Policy& policy,
const Tuple& tuple,
690 const std::index_sequence<Indices...>&) {
691 policy.impl_change_tile_size({std::get<Indices>(tuple)...});
// tune: works on a copy of the incoming policy; when a tuning tool is
// present, requests a configuration and applies it as the tile sizes.
// NOTE(review): the return statement is not visible in this view.
693 template <
typename... Properties>
694 auto tune(
const Kokkos::MDRangePolicy<Properties...>& policy_in) {
695 Kokkos::MDRangePolicy<Properties...> policy(policy_in);
696 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
697 auto configuration = tuner.begin();
698 set_policy_tile(policy, configuration, std::make_index_sequence<rank>{});
// NOTE(review): the body guarded by this check is not visible here;
// presumably it ends the tuning context -- confirm.
703 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
// Returns a copy of the underlying tuning problem.
708 TunerType get_tuner()
const {
return tuner; }
// CategoricalTuner<Choice>: lets a tuning tool pick one element out of a
// fixed list of choices. The tool sees the choice as an int64 index into the
// list.
711 template <
class Choice>
712 struct CategoricalTuner {
713 using choice_list = std::vector<Choice>;
// Id returned by declare_output_type for the index variable.
716 size_t tuning_variable_id;
// Stores a copy of the choices and declares one categorical, set-valued
// int64 variable whose candidates are the indices 0 .. choices.size() - 1.
717 CategoricalTuner(std::string name, choice_list m_choices)
718 : choices(m_choices) {
719 std::vector<int64_t> indices;
720 for (
typename decltype(choices)::size_type x = 0; x < choices.size(); ++x) {
721 indices.push_back(x);
724 info.category = StatisticalCategory::kokkos_value_categorical;
725 info.valueQuantity = CandidateValueType::kokkos_value_set;
726 info.type = ValueType::kokkos_value_int64;
727 info.candidates = make_candidate_set(indices.size(), indices.data());
728 tuning_variable_id = declare_output_type(name, info);
// begin: opens a fresh context, seeds the index with 0, requests one output
// value and returns a reference to the choice at the resulting index.
// The index is used unchecked.
730 const Choice& begin() {
731 context = get_new_context_id();
732 begin_context(context);
733 VariableValue value = make_variable_value(tuning_variable_id, int64_t(0));
734 request_output_values(context, 1, &value);
735 return choices[value.value.int_value];
// end: closes the context opened by the last begin().
737 void end() { end_context(context); }
740 template <
typename Choice>
741 auto make_categorical_tuner(std::string name, std::vector<Choice> choices)
742 -> CategoricalTuner<Choice> {
743 return CategoricalTuner<Choice>(name, choices);
Execution policy for parallel work over a league of teams of threads.