Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_Tuners.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_KOKKOS_TUNERS_HPP
23 #define KOKKOS_KOKKOS_TUNERS_HPP
24 
25 #include <Kokkos_Macros.hpp>
26 #include <Kokkos_Core_fwd.hpp>
27 #include <Kokkos_ExecPolicy.hpp>
28 #include <KokkosExp_MDRangePolicy.hpp>
29 #include <impl/Kokkos_Profiling_Interface.hpp>
30 
31 #include <array>
32 #include <utility>
33 #include <tuple>
34 #include <string>
35 #include <vector>
36 #include <map>
37 #include <cassert>
38 
39 namespace Kokkos {
40 namespace Tools {
41 
42 namespace Experimental {
43 
44 // forward declarations
45 SetOrRange make_candidate_set(size_t size, int64_t* data);
46 bool have_tuning_tool();
47 size_t declare_output_type(const std::string&,
48  Kokkos::Tools::Experimental::VariableInfo);
49 void request_output_values(size_t, size_t,
50  Kokkos::Tools::Experimental::VariableValue*);
51 VariableValue make_variable_value(size_t, int64_t);
52 VariableValue make_variable_value(size_t, double);
53 SetOrRange make_candidate_range(double lower, double upper, double step,
54  bool openLower, bool openUpper);
55 size_t get_new_context_id();
56 void begin_context(size_t context_id);
57 void end_context(size_t context_id);
58 namespace Impl {
59 
// One level of a tuning search space.
//
// ValueType     - type of the values that can be chosen at this level.
// ContainedType - type of the child node stored next to each value, or `void`
//                 for the lowest level (see the specialization below).
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode;

// Interior level: the value at position i in root_values is paired with the
// child node at position i in sub_values, provided callers add them in
// matching order.
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode {
  std::vector<ValueType> root_values;
  std::vector<ContainedType> sub_values;
  // Appends a selectable value for this level. Declared noexcept, so an
  // exception thrown by push_back terminates the program.
  void add_root_value(const ValueType& in) noexcept {
    root_values.push_back(in);
  }
  // Appends a child node.
  void add_sub_container(const ContainedType& in) { sub_values.push_back(in); }
  // Returns the value at `index`; no bounds checking is performed.
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }
  // Returns the child node at `index`; no bounds checking is performed.
  const ContainedType& get_sub_value(const size_t index) const {
    return sub_values[index];
  }
};

// Lowest level: only values, no child nodes.
template <typename ValueType>
struct ValueHierarchyNode<ValueType, void> {
  std::vector<ValueType> root_values;
  // Takes ownership of the initial list of values.
  explicit ValueHierarchyNode(std::vector<ValueType> rv)
      : root_values(std::move(rv)) {}
  // Appends a selectable value. Declared noexcept, so an exception thrown by
  // push_back terminates the program.
  void add_root_value(const ValueType& in) noexcept {
    root_values.push_back(in);
  }
  // Returns the value at `index`; no bounds checking is performed.
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }
};
96 
102 template <class NestedMap>
104 
105 // Vectors are our lowest-level, no nested values
106 template <class T>
107 struct MapTypeConverter<std::vector<T>> {
108  using type = ValueHierarchyNode<T, void>;
109 };
110 
111 // Maps contain both the "root" types and sub-vectors
112 template <class K, class V>
113 struct MapTypeConverter<std::map<K, V>> {
115 };
116 
122 template <class NestedMap>
124 
125 // Vectors are our lowest-level, no nested values. Just fill in the fundamental
126 // values
127 template <class T>
128 struct ValueHierarchyConstructor<std::vector<T>> {
129  using return_type = typename MapTypeConverter<std::vector<T>>::type;
130  static return_type build(const std::vector<T>& in) { return return_type{in}; }
131 };
132 
133 // For maps, we need to fill in the fundamental values, and construct child
134 // nodes
135 template <class K, class V>
136 struct ValueHierarchyConstructor<std::map<K, V>> {
137  using return_type = typename MapTypeConverter<std::map<K, V>>::type;
138  static return_type build(const std::map<K, V>& in) {
139  return_type node_to_build;
140  for (auto& entry : in) {
141  node_to_build.add_root_value(entry.first);
142  node_to_build.add_sub_container(
143  ValueHierarchyConstructor<V>::build(entry.second));
144  }
145  return node_to_build;
146  }
147 };
148 
// Compile-time nesting depth of a std::map / std::vector search-space
// description, exposed as the static member `value`.
template <class InspectForDepth>
struct get_space_dimensionality;

// The dimensionality of a vector is 1
template <class T>
struct get_space_dimensionality<std::vector<T>> {
  static constexpr int value = 1;
};

// The dimensionality of a map is 1 (the map) plus the dimensionality
// of the map's value type
template <class K, class V>
struct get_space_dimensionality<std::map<K, V>> {
  static constexpr int value = 1 + get_space_dimensionality<V>::value;
};
172 
// Nested container type for an N-level search space over T: each level above
// the last is a std::map keyed by T whose mapped type is the (N-1)-level
// structure.
template <class T, int N>
struct n_dimensional_sparse_structure {
  using type =
      std::map<T, typename n_dimensional_sparse_structure<T, N - 1>::type>;
};

// Recursion end: a single level is a plain std::vector<T>.
template <class T>
struct n_dimensional_sparse_structure<T, 1> {
  using type = std::vector<T>;
};
186 
193 // First, a helper to get the value in one dimension
194 template <class Container>
196 
197 // At any given level, just return your value at that level
198 template <class RootType, class Subtype>
199 struct DimensionValueExtractor<ValueHierarchyNode<RootType, Subtype>> {
200  static RootType get(const ValueHierarchyNode<RootType, Subtype>& dimension,
201  double fraction_to_traverse) {
202  size_t index = dimension.root_values.size() * fraction_to_traverse;
203  return dimension.get_root_value(index);
204  }
205 };
206 
212 // At the bottom level, we have one double and a base-level ValueHierarchyNode
213 
214 template <class HierarchyNode, class... InterpolationIndices>
216 
217 template <class ValueType>
218 struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, void>, double> {
219  using node_type = ValueHierarchyNode<ValueType, void>;
220  using return_type = std::tuple<ValueType>;
221  static return_type build(const node_type& in, double index) {
222  return std::make_tuple(DimensionValueExtractor<node_type>::get(in, index));
223  }
224 };
225 
226 // At levels above the bottom, we tuple_cat the result of our child on the end
227 // of our own tuple
228 template <class ValueType, class Subtype, class... Indices>
229 struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, Subtype>, double,
230  Indices...> {
231  using node_type = ValueHierarchyNode<ValueType, Subtype>;
232  using sub_tuple =
233  typename GetMultidimensionalPoint<Subtype, Indices...>::return_type;
234  using return_type = decltype(std::tuple_cat(
235  std::declval<std::tuple<ValueType>>(), std::declval<sub_tuple>()));
236  static return_type build(const node_type& in, double fraction_to_traverse,
237  Indices... indices) {
238  size_t index = in.sub_values.size() * fraction_to_traverse;
239  auto dimension_value = std::make_tuple(
240  DimensionValueExtractor<node_type>::get(in, fraction_to_traverse));
241  return std::tuple_cat(dimension_value,
242  GetMultidimensionalPoint<Subtype, Indices...>::build(
243  in.get_sub_value(index), indices...));
244  }
245 };
246 
247 template <typename PointType, class ArrayType, size_t... Is>
248 auto get_point_helper(const PointType& in, const ArrayType& indices,
249  std::index_sequence<Is...>) {
250  using helper = GetMultidimensionalPoint<
251  PointType,
252  decltype(std::get<Is>(std::declval<ArrayType>()).value.double_value)...>;
253  return helper::build(in, std::get<Is>(indices).value.double_value...);
254 }
255 
256 template <typename PointType, typename ArrayType>
257 struct GetPoint;
258 
259 template <typename PointType, size_t ArraySize>
260 struct GetPoint<
261  PointType,
262  std::array<Kokkos::Tools::Experimental::VariableValue, ArraySize>> {
263  using index_set_type =
264  std::array<Kokkos::Tools::Experimental::VariableValue, ArraySize>;
265  static auto build(const PointType& in, const index_set_type& indices) {
266  return get_point_helper(in, indices, std::make_index_sequence<ArraySize>{});
267  }
268 };
269 
270 template <typename PointType, typename ArrayType>
271 auto get_point(const PointType& point, const ArrayType& indices) {
272  return GetPoint<PointType, ArrayType>::build(point, indices);
273 }
274 
275 } // namespace Impl
276 
277 template <template <class...> class Container, size_t MaxDimensionSize = 100,
278  class... TemplateArguments>
279 class MultidimensionalSparseTuningProblem {
280  public:
281  using ProblemSpaceInput = Container<TemplateArguments...>;
282  static constexpr int space_dimensionality =
283  Impl::get_space_dimensionality<ProblemSpaceInput>::value;
284  static constexpr size_t max_space_dimension_size = MaxDimensionSize;
285  static constexpr double tuning_min = 0.0;
286  static constexpr double tuning_max = 0.999;
287 
288  // Not declared as static constexpr to work around the following compiler bug
289  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96862
290  // where a floating-point expression cannot be constexpr under -frounding-math
291  double tuning_step = tuning_max / max_space_dimension_size;
292 
293  using StoredProblemSpace =
294  typename Impl::MapTypeConverter<ProblemSpaceInput>::type;
295  using HierarchyConstructor =
296  typename Impl::ValueHierarchyConstructor<Container<TemplateArguments...>>;
297 
298  using ValueArray = std::array<Kokkos::Tools::Experimental::VariableValue,
299  space_dimensionality>;
300  template <class Key, class Value>
301  using extended_map = std::map<Key, Value>;
302  template <typename Key>
303  using extended_problem =
304  MultidimensionalSparseTuningProblem<extended_map, MaxDimensionSize, Key,
305  ProblemSpaceInput>;
306  template <typename Key, typename Value>
307  using ExtendedProblemSpace =
308  typename Impl::MapTypeConverter<extended_map<Key, Value>>::type;
309 
310  template <typename Key>
311  auto extend(const std::string& axis_name,
312  const std::vector<Key>& new_tuning_axis) const
313  -> extended_problem<Key> {
314  ExtendedProblemSpace<Key, ProblemSpaceInput> extended_space;
315  for (auto& key : new_tuning_axis) {
316  extended_space.add_root_value(key);
317  extended_space.add_sub_container(m_space);
318  }
319  std::vector<std::string> extended_names;
320  extended_names.reserve(m_variable_names.size() + 1);
321  extended_names.push_back(axis_name);
322  extended_names.insert(extended_names.end(), m_variable_names.begin(),
323  m_variable_names.end());
324  return extended_problem<Key>(extended_space, extended_names);
325  }
326 
327  private:
328  StoredProblemSpace m_space;
329  std::array<size_t, space_dimensionality> variable_ids;
330  std::vector<std::string> m_variable_names;
331  size_t context;
332 
333  public:
334  MultidimensionalSparseTuningProblem() = default;
335 
336  MultidimensionalSparseTuningProblem(StoredProblemSpace space,
337  const std::vector<std::string>& names)
338  : m_space(std::move(space)), m_variable_names(names) {
339  assert(names.size() == space_dimensionality);
340  for (unsigned long x = 0; x < names.size(); ++x) {
341  VariableInfo info;
342  info.type = Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
343  info.category = Kokkos::Tools::Experimental::StatisticalCategory::
344  kokkos_value_interval;
345  info.valueQuantity =
346  Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
347  info.candidates = Kokkos::Tools::Experimental::make_candidate_range(
348  tuning_min, tuning_max, tuning_step, true, true);
349  variable_ids[x] = declare_output_type(names[x], info);
350  }
351  }
352 
353  MultidimensionalSparseTuningProblem(ProblemSpaceInput space,
354  const std::vector<std::string>& names)
355  : MultidimensionalSparseTuningProblem(HierarchyConstructor::build(space),
356  names) {}
357 
358  template <typename... Coordinates>
359  auto get_point(Coordinates... coordinates) {
360  using ArrayType = std::array<Kokkos::Tools::Experimental::VariableValue,
361  sizeof...(coordinates)>;
362  return Impl::get_point(
363  m_space, ArrayType({Kokkos::Tools::Experimental::make_variable_value(
364  0, static_cast<double>(coordinates))...}));
365  }
366 
367  auto begin() {
368  context = Kokkos::Tools::Experimental::get_new_context_id();
369  ValueArray values;
370  for (int x = 0; x < space_dimensionality; ++x) {
371  values[x] = Kokkos::Tools::Experimental::make_variable_value(
372  variable_ids[x], 0.0);
373  }
374  begin_context(context);
375  request_output_values(context, space_dimensionality, values.data());
376  return Impl::get_point(m_space, values);
377  }
378 
379  auto end() { end_context(context); }
380 };
381 
// CRTP mixin for tuners that wrap a sub-tuner exposed through
// `Tuner::get_tuner()`. Adds `combine` (add a tuning axis) and `get_point`
// (look up a point) on top of that sub-tuner.
template <typename Tuner>
struct ExtendableTunerMixin {
  // Returns the wrapped tuner extended by a new axis called `axis_name` whose
  // candidate values are `new_axis`.
  template <typename Key>
  auto combine(const std::string& axis_name,
               const std::vector<Key>& new_axis) const {
    const auto& sub_tuner = static_cast<const Tuner*>(this)->get_tuner();
    return sub_tuner.extend(axis_name, new_axis);
  }

  // Forwards `coordinates` to the wrapped tuner's get_point.
  template <typename... Coordinates>
  auto get_point(Coordinates... coordinates) {
    // Held as a mutable object rather than a const reference: the wrapped
    // tuner's get_point may be a non-const member, and calling it through a
    // const reference would not compile.
    auto sub_tuner = static_cast<const Tuner*>(this)->get_tuner();
    return sub_tuner.get_point(coordinates...);
  }
};
397 
398 template <size_t MaxDimensionSize = 100, template <class...> class Container,
399  class... TemplateArguments>
400 auto make_multidimensional_sparse_tuning_problem(
401  const Container<TemplateArguments...>& in, std::vector<std::string> names) {
402  return MultidimensionalSparseTuningProblem<Container, MaxDimensionSize,
403  TemplateArguments...>(in, names);
404 }
405 
406 class TeamSizeTuner : public ExtendableTunerMixin<TeamSizeTuner> {
407  private:
408  using SpaceDescription = std::map<int64_t, std::vector<int64_t>>;
409  using TunerType = decltype(make_multidimensional_sparse_tuning_problem<20>(
410  std::declval<SpaceDescription>(),
411  std::declval<std::vector<std::string>>()));
412  TunerType tuner;
413 
414  public:
415  TeamSizeTuner() = default;
416  TeamSizeTuner& operator=(const TeamSizeTuner& other) = default;
417  TeamSizeTuner(const TeamSizeTuner& other) = default;
418  TeamSizeTuner& operator=(TeamSizeTuner&& other) = default;
419  TeamSizeTuner(TeamSizeTuner&& other) = default;
420  template <typename ViableConfigurationCalculator, typename Functor,
421  typename TagType, typename... Properties>
422  TeamSizeTuner(const std::string& name,
424  const Functor& functor, const TagType& tag,
425  ViableConfigurationCalculator calc) {
426  using PolicyType = Kokkos::TeamPolicy<Properties...>;
427  auto initial_vector_length = policy.impl_vector_length();
428  if (initial_vector_length < 1) {
429  policy.impl_set_vector_length(1);
430  }
456  SpaceDescription space_description;
457 
458  auto max_vector_length = PolicyType::vector_length_max();
459  std::vector<int64_t> allowed_vector_lengths;
460 
461  if (policy.impl_auto_vector_length()) { // case 1 or 2
462  for (int vector_length = max_vector_length; vector_length >= 1;
463  vector_length /= 2) {
464  policy.impl_set_vector_length(vector_length);
477  auto max_team_size = calc.get_max_team_size(policy, functor, tag);
478  if ((policy.impl_auto_team_size()) ||
479  (policy.team_size() <= max_team_size)) {
480  allowed_vector_lengths.push_back(vector_length);
481  }
482  }
483  } else { // case 3, there's only one vector length to care about
484  allowed_vector_lengths.push_back(policy.impl_vector_length());
485  }
486 
487  for (const auto vector_length : allowed_vector_lengths) {
488  std::vector<int64_t> allowed_team_sizes;
489  policy.impl_set_vector_length(vector_length);
490  auto max_team_size = calc.get_max_team_size(policy, functor, tag);
491  if (policy.impl_auto_team_size()) { // case 1 or 3, try all legal team
492  // sizes
493  for (int team_size = max_team_size; team_size >= 1; team_size /= 2) {
494  allowed_team_sizes.push_back(team_size);
495  }
496  } else { // case 2, just try the provided team size
497  allowed_team_sizes.push_back(policy.team_size());
498  }
499  space_description[vector_length] = allowed_team_sizes;
500  }
501  tuner = make_multidimensional_sparse_tuning_problem<20>(
502  space_description, {std::string(name + "_vector_length"),
503  std::string(name + "_team_size")});
504  policy.impl_set_vector_length(initial_vector_length);
505  }
506 
507  template <typename... Properties>
508  void tune(Kokkos::TeamPolicy<Properties...>& policy) {
509  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
510  auto configuration = tuner.begin();
511  auto team_size = std::get<1>(configuration);
512  auto vector_length = std::get<0>(configuration);
513  if (vector_length > 0) {
514  policy.impl_set_team_size(team_size);
515  policy.impl_set_vector_length(vector_length);
516  }
517  }
518  }
519  void end() {
520  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
521  tuner.end();
522  }
523  }
524 
525  TunerType get_tuner() const { return tuner; }
526 };
527 
namespace Impl {

// Appends the candidate tile sizes 1, 2, 4, ... (powers of two strictly below
// `tile_size`) to `cont`.
//
// When `tile_size` is 1 or less no power of two qualifies; a single entry 1 is
// appended instead so that the level is never left empty.
template <typename T>
void fill_tile(std::vector<T>& cont, int tile_size) {
  if (tile_size <= 1) {
    cont.push_back(1);
    return;
  }
  for (int x = 1; x < tile_size; x *= 2) {
    cont.push_back(x);
    // Doubling would reach or exceed tile_size: stop here so that x *= 2 can
    // never overflow int.
    if (x > tile_size / 2) break;
  }
}

// Nested variant: for every candidate x (as above) the entry `cont[x]` is
// filled with the candidates for the remaining budget `tile_size / x`.
//
// When `tile_size` is 1 or less, the single key 1 is used so that this level
// and every level below it hold at least one entry.
template <typename T, typename Mapped>
void fill_tile(std::map<T, Mapped>& cont, int tile_size) {
  if (tile_size <= 1) {
    fill_tile(cont[1], 1);
    return;
  }
  for (int x = 1; x < tile_size; x *= 2) {
    fill_tile(cont[x], tile_size / x);
    if (x > tile_size / 2) break;  // same overflow guard as above
  }
}
}  // namespace Impl
543 
544 template <int MDRangeRank>
545 struct MDRangeTuner : public ExtendableTunerMixin<MDRangeTuner<MDRangeRank>> {
546  private:
547  static constexpr int rank = MDRangeRank;
548  static constexpr int max_slices = 15;
549  using SpaceDescription =
550  typename Impl::n_dimensional_sparse_structure<int, rank>::type;
551  using TunerType =
552  decltype(make_multidimensional_sparse_tuning_problem<max_slices>(
553  std::declval<SpaceDescription>(),
554  std::declval<std::vector<std::string>>()));
555  TunerType tuner;
556 
557  public:
558  MDRangeTuner() = default;
559  template <typename Functor, typename TagType, typename Calculator,
560  typename... Properties>
561  MDRangeTuner(const std::string& name,
562  const Kokkos::MDRangePolicy<Properties...>& policy,
563  const Functor& functor, const TagType& tag, Calculator calc) {
564  SpaceDescription desc;
565  int max_tile_size =
566  calc.get_mdrange_max_tile_size_product(policy, functor, tag);
567  Impl::fill_tile(desc, max_tile_size);
568  std::vector<std::string> feature_names;
569  for (int x = 0; x < rank; ++x) {
570  feature_names.push_back(name + "_tile_size_" + std::to_string(x));
571  }
572  tuner = make_multidimensional_sparse_tuning_problem<max_slices>(
573  desc, feature_names);
574  }
575  template <typename Policy, typename Tuple, size_t... Indices>
576  void set_policy_tile(Policy& policy, const Tuple& tuple,
577  const std::index_sequence<Indices...>&) {
578  policy.impl_change_tile_size({std::get<Indices>(tuple)...});
579  }
580  template <typename... Properties>
581  void tune(Kokkos::MDRangePolicy<Properties...>& policy) {
582  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
583  auto configuration = tuner.begin();
584  set_policy_tile(policy, configuration, std::make_index_sequence<rank>{});
585  }
586  }
587  void end() {
588  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
589  tuner.end();
590  }
591  }
592 
593  TunerType get_tuner() const { return tuner; }
594 };
595 
596 template <class Choice>
597 struct CategoricalTuner {
598  using choice_list = std::vector<Choice>;
599  choice_list choices;
600  size_t context;
601  size_t tuning_variable_id;
602  CategoricalTuner(std::string name, choice_list m_choices)
603  : choices(m_choices) {
604  std::vector<int64_t> indices;
605  for (typename decltype(choices)::size_type x = 0; x < choices.size(); ++x) {
606  indices.push_back(x);
607  }
608  VariableInfo info;
609  info.category = StatisticalCategory::kokkos_value_categorical;
610  info.valueQuantity = CandidateValueType::kokkos_value_set;
611  info.type = ValueType::kokkos_value_int64;
612  info.candidates = make_candidate_set(indices.size(), indices.data());
613  tuning_variable_id = declare_output_type(name, info);
614  }
615  const Choice& begin() {
616  context = get_new_context_id();
617  begin_context(context);
618  VariableValue value = make_variable_value(tuning_variable_id, int64_t(0));
619  request_output_values(context, 1, &value);
620  return choices[value.value.int_value];
621  }
622  void end() { end_context(context); }
623 };
624 
625 template <typename Choice>
626 auto make_categorical_tuner(std::string name, std::vector<Choice> choices)
627  -> CategoricalTuner<Choice> {
628  return CategoricalTuner<Choice>(name, choices);
629 }
630 
631 } // namespace Experimental
632 } // namespace Tools
633 } // namespace Kokkos
634 
635 #endif
Execution policy for parallel work over a league of teams of threads.