Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_Behavior.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Teuchos_EnvVariables.hpp"
11 #include "Teuchos_OrdinalTraits.hpp"
12 #include "Teuchos_TestForException.hpp"
13 #include "TpetraCore_config.h"
15 #include "KokkosKernels_config.h" // for TPL enable macros
16 #include <array>
17 #include <functional>
18 #include <map>
19 
54 // environ should be available on posix platforms
55 #if not(defined(WIN) && (_MSC_VER >= 1900))
56 // needs to be in the global namespace
57 extern char **environ;
58 #endif
59 
60 namespace Tpetra {
61 namespace Details {
62 
63 namespace BehaviorDetails {
64 
// Names of the environment variables this file knows about.
//
// RESERVED_PREFIX is the prefix that the environment scan in this file
// treats as belonging to Tpetra: a variable whose name starts with it must
// appear in RECOGNIZED_VARS or the scan throws.

constexpr std::string_view RESERVED_PREFIX{"TPETRA_"};

// --- Variables without the reserved prefix --------------------------------
constexpr std::string_view CUDA_LAUNCH_BLOCKING{"CUDA_LAUNCH_BLOCKING"};
constexpr std::string_view MM_TAFC_OptimizationCoreCount{
    "MM_TAFC_OptimizationCoreCount"};

// --- General mode switches -------------------------------------------------
constexpr std::string_view DEBUG{"TPETRA_DEBUG"};
constexpr std::string_view VERBOSE{"TPETRA_VERBOSE"};
constexpr std::string_view TIMING{"TPETRA_TIMING"};
constexpr std::string_view ASSUME_GPU_AWARE_MPI{"TPETRA_ASSUME_GPU_AWARE_MPI"};

// --- Thresholds and sizes --------------------------------------------------
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD{
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD"};
constexpr std::string_view ROW_IMBALANCE_THRESHOLD{
    "TPETRA_ROW_IMBALANCE_THRESHOLD"};
constexpr std::string_view VECTOR_DEVICE_THRESHOLD{
    "TPETRA_VECTOR_DEVICE_THRESHOLD"};
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE{
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE"};
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE{
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE"};
constexpr std::string_view SPACES_ID_WARN_LIMIT{"TPETRA_SPACES_ID_WARN_LIMIT"};

// --- Algorithm and communication choices -----------------------------------
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH{
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH"};
constexpr std::string_view HIERARCHICAL_UNPACK{"TPETRA_HIERARCHICAL_UNPACK"};
constexpr std::string_view SKIP_COPY_AND_PERMUTE{
    "TPETRA_SKIP_COPY_AND_PERMUTE"};
constexpr std::string_view FUSED_RESIDUAL{"TPETRA_FUSED_RESIDUAL"};
constexpr std::string_view OVERLAP{"TPETRA_OVERLAP"};
constexpr std::string_view DEFAULT_SEND_TYPE{"TPETRA_DEFAULT_SEND_TYPE"};
constexpr std::string_view GRANULAR_TRANSFERS{"TPETRA_GRANULAR_TRANSFERS"};

// --- Profiling and timer instrumentation -----------------------------------
constexpr std::string_view USE_TEUCHOS_TIMERS{"TPETRA_USE_TEUCHOS_TIMERS"};
constexpr std::string_view USE_KOKKOS_PROFILING{"TPETRA_USE_KOKKOS_PROFILING"};
constexpr std::string_view TIME_KOKKOS_DEEP_COPY{
    "TPETRA_TIME_KOKKOS_DEEP_COPY"};
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1{
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1"};
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2{
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2"};
constexpr std::string_view TIME_KOKKOS_FENCE{"TPETRA_TIME_KOKKOS_FENCE"};
constexpr std::string_view TIME_KOKKOS_FUNCTIONS{
    "TPETRA_TIME_KOKKOS_FUNCTIONS"};
109 
110 // construct an std::array of string_view with any number of provided
111 // string_views
/// Build a std::array of std::string_view from the given arguments.
///
/// The array length is the number of arguments; each argument is forwarded
/// and must be implicitly convertible to std::string_view.
template <typename... Elems>
constexpr std::array<std::string_view, sizeof...(Elems)>
make_array(Elems &&...elems) {
  using ViewArray = std::array<std::string_view, sizeof...(Elems)>;
  return ViewArray{std::forward<Elems>(elems)...};
}
117 
118 constexpr const auto RECOGNIZED_VARS = make_array(
119  ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
120  VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
121  MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
122  HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
123  USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
124  HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
125  DEFAULT_SEND_TYPE, GRANULAR_TRANSFERS,
126  SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
127  TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
128 
// Two-level string-keyed map of boolean flags.
// NOTE(review): nothing in the visible part of this file reads or writes it;
// confirm whether it is still needed.
std::map<std::string, std::map<std::string, bool>> namedVariableMap_{};

// When true, the named verbose(const char[]) query in this file returns false
// without consulting the environment.
bool verboseDisabled_{false};

// When true, the named timing(const char[]) query in this file returns false
// without consulting the environment; toggled by enable_timing() and
// disable_timing().
bool timingDisabled_{false};
132 } // namespace BehaviorDetails
133 
134 namespace { // (anonymous)
135 
/// Split `s` on the separator character and hand each piece to `f`.
///
/// \param s    Text to split.
/// \param f    Callback invoked once per non-empty token, in left-to-right
///             order. Empty tokens (adjacent, leading or trailing
///             separators) are skipped.
/// \param sep  Separator character; defaults to ','.
void split(const std::string_view s,
           std::function<void(const std::string &)> f,
           const char sep = ',') {
  const std::size_t total = s.size();

  // `begin` marks the start of the token being scanned. It may reach
  // total + 1 after the final token, which is what ends the loop.
  for (std::size_t begin = 0; begin <= total;) {
    const std::size_t hit = s.find(sep, begin);
    const std::size_t end = (hit == std::string_view::npos) ? total : hit;

    if (end > begin) {
      f(std::string(s.substr(begin, end - begin)));
    }
    begin = end + 1;
  }
}
154 
/// Compile-time default for debug mode: true exactly when the
/// HAVE_TPETRA_DEBUG macro is defined.
constexpr bool debugDefault() {
#if defined(HAVE_TPETRA_DEBUG)
  constexpr bool builtWithDebug = true;
#else
  constexpr bool builtWithDebug = false;
#endif // HAVE_TPETRA_DEBUG
  return builtWithDebug;
}
162 
/// Compile-time default for verbose mode (always off).
constexpr bool verboseDefault() { return false; }
166 
/// Compile-time default for timing mode (always off).
constexpr bool timingDefault() { return false; }
170 
/// Compile-time default for the GPU-aware-MPI assumption: true exactly when
/// the TPETRA_ASSUME_GPU_AWARE_MPI macro is defined.
constexpr bool assumeMpiIsGPUAwareDefault() {
#if defined(TPETRA_ASSUME_GPU_AWARE_MPI)
  constexpr bool assumedAtBuildTime = true;
#else
  constexpr bool assumedAtBuildTime = false;
#endif // TPETRA_ASSUME_GPU_AWARE_MPI
  return assumedAtBuildTime;
}
178 
/// Compile-time default used when reading CUDA_LAUNCH_BLOCKING (always off).
constexpr bool cudaLaunchBlockingDefault() { return false; }
182 
/// Compile-time default for hierarchical unpacking (always on).
constexpr bool hierarchicalUnpackDefault() { return true; }
186 
187 } // namespace
188 
190  static bool once = false;
191 
192  if (!once) {
193  const char prefix[] = "Tpetra::Details::Behavior: ";
194  char **env;
195 #if defined(WIN) && (_MSC_VER >= 1900)
196  env = *__p__environ();
197 #else
198  env = environ; // defined at the top of this file as extern char **environ;
199 #endif
200  for (; *env; ++env) {
201  std::string name;
202  std::string value;
203  const std::string_view ev(*env);
204 
205  // split name=value on the first =, everything before = is name
206  split(
207  ev,
208  [&](const std::string &s) {
209  if (name.empty()) {
210  name = s;
211  } else {
212  value = s;
213  }
214  },
215  '=');
216 
217  if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
218  name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
219  BehaviorDetails::RESERVED_PREFIX) {
220  const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
221  BehaviorDetails::RECOGNIZED_VARS.end(), name);
222  TEUCHOS_TEST_FOR_EXCEPTION(
223  it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
224  prefix << "Environment "
225  "variable \""
226  << name << "\" (prefixed with \""
227  << BehaviorDetails::RESERVED_PREFIX
228  << "\") is not a recognized Tpetra variable.");
229  }
230  }
231 
232  once = true;
233  }
234 }
235 
237  constexpr bool defaultValue = debugDefault();
238 
239  static bool value_ = defaultValue;
240  static bool initialized_ = false;
241  return Teuchos::idempotentlyGetEnvironmentVariable(
242  value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
243 }
244 
246  if (BehaviorDetails::verboseDisabled_)
247  return false;
248 
249  constexpr bool defaultValue = verboseDefault();
250 
251  static bool value_ = defaultValue;
252  static bool initialized_ = false;
253  return Teuchos::idempotentlyGetEnvironmentVariable(
254  value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
255 }
256 
258  if (BehaviorDetails::timingDisabled_)
259  return false;
260 
261  constexpr bool defaultValue = timingDefault();
262 
263  static bool value_ = defaultValue;
264  static bool initialized_ = false;
265  return Teuchos::idempotentlyGetEnvironmentVariable(
266  value_, initialized_, BehaviorDetails::TIMING, defaultValue);
267 }
268 
270  constexpr bool defaultValue = assumeMpiIsGPUAwareDefault();
271 
272  static bool value_ = defaultValue;
273  static bool initialized_ = false;
274  return Teuchos::idempotentlyGetEnvironmentVariable(
275  value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
276  defaultValue);
277 }
278 
280  constexpr bool defaultValue = cudaLaunchBlockingDefault();
281 
282  static bool value_ = defaultValue;
283  static bool initialized_ = false;
284  return Teuchos::idempotentlyGetEnvironmentVariable(
285  value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
286  defaultValue);
287 }
288 
290  constexpr int _default = 3000;
291  static int value_ = _default;
292  static bool initialized_ = false;
293  return Teuchos::idempotentlyGetEnvironmentVariable(
294  value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
295  _default);
296 }
297 
299  constexpr size_t defaultValue(200);
300 
301  static size_t value_ = defaultValue;
302  static bool initialized_ = false;
303  return Teuchos::idempotentlyGetEnvironmentVariable(
304  value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
305  defaultValue);
306 }
307 
309  constexpr size_t defaultValue(256);
310 
311  static size_t value_ = defaultValue;
312  static bool initialized_ = false;
313  return Teuchos::idempotentlyGetEnvironmentVariable(
314  value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
315  defaultValue);
316 }
317 
319  constexpr bool defaultValue = false;
320 
321  static bool value_ = defaultValue;
322  static bool initialized_ = false;
323  return Teuchos::idempotentlyGetEnvironmentVariable(
324  value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
325  defaultValue);
326 }
327 
329  constexpr size_t defaultValue(22000);
330 
331  static size_t value_ = defaultValue;
332  static bool initialized_ = false;
333  return Teuchos::idempotentlyGetEnvironmentVariable(
334  value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
335  defaultValue);
336 }
337 
339 #ifdef HAVE_TPETRA_INST_CUDA
340  constexpr size_t defaultValue(16);
341 #else
342  constexpr size_t defaultValue(256);
343 #endif
344 
345  static size_t value_ = defaultValue;
346  static bool initialized_ = false;
347  return Teuchos::idempotentlyGetEnvironmentVariable(
348  value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
349  defaultValue);
350 }
351 
353 #ifdef HAVE_TPETRA_INST_CUDA
354  const size_t defaultValue(16);
355 #else
356  const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
357 #endif
358 
359  static size_t value_ = defaultValue;
360  static bool initialized_ = false;
361  return Teuchos::idempotentlyGetEnvironmentVariable(
362  value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
363  defaultValue);
364 }
365 
367  constexpr bool defaultValue(false);
368 
369  static bool value_ = defaultValue;
370  static bool initialized_ = false;
371  return Teuchos::idempotentlyGetEnvironmentVariable(
372  value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
373 }
374 
376  constexpr bool defaultValue(false);
377 
378  static bool value_ = defaultValue;
379  static bool initialized_ = false;
380  return Teuchos::idempotentlyGetEnvironmentVariable(
381  value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
382  defaultValue);
383 }
384 
385 bool Behavior::debug(const char name[]) {
386  constexpr bool defaultValue = false;
387 
388  static bool initialized_ = false;
389  return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
390  name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
391 }
392 
393 bool Behavior::verbose(const char name[]) {
394  if (BehaviorDetails::verboseDisabled_)
395  return false;
396 
397  constexpr bool defaultValue = false;
398 
399  static bool initialized_ = false;
400  return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
401  name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
402 }
403 
405  BehaviorDetails::verboseDisabled_ = false;
406 }
407 
409  BehaviorDetails::verboseDisabled_ = true;
410 }
411 
412 bool Behavior::timing(const char name[]) {
413  if (BehaviorDetails::timingDisabled_)
414  return false;
415 
416  constexpr bool defaultValue = false;
417 
418  static bool initialized_ = false;
419  return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
420  name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
421 }
422 
423 void Behavior::enable_timing() { BehaviorDetails::timingDisabled_ = false; }
424 
425 void Behavior::disable_timing() { BehaviorDetails::timingDisabled_ = true; }
426 
428  constexpr bool defaultValue = hierarchicalUnpackDefault();
429 
430  static bool value_ = defaultValue;
431  static bool initialized_ = false;
432  return Teuchos::idempotentlyGetEnvironmentVariable(
433  value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
434 }
435 
437  constexpr bool defaultValue(false);
438 
439  static bool value_ = defaultValue;
440  static bool initialized_ = false;
441  return Teuchos::idempotentlyGetEnvironmentVariable(
442  value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
443  defaultValue);
444 }
445 
447 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
448  defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
449  defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
450  constexpr bool defaultValue(false);
451 #else
452  constexpr bool defaultValue(true);
453 #endif
454 
455  static bool value_ = defaultValue;
456  static bool initialized_ = false;
457  return Teuchos::idempotentlyGetEnvironmentVariable(
458  value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
459 }
460 
462  constexpr bool defaultValue(false);
463 
464  static bool value_ = defaultValue;
465  static bool initialized_ = false;
466  return Teuchos::idempotentlyGetEnvironmentVariable(
467  value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
468 }
469 
471  const std::string defaultValue("Send");
472 
473  static std::string value_ = defaultValue;
474  static bool initialized_ = false;
475  return Teuchos::idempotentlyGetEnvironmentVariable(
476  value_, initialized_, BehaviorDetails::DEFAULT_SEND_TYPE, defaultValue);
477 }
478 
480  constexpr bool defaultValue(false);
481 
482  static bool value_ = defaultValue;
483  static bool initialized_ = false;
484  return Teuchos::idempotentlyGetEnvironmentVariable(
485  value_, initialized_, BehaviorDetails::GRANULAR_TRANSFERS, defaultValue);
486 }
487 
489  constexpr size_t defaultValue(16);
490 
491  static size_t value_ = defaultValue;
492  static bool initialized_ = false;
493  return Teuchos::idempotentlyGetEnvironmentVariable(
494  value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
495  defaultValue);
496 }
497 
499  constexpr bool defaultValue(false);
500 
501  static bool value_ = defaultValue;
502  static bool initialized_ = false;
503  return Teuchos::idempotentlyGetEnvironmentVariable(
504  value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
505  defaultValue);
506 }
507 
509  constexpr bool defaultValue(false);
510 
511  static bool value_ = defaultValue;
512  static bool initialized_ = false;
513  return Teuchos::idempotentlyGetEnvironmentVariable(
514  value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
515  defaultValue);
516 }
517 
519  constexpr bool defaultValue(false);
520 
521  static bool value_ = defaultValue;
522  static bool initialized_ = false;
523  return Teuchos::idempotentlyGetEnvironmentVariable(
524  value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
525  defaultValue);
526 }
527 
529  constexpr bool defaultValue(false);
530 
531  static bool value_ = defaultValue;
532  static bool initialized_ = false;
533  return Teuchos::idempotentlyGetEnvironmentVariable(
534  value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
535 }
536 
538  constexpr bool defaultValue(false);
539 
540  static bool value_ = defaultValue;
541  static bool initialized_ = false;
542  return Teuchos::idempotentlyGetEnvironmentVariable(
543  value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
544  defaultValue);
545 }
546 
547 } // namespace Details
548 } // namespace Tpetra
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static bool timing()
Whether Tpetra is in timing mode.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool debug()
Whether Tpetra is in debug mode.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static void disable_timing()
Disable timing, programmatically.
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static void enable_timing()
Enable timing, programatically.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static std::string defaultSendType()
Default send type.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool enableGranularTransfers()
Speed up transfers by overlapping computation and communication.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.