Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_Behavior.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Teuchos_EnvVariables.hpp"
11 #include "Teuchos_OrdinalTraits.hpp"
12 #include "Teuchos_TestForException.hpp"
13 #include "TpetraCore_config.h"
15 #include "KokkosKernels_config.h" // for TPL enable macros
16 #include <array>
17 #include <functional>
18 #include <map>
19 
54 // environ should be available on posix platforms
55 #if not(defined(WIN) && (_MSC_VER >= 1900))
56 // needs to be in the global namespace
57 extern char **environ;
58 #endif
59 
60 namespace Tpetra {
61 namespace Details {
62 
63 namespace BehaviorDetails {
64 
// Prefix that marks an environment variable as belonging to Tpetra.
constexpr std::string_view RESERVED_PREFIX{"TPETRA_"};

// Names of environment variables read through the Behavior accessors.
// `constexpr` already implies `const` for these namespace-scope objects.

// MPI / device related switches.
constexpr std::string_view ASSUME_GPU_AWARE_MPI{"TPETRA_ASSUME_GPU_AWARE_MPI"};
constexpr std::string_view CUDA_LAUNCH_BLOCKING{"CUDA_LAUNCH_BLOCKING"};
constexpr std::string_view MM_TAFC_OptimizationCoreCount{
    "MM_TAFC_OptimizationCoreCount"};

// Numeric thresholds and sizes.
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD{
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD"};
constexpr std::string_view ROW_IMBALANCE_THRESHOLD{
    "TPETRA_ROW_IMBALANCE_THRESHOLD"};
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH{
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH"};
constexpr std::string_view VECTOR_DEVICE_THRESHOLD{
    "TPETRA_VECTOR_DEVICE_THRESHOLD"};
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE{
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE"};
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE{
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE"};

// Profiling backends.
constexpr std::string_view USE_TEUCHOS_TIMERS{"TPETRA_USE_TEUCHOS_TIMERS"};
constexpr std::string_view USE_KOKKOS_PROFILING{"TPETRA_USE_KOKKOS_PROFILING"};

// Debug / verbose / timing modes.
constexpr std::string_view DEBUG{"TPETRA_DEBUG"};
constexpr std::string_view VERBOSE{"TPETRA_VERBOSE"};
constexpr std::string_view TIMING{"TPETRA_TIMING"};

// Algorithmic and communication switches.
constexpr std::string_view HIERARCHICAL_UNPACK{"TPETRA_HIERARCHICAL_UNPACK"};
constexpr std::string_view SKIP_COPY_AND_PERMUTE{
    "TPETRA_SKIP_COPY_AND_PERMUTE"};
constexpr std::string_view FUSED_RESIDUAL{"TPETRA_FUSED_RESIDUAL"};
constexpr std::string_view OVERLAP{"TPETRA_OVERLAP"};
constexpr std::string_view DEFAULT_SEND_TYPE{"TPETRA_DEFAULT_SEND_TYPE"};
constexpr std::string_view GRANULAR_TRANSFERS{"TPETRA_GRANULAR_TRANSFERS"};
constexpr std::string_view SPACES_ID_WARN_LIMIT{"TPETRA_SPACES_ID_WARN_LIMIT"};

// Kokkos timer injection switches.
constexpr std::string_view TIME_KOKKOS_DEEP_COPY{
    "TPETRA_TIME_KOKKOS_DEEP_COPY"};
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1{
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1"};
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2{
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2"};
constexpr std::string_view TIME_KOKKOS_FENCE{"TPETRA_TIME_KOKKOS_FENCE"};
constexpr std::string_view TIME_KOKKOS_FUNCTIONS{
    "TPETRA_TIME_KOKKOS_FUNCTIONS"};
109 
// Packs any number of arguments into a std::array of std::string_view.
// Every argument must be implicitly convertible to std::string_view; the
// array length equals the number of arguments. Usable in constant
// expressions.
template <typename... Views>
constexpr auto make_array(Views &&... views)
    -> std::array<std::string_view, sizeof...(Views)> {
  return {std::forward<Views>(views)...};
}
117 
118 constexpr const auto RECOGNIZED_VARS = make_array(
119  ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
120  VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
121  MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
122  HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
123  USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
124  HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
125  DEFAULT_SEND_TYPE, GRANULAR_TRANSFERS,
126  SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
127  TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
128 
// Two-level, string-keyed map of boolean flags.
// NOTE(review): no use of this map is visible in this listing - confirm
// whether it is still needed.
std::map<std::string, std::map<std::string, bool>> namedVariableMap_;

// When true, Behavior::verbose(name) returns false without consulting the
// environment.
bool verboseDisabled_{false};

// When true, Behavior::timing(name) returns false without consulting the
// environment.
bool timingDisabled_{false};
132 }
133 
134 namespace { // (anonymous)
135 
// Splits `s` on every occurrence of `sep` and calls `f` once per non-empty
// token, in left-to-right order.
//
// Empty tokens (leading, trailing, or between two adjacent separators) are
// skipped, so `f` is never invoked for an empty `s` or for a string made up
// only of separators.
//
// \param s    Text to tokenize; it is only read.
// \param f    Callback that receives each token as a std::string. It is
//             taken by const reference so the type-erased callable is not
//             copied on every call to split().
// \param sep  Separator character (default ',').
void split(const std::string_view s,
           const std::function<void(const std::string &)> &f,
           const char sep = ',') {
  using size_type = std::string_view::size_type;

  const size_type length = s.length();
  size_type tokenBegin = 0;

  // A token can only start strictly before the end of the string; the
  // position one past the last character never yields a non-empty token.
  while (tokenBegin < length) {
    size_type tokenEnd = s.find(sep, tokenBegin);
    if (tokenEnd == std::string_view::npos) {
      tokenEnd = length;  // last token runs to the end of the input
    }
    if (tokenEnd != tokenBegin) {
      f(std::string(s.substr(tokenBegin, tokenEnd - tokenBegin)));
    }
    tokenBegin = tokenEnd + 1;  // step over the separator
  }
}
158 
// Compile-time default for debug mode: true only when the build defines
// HAVE_TPETRA_DEBUG.
constexpr bool debugDefault() {
#if defined(HAVE_TPETRA_DEBUG)
  constexpr bool builtWithDebug = true;
#else
  constexpr bool builtWithDebug = false;
#endif  // HAVE_TPETRA_DEBUG
  return builtWithDebug;
}
166 
// Compile-time default for verbose mode: always off.
constexpr bool verboseDefault() { return false; }
170 
// Compile-time default for timing mode: always off.
constexpr bool timingDefault() { return false; }
174 
// Compile-time default for the GPU-aware-MPI assumption: true only when the
// build defines the TPETRA_ASSUME_GPU_AWARE_MPI macro.
constexpr bool assumeMpiIsGPUAwareDefault() {
#if defined(TPETRA_ASSUME_GPU_AWARE_MPI)
  constexpr bool assumedAtBuildTime = true;
#else
  constexpr bool assumedAtBuildTime = false;
#endif  // TPETRA_ASSUME_GPU_AWARE_MPI
  return assumedAtBuildTime;
}
182 
// Compile-time default for the CUDA_LAUNCH_BLOCKING setting: always off.
constexpr bool cudaLaunchBlockingDefault() { return false; }
186 
// Compile-time default for hierarchical unpacking: always on.
constexpr bool hierarchicalUnpackDefault() { return true; }
190 
191 } // namespace (anonymous)
192 
194 
195  static bool once = false;
196 
197  if (!once) {
198  const char prefix[] = "Tpetra::Details::Behavior: ";
199  char **env;
200 #if defined(WIN) && (_MSC_VER >= 1900)
201  env = *__p__environ();
202 #else
203  env = environ; // defined at the top of this file as extern char **environ;
204 #endif
205  for (; *env; ++env) {
206 
207  std::string name;
208  std::string value;
209  const std::string_view ev(*env);
210 
211  // split name=value on the first =, everything before = is name
212  split(
213  ev,
214  [&](const std::string &s) {
215  if (name.empty()) {
216  name = s;
217  } else {
218  value = s;
219  }
220  },
221  '=');
222 
223  if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
224  name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
225  BehaviorDetails::RESERVED_PREFIX) {
226  const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
227  BehaviorDetails::RECOGNIZED_VARS.end(), name);
228  TEUCHOS_TEST_FOR_EXCEPTION(
229  it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
230  prefix << "Environment "
231  "variable \""
232  << name << "\" (prefixed with \""
233  << BehaviorDetails::RESERVED_PREFIX
234  << "\") is not a recognized Tpetra variable.");
235  }
236  }
237 
238  once = true;
239  }
240 }
241 
243  constexpr bool defaultValue = debugDefault();
244 
245  static bool value_ = defaultValue;
246  static bool initialized_ = false;
247  return Teuchos::idempotentlyGetEnvironmentVariable(
248  value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
249 }
250 
252  if (BehaviorDetails::verboseDisabled_)
253  return false;
254 
255  constexpr bool defaultValue = verboseDefault();
256 
257  static bool value_ = defaultValue;
258  static bool initialized_ = false;
259  return Teuchos::idempotentlyGetEnvironmentVariable(
260  value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
261 }
262 
264  if (BehaviorDetails::timingDisabled_)
265  return false;
266 
267  constexpr bool defaultValue = timingDefault();
268 
269  static bool value_ = defaultValue;
270  static bool initialized_ = false;
271  return Teuchos::idempotentlyGetEnvironmentVariable(
272  value_, initialized_, BehaviorDetails::TIMING, defaultValue);
273 }
274 
276  constexpr bool defaultValue = assumeMpiIsGPUAwareDefault();
277 
278  static bool value_ = defaultValue;
279  static bool initialized_ = false;
280  return Teuchos::idempotentlyGetEnvironmentVariable(
281  value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
282  defaultValue);
283 }
284 
286  constexpr bool defaultValue = cudaLaunchBlockingDefault();
287 
288  static bool value_ = defaultValue;
289  static bool initialized_ = false;
290  return Teuchos::idempotentlyGetEnvironmentVariable(
291  value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
292  defaultValue);
293 }
294 
296  constexpr int _default = 3000;
297  static int value_ = _default;
298  static bool initialized_ = false;
299  return Teuchos::idempotentlyGetEnvironmentVariable(
300  value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
301  _default);
302 }
303 
305  constexpr size_t defaultValue(200);
306 
307  static size_t value_ = defaultValue;
308  static bool initialized_ = false;
309  return Teuchos::idempotentlyGetEnvironmentVariable(
310  value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
311  defaultValue);
312 }
313 
315  constexpr size_t defaultValue(256);
316 
317  static size_t value_ = defaultValue;
318  static bool initialized_ = false;
319  return Teuchos::idempotentlyGetEnvironmentVariable(
320  value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
321  defaultValue);
322 }
323 
325  constexpr bool defaultValue = false;
326 
327  static bool value_ = defaultValue;
328  static bool initialized_ = false;
329  return Teuchos::idempotentlyGetEnvironmentVariable(
330  value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
331  defaultValue);
332 }
333 
335  constexpr size_t defaultValue(22000);
336 
337  static size_t value_ = defaultValue;
338  static bool initialized_ = false;
339  return Teuchos::idempotentlyGetEnvironmentVariable(
340  value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
341  defaultValue);
342 }
343 
345 
346 #ifdef HAVE_TPETRA_INST_CUDA
347  constexpr size_t defaultValue(16);
348 #else
349  constexpr size_t defaultValue(256);
350 #endif
351 
352  static size_t value_ = defaultValue;
353  static bool initialized_ = false;
354  return Teuchos::idempotentlyGetEnvironmentVariable(
355  value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
356  defaultValue);
357 }
358 
360 #ifdef HAVE_TPETRA_INST_CUDA
361  const size_t defaultValue(16);
362 #else
363  const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
364 #endif
365 
366  static size_t value_ = defaultValue;
367  static bool initialized_ = false;
368  return Teuchos::idempotentlyGetEnvironmentVariable(
369  value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
370  defaultValue);
371 }
372 
374  constexpr bool defaultValue(false);
375 
376  static bool value_ = defaultValue;
377  static bool initialized_ = false;
378  return Teuchos::idempotentlyGetEnvironmentVariable(
379  value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
380 }
381 
383  constexpr bool defaultValue(false);
384 
385  static bool value_ = defaultValue;
386  static bool initialized_ = false;
387  return Teuchos::idempotentlyGetEnvironmentVariable(
388  value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
389  defaultValue);
390 }
391 
392 bool Behavior::debug(const char name[]) {
393  constexpr bool defaultValue = false;
394 
395  static bool initialized_ = false;
396  return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
397  name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
398 }
399 
400 bool Behavior::verbose(const char name[]) {
401  if (BehaviorDetails::verboseDisabled_)
402  return false;
403 
404  constexpr bool defaultValue = false;
405 
406  static bool initialized_ = false;
407  return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
408  name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
409 }
410 
412  BehaviorDetails::verboseDisabled_ = false;
413 }
414 
416  BehaviorDetails::verboseDisabled_ = true;
417 }
418 
419 bool Behavior::timing(const char name[]) {
420  if (BehaviorDetails::timingDisabled_)
421  return false;
422 
423  constexpr bool defaultValue = false;
424 
425  static bool initialized_ = false;
426  return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
427  name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
428 }
429 
430 void Behavior::enable_timing() { BehaviorDetails::timingDisabled_ = false; }
431 
432 void Behavior::disable_timing() { BehaviorDetails::timingDisabled_ = true; }
433 
435  constexpr bool defaultValue = hierarchicalUnpackDefault();
436 
437  static bool value_ = defaultValue;
438  static bool initialized_ = false;
439  return Teuchos::idempotentlyGetEnvironmentVariable(
440  value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
441 }
442 
444  constexpr bool defaultValue(false);
445 
446  static bool value_ = defaultValue;
447  static bool initialized_ = false;
448  return Teuchos::idempotentlyGetEnvironmentVariable(
449  value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
450  defaultValue);
451 }
452 
454 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
455  defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
456  defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
457  constexpr bool defaultValue(false);
458 #else
459  constexpr bool defaultValue(true);
460 #endif
461 
462  static bool value_ = defaultValue;
463  static bool initialized_ = false;
464  return Teuchos::idempotentlyGetEnvironmentVariable(
465  value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
466 }
467 
469  constexpr bool defaultValue(false);
470 
471  static bool value_ = defaultValue;
472  static bool initialized_ = false;
473  return Teuchos::idempotentlyGetEnvironmentVariable(
474  value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
475 }
476 
478  const std::string defaultValue("Send");
479 
480  static std::string value_ = defaultValue;
481  static bool initialized_ = false;
482  return Teuchos::idempotentlyGetEnvironmentVariable(
483  value_, initialized_, BehaviorDetails::DEFAULT_SEND_TYPE, defaultValue);
484 }
485 
487  constexpr bool defaultValue(false);
488 
489  static bool value_ = defaultValue;
490  static bool initialized_ = false;
491  return Teuchos::idempotentlyGetEnvironmentVariable(
492  value_, initialized_, BehaviorDetails::GRANULAR_TRANSFERS, defaultValue);
493 }
494 
496  constexpr size_t defaultValue(16);
497 
498  static size_t value_ = defaultValue;
499  static bool initialized_ = false;
500  return Teuchos::idempotentlyGetEnvironmentVariable(
501  value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
502  defaultValue);
503 }
504 
506  constexpr bool defaultValue(false);
507 
508  static bool value_ = defaultValue;
509  static bool initialized_ = false;
510  return Teuchos::idempotentlyGetEnvironmentVariable(
511  value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
512  defaultValue);
513 }
514 
516  constexpr bool defaultValue(false);
517 
518  static bool value_ = defaultValue;
519  static bool initialized_ = false;
520  return Teuchos::idempotentlyGetEnvironmentVariable(
521  value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
522  defaultValue);
523 }
524 
526  constexpr bool defaultValue(false);
527 
528  static bool value_ = defaultValue;
529  static bool initialized_ = false;
530  return Teuchos::idempotentlyGetEnvironmentVariable(
531  value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
532  defaultValue);
533 }
534 
536  constexpr bool defaultValue(false);
537 
538  static bool value_ = defaultValue;
539  static bool initialized_ = false;
540  return Teuchos::idempotentlyGetEnvironmentVariable(
541  value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
542 }
543 
545  constexpr bool defaultValue(false);
546 
547  static bool value_ = defaultValue;
548  static bool initialized_ = false;
549  return Teuchos::idempotentlyGetEnvironmentVariable(
550  value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
551  defaultValue);
552 }
553 
554 } // namespace Details
555 } // namespace Tpetra
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static bool timing()
Whether Tpetra is in timing mode.
static void disable_verbose_behavior()
Disable verbose mode programmatically.
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool debug()
Whether Tpetra is in debug mode.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static void disable_timing()
Disable timing programmatically.
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static void enable_timing()
Enable timing programmatically.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static void enable_verbose_behavior()
Enable verbose mode programmatically.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static std::string defaultSendType()
Default send type.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool enableGranularTransfers()
Speed up transfers by overlapping computation and communication.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.