10 #include "Teuchos_EnvVariables.hpp"
11 #include "Teuchos_OrdinalTraits.hpp"
12 #include "Teuchos_TestForException.hpp"
13 #include "TpetraCore_config.h"
15 #include "KokkosKernels_config.h"
55 #if not(defined(WIN) && (_MSC_VER >= 1900))
57 extern char **environ;
63 namespace BehaviorDetails {
// Names of the environment variables consulted by this file, held as
// compile-time string views. Every name below except RESERVED_PREFIX is also
// listed in RECOGNIZED_VARS.

// Prefix compared against the start of each environment variable name;
// names that begin with it are looked up in RECOGNIZED_VARS.
65 constexpr
const std::string_view RESERVED_PREFIX =
"TPETRA_";
66 constexpr
const std::string_view ASSUME_GPU_AWARE_MPI =
67 "TPETRA_ASSUME_GPU_AWARE_MPI";
// Note: this name does not start with RESERVED_PREFIX.
68 constexpr
const std::string_view CUDA_LAUNCH_BLOCKING =
"CUDA_LAUNCH_BLOCKING";
// Note: this name does not start with RESERVED_PREFIX either.
69 constexpr
const std::string_view MM_TAFC_OptimizationCoreCount =
70 "MM_TAFC_OptimizationCoreCount";
// Numeric settings (read into size_t values elsewhere in this file).
71 constexpr
const std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
72 "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
73 constexpr
const std::string_view ROW_IMBALANCE_THRESHOLD =
74 "TPETRA_ROW_IMBALANCE_THRESHOLD";
75 constexpr
const std::string_view MULTIVECTOR_USE_MERGE_PATH =
76 "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
77 constexpr
const std::string_view VECTOR_DEVICE_THRESHOLD =
78 "TPETRA_VECTOR_DEVICE_THRESHOLD";
79 constexpr
const std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
80 "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
81 constexpr
const std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
82 "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";
83 constexpr
const std::string_view USE_TEUCHOS_TIMERS =
84 "TPETRA_USE_TEUCHOS_TIMERS";
85 constexpr
const std::string_view USE_KOKKOS_PROFILING =
86 "TPETRA_USE_KOKKOS_PROFILING";
// DEBUG, VERBOSE and TIMING are used both for plain boolean queries and for
// the "named" queries that pass a name alongside the variable.
87 constexpr
const std::string_view DEBUG =
"TPETRA_DEBUG";
88 constexpr
const std::string_view VERBOSE =
"TPETRA_VERBOSE";
89 constexpr
const std::string_view TIMING =
"TPETRA_TIMING";
90 constexpr
const std::string_view HIERARCHICAL_UNPACK =
91 "TPETRA_HIERARCHICAL_UNPACK";
92 constexpr
const std::string_view SKIP_COPY_AND_PERMUTE =
93 "TPETRA_SKIP_COPY_AND_PERMUTE";
94 constexpr
const std::string_view FUSED_RESIDUAL =
"TPETRA_FUSED_RESIDUAL";
95 constexpr
const std::string_view OVERLAP =
"TPETRA_OVERLAP";
// The only setting in this file that is read into a std::string.
96 constexpr
const std::string_view DEFAULT_SEND_TYPE =
"TPETRA_DEFAULT_SEND_TYPE";
97 constexpr
const std::string_view GRANULAR_TRANSFERS =
"TPETRA_GRANULAR_TRANSFERS";
98 constexpr
const std::string_view SPACES_ID_WARN_LIMIT =
99 "TPETRA_SPACES_ID_WARN_LIMIT";
// Boolean switches for the Kokkos timing hooks.
100 constexpr
const std::string_view TIME_KOKKOS_DEEP_COPY =
101 "TPETRA_TIME_KOKKOS_DEEP_COPY";
102 constexpr
const std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
103 "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
104 constexpr
const std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
105 "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
106 constexpr
const std::string_view TIME_KOKKOS_FENCE =
"TPETRA_TIME_KOKKOS_FENCE";
107 constexpr
const std::string_view TIME_KOKKOS_FUNCTIONS =
108 "TPETRA_TIME_KOKKOS_FUNCTIONS";
// Packs its arguments into a std::array<std::string_view, N>, where N is the
// number of arguments supplied. Each argument is passed through std::forward
// into the array's brace initializer, so every argument must be convertible
// to std::string_view.
112 template <
typename... Elems>
113 constexpr std::array<std::string_view,
sizeof...(Elems)>
114 make_array(Elems &&... elems) {
115 return {std::forward<Elems>(elems)...};
// Compile-time list of every environment variable name this file accepts.
// It is searched with std::find when an environment variable whose name
// starts with RESERVED_PREFIX is encountered; a name that is not found here
// triggers a std::out_of_range exception. A new variable name constant must
// be added to this list as well, or setting it would be rejected.
118 constexpr
const auto RECOGNIZED_VARS = make_array(
119 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
120 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
121 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
122 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
123 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
124 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
125 DEFAULT_SEND_TYPE, GRANULAR_TRANSFERS,
126 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
127 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
// Mutable state shared by the query functions in this file.
// NOTE(review): no read or write of namedVariableMap_ is visible in this
// listing — confirm whether it is still used.
129 std::map<std::string, std::map<std::string, bool> > namedVariableMap_;
// Programmatic override flag for verbose mode: the verbose queries test it
// before consulting the environment, and it is assigned true/false elsewhere
// in this file. Presumably a true value forces the queries to report
// "not verbose" — the early-exit branch body is not visible here; confirm.
130 bool verboseDisabled_ =
false;
// Same override mechanism for timing mode; tested by the timing queries
// before they consult the environment.
131 bool timingDisabled_ =
false;
// Tokenizer: walks s looking for any character contained in sep and builds a
// std::string for each non-empty span between separators.
// NOTE(review): the return type, the sep parameter and the statement that
// hands the token to f are on lines missing from this listing — presumably f
// is called once per token; confirm against the full file.
137 split(
const std::string_view s,
138 std::function<
void(
const std::string&)> f,
141 typedef std::string::size_type size_type;
142 size_type cur_pos, last_pos=0, length=s.length();
143 while(last_pos < length + 1)
// Next separator at or after last_pos; npos means none remains.
145 cur_pos = s.find_first_of(sep, last_pos);
146 if(cur_pos == std::string::npos)
// Skip empty tokens (separator at last_pos itself).
150 if(cur_pos!=last_pos) {
151 auto token = std::string(s.data()+last_pos, (size_type)cur_pos-last_pos);
// Resume scanning just past the separator that ended this token.
154 last_pos = cur_pos + 1;
// Compile-time default for debug mode, selected by whether the build defines
// HAVE_TPETRA_DEBUG.
// NOTE(review): the returned values are on lines missing from this listing.
159 constexpr
bool debugDefault () {
160 #ifdef HAVE_TPETRA_DEBUG
164 #endif // HAVE_TPETRA_DEBUG
// Compile-time default used for the TPETRA_VERBOSE query.
// NOTE(review): the function body is not visible in this listing.
167 constexpr
bool verboseDefault () {
// Compile-time default used for the TPETRA_TIMING query.
// NOTE(review): the function body is not visible in this listing.
171 constexpr
bool timingDefault () {
// Compile-time default for the TPETRA_ASSUME_GPU_AWARE_MPI query, selected by
// whether the build defines the TPETRA_ASSUME_GPU_AWARE_MPI macro.
// NOTE(review): the returned values are on lines missing from this listing.
175 constexpr
bool assumeMpiIsGPUAwareDefault () {
176 #ifdef TPETRA_ASSUME_GPU_AWARE_MPI
180 #endif // TPETRA_ASSUME_GPU_AWARE_MPI
// Compile-time default used for the CUDA_LAUNCH_BLOCKING query.
// NOTE(review): the function body is not visible in this listing.
183 constexpr
bool cudaLaunchBlockingDefault () {
// Compile-time default used for the TPETRA_HIERARCHICAL_UNPACK query.
// NOTE(review): the function body is not visible in this listing.
187 constexpr
bool hierarchicalUnpackDefault () {
195 static bool once =
false;
198 const char prefix[] =
"Tpetra::Details::Behavior: ";
200 #if defined(WIN) && (_MSC_VER >= 1900)
201 env = *__p__environ();
205 for (; *env; ++env) {
209 const std::string_view ev(*env);
214 [&](
const std::string &s) {
223 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
224 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
225 BehaviorDetails::RESERVED_PREFIX) {
226 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
227 BehaviorDetails::RECOGNIZED_VARS.end(), name);
228 TEUCHOS_TEST_FOR_EXCEPTION(
229 it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
230 prefix <<
"Environment "
232 << name <<
"\" (prefixed with \""
233 << BehaviorDetails::RESERVED_PREFIX
234 <<
"\") is not a recognized Tpetra variable.");
243 constexpr
bool defaultValue = debugDefault();
245 static bool value_ = defaultValue;
246 static bool initialized_ =
false;
247 return Teuchos::idempotentlyGetEnvironmentVariable(
248 value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
252 if (BehaviorDetails::verboseDisabled_)
255 constexpr
bool defaultValue = verboseDefault();
257 static bool value_ = defaultValue;
258 static bool initialized_ =
false;
259 return Teuchos::idempotentlyGetEnvironmentVariable(
260 value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
264 if (BehaviorDetails::timingDisabled_)
267 constexpr
bool defaultValue = timingDefault();
269 static bool value_ = defaultValue;
270 static bool initialized_ =
false;
271 return Teuchos::idempotentlyGetEnvironmentVariable(
272 value_, initialized_, BehaviorDetails::TIMING, defaultValue);
276 constexpr
bool defaultValue = assumeMpiIsGPUAwareDefault();
278 static bool value_ = defaultValue;
279 static bool initialized_ =
false;
280 return Teuchos::idempotentlyGetEnvironmentVariable(
281 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
286 constexpr
bool defaultValue = cudaLaunchBlockingDefault();
288 static bool value_ = defaultValue;
289 static bool initialized_ =
false;
290 return Teuchos::idempotentlyGetEnvironmentVariable(
291 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
296 constexpr
int _default = 3000;
297 static int value_ = _default;
298 static bool initialized_ =
false;
299 return Teuchos::idempotentlyGetEnvironmentVariable(
300 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
305 constexpr
size_t defaultValue(200);
307 static size_t value_ = defaultValue;
308 static bool initialized_ =
false;
309 return Teuchos::idempotentlyGetEnvironmentVariable(
310 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
315 constexpr
size_t defaultValue(256);
317 static size_t value_ = defaultValue;
318 static bool initialized_ =
false;
319 return Teuchos::idempotentlyGetEnvironmentVariable(
320 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
325 constexpr
bool defaultValue =
false;
327 static bool value_ = defaultValue;
328 static bool initialized_ =
false;
329 return Teuchos::idempotentlyGetEnvironmentVariable(
330 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
335 constexpr
size_t defaultValue(22000);
337 static size_t value_ = defaultValue;
338 static bool initialized_ =
false;
339 return Teuchos::idempotentlyGetEnvironmentVariable(
340 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
346 #ifdef HAVE_TPETRA_INST_CUDA
347 constexpr
size_t defaultValue(16);
349 constexpr
size_t defaultValue(256);
352 static size_t value_ = defaultValue;
353 static bool initialized_ =
false;
354 return Teuchos::idempotentlyGetEnvironmentVariable(
355 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
360 #ifdef HAVE_TPETRA_INST_CUDA
361 const size_t defaultValue(16);
363 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
366 static size_t value_ = defaultValue;
367 static bool initialized_ =
false;
368 return Teuchos::idempotentlyGetEnvironmentVariable(
369 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
374 constexpr
bool defaultValue(
false);
376 static bool value_ = defaultValue;
377 static bool initialized_ =
false;
378 return Teuchos::idempotentlyGetEnvironmentVariable(
379 value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
383 constexpr
bool defaultValue(
false);
385 static bool value_ = defaultValue;
386 static bool initialized_ =
false;
387 return Teuchos::idempotentlyGetEnvironmentVariable(
388 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
393 constexpr
bool defaultValue =
false;
395 static bool initialized_ =
false;
396 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
397 name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
401 if (BehaviorDetails::verboseDisabled_)
404 constexpr
bool defaultValue =
false;
406 static bool initialized_ =
false;
407 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
408 name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
412 BehaviorDetails::verboseDisabled_ =
false;
416 BehaviorDetails::verboseDisabled_ =
true;
420 if (BehaviorDetails::timingDisabled_)
423 constexpr
bool defaultValue =
false;
425 static bool initialized_ =
false;
426 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
427 name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
435 constexpr
bool defaultValue = hierarchicalUnpackDefault();
437 static bool value_ = defaultValue;
438 static bool initialized_ =
false;
439 return Teuchos::idempotentlyGetEnvironmentVariable(
440 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
444 constexpr
bool defaultValue(
false);
446 static bool value_ = defaultValue;
447 static bool initialized_ =
false;
448 return Teuchos::idempotentlyGetEnvironmentVariable(
449 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
454 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
455 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
456 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
457 constexpr
bool defaultValue(
false);
459 constexpr
bool defaultValue(
true);
462 static bool value_ = defaultValue;
463 static bool initialized_ =
false;
464 return Teuchos::idempotentlyGetEnvironmentVariable(
465 value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
469 constexpr
bool defaultValue(
false);
471 static bool value_ = defaultValue;
472 static bool initialized_ =
false;
473 return Teuchos::idempotentlyGetEnvironmentVariable(
474 value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
478 const std::string defaultValue(
"Send");
480 static std::string value_ = defaultValue;
481 static bool initialized_ =
false;
482 return Teuchos::idempotentlyGetEnvironmentVariable(
483 value_, initialized_, BehaviorDetails::DEFAULT_SEND_TYPE, defaultValue);
487 constexpr
bool defaultValue(
false);
489 static bool value_ = defaultValue;
490 static bool initialized_ =
false;
491 return Teuchos::idempotentlyGetEnvironmentVariable(
492 value_, initialized_, BehaviorDetails::GRANULAR_TRANSFERS, defaultValue);
496 constexpr
size_t defaultValue(16);
498 static size_t value_ = defaultValue;
499 static bool initialized_ =
false;
500 return Teuchos::idempotentlyGetEnvironmentVariable(
501 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
506 constexpr
bool defaultValue(
false);
508 static bool value_ = defaultValue;
509 static bool initialized_ =
false;
510 return Teuchos::idempotentlyGetEnvironmentVariable(
511 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
516 constexpr
bool defaultValue(
false);
518 static bool value_ = defaultValue;
519 static bool initialized_ =
false;
520 return Teuchos::idempotentlyGetEnvironmentVariable(
521 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
526 constexpr
bool defaultValue(
false);
528 static bool value_ = defaultValue;
529 static bool initialized_ =
false;
530 return Teuchos::idempotentlyGetEnvironmentVariable(
531 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
536 constexpr
bool defaultValue(
false);
538 static bool value_ = defaultValue;
539 static bool initialized_ =
false;
540 return Teuchos::idempotentlyGetEnvironmentVariable(
541 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
545 constexpr
bool defaultValue(
false);
547 static bool value_ = defaultValue;
548 static bool initialized_ =
false;
549 return Teuchos::idempotentlyGetEnvironmentVariable(
550 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static bool timing()
Whether Tpetra is in timing mode.
static void disable_verbose_behavior()
Disable verbose mode programmatically.
static size_t multivectorKernelLocationThreshold()
The threshold for transitioning from device to host.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is GPU aware (for example, CUDA aware).
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool debug()
Whether Tpetra is in debug mode.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static void disable_timing()
Disable timing programmatically.
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static void enable_timing()
Enable timing programmatically.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static void enable_verbose_behavior()
Enable verbose mode programmatically.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static std::string defaultSendType()
Default send type.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool enableGranularTransfers()
Speed up transfers by overlapping computation and communication.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.