10 #include "Teuchos_EnvVariables.hpp"
11 #include "Teuchos_OrdinalTraits.hpp"
12 #include "Teuchos_TestForException.hpp"
13 #include "TpetraCore_config.h"
15 #include "KokkosKernels_config.h"
55 #if not(defined(WIN) && (_MSC_VER >= 1900))
57 extern char **environ;
63 namespace BehaviorDetails {
// Prefix reserved for Tpetra's own environment variables. Later in this file,
// any environment variable whose name starts with this prefix is looked up in
// RECOGNIZED_VARS, and an exception is raised if it is not found there.
65 constexpr
const std::string_view RESERVED_PREFIX =
"TPETRA_";
// Names of the environment variables consulted by the accessors in this file.
// Each constant holds the exact variable name that is passed to the
// Teuchos::idempotentlyGet*EnvironmentVariable* helpers further down.
66 constexpr
const std::string_view ASSUME_GPU_AWARE_MPI =
67 "TPETRA_ASSUME_GPU_AWARE_MPI";
// Does not start with RESERVED_PREFIX ("TPETRA_"), so the reserved-prefix
// check later in this file never applies to this name.
68 constexpr
const std::string_view CUDA_LAUNCH_BLOCKING =
"CUDA_LAUNCH_BLOCKING";
// Also lacks the "TPETRA_" prefix (and is mixed-case, unlike the others).
69 constexpr
const std::string_view MM_TAFC_OptimizationCoreCount =
70 "MM_TAFC_OptimizationCoreCount";
71 constexpr
const std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
72 "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
73 constexpr
const std::string_view ROW_IMBALANCE_THRESHOLD =
74 "TPETRA_ROW_IMBALANCE_THRESHOLD";
75 constexpr
const std::string_view MULTIVECTOR_USE_MERGE_PATH =
76 "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
77 constexpr
const std::string_view VECTOR_DEVICE_THRESHOLD =
78 "TPETRA_VECTOR_DEVICE_THRESHOLD";
79 constexpr
const std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
80 "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
81 constexpr
const std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
82 "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";
83 constexpr
const std::string_view USE_TEUCHOS_TIMERS =
84 "TPETRA_USE_TEUCHOS_TIMERS";
85 constexpr
const std::string_view USE_KOKKOS_PROFILING =
86 "TPETRA_USE_KOKKOS_PROFILING";
// DEBUG, VERBOSE and TIMING are each read two ways in this file: as a plain
// boolean and through the "named" lookup
// (idempotentlyGetNamedEnvironmentVariableAsBool).
87 constexpr
const std::string_view DEBUG =
"TPETRA_DEBUG";
88 constexpr
const std::string_view VERBOSE =
"TPETRA_VERBOSE";
89 constexpr
const std::string_view TIMING =
"TPETRA_TIMING";
90 constexpr
const std::string_view HIERARCHICAL_UNPACK =
91 "TPETRA_HIERARCHICAL_UNPACK";
92 constexpr
const std::string_view SKIP_COPY_AND_PERMUTE =
93 "TPETRA_SKIP_COPY_AND_PERMUTE";
94 constexpr
const std::string_view FUSED_RESIDUAL =
"TPETRA_FUSED_RESIDUAL";
95 constexpr
const std::string_view OVERLAP =
"TPETRA_OVERLAP";
96 constexpr
const std::string_view DEFAULT_SEND_TYPE =
"TPETRA_DEFAULT_SEND_TYPE";
97 constexpr
const std::string_view GRANULAR_TRANSFERS =
"TPETRA_GRANULAR_TRANSFERS";
98 constexpr
const std::string_view SPACES_ID_WARN_LIMIT =
99 "TPETRA_SPACES_ID_WARN_LIMIT";
100 constexpr
const std::string_view TIME_KOKKOS_DEEP_COPY =
101 "TPETRA_TIME_KOKKOS_DEEP_COPY";
102 constexpr
const std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
103 "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
104 constexpr
const std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
105 "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
106 constexpr
const std::string_view TIME_KOKKOS_FENCE =
"TPETRA_TIME_KOKKOS_FENCE";
107 constexpr
const std::string_view TIME_KOKKOS_FUNCTIONS =
108 "TPETRA_TIME_KOKKOS_FUNCTIONS";
112 template <
typename... Elems>
113 constexpr std::array<std::string_view,
sizeof...(Elems)>
114 make_array(Elems &&...elems) {
115 return {std::forward<Elems>(elems)...};
// Compile-time list of every environment-variable name this file knows about.
// It is searched with std::find for each environment entry whose name starts
// with RESERVED_PREFIX; a prefixed name that is absent from this list causes a
// std::out_of_range exception. Any new "TPETRA_"-prefixed name constant must
// therefore be added here as well, or setting it will be rejected.
118 constexpr
const auto RECOGNIZED_VARS = make_array(
119 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
120 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
121 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
122 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
123 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
124 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
125 DEFAULT_SEND_TYPE, GRANULAR_TRANSFERS,
126 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
127 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
// Two-level string-keyed map of booleans. It is not referenced in the visible
// part of this listing. NOTE(review): presumably a cache for the "named"
// debug/verbose/timing lookups -- confirm against the full file.
129 std::map<std::string, std::map<std::string, bool> > namedVariableMap_;
// Programmatic override for verbose mode. The verbose accessors test this flag
// before consulting the environment, and it is assigned true/false later in
// this file.
130 bool verboseDisabled_ =
false;
// Programmatic override for timing mode. The timing accessors test this flag
// before consulting the environment.
131 bool timingDisabled_ =
false;
136 void split(
const std::string_view s,
137 std::function<
void(
const std::string &)> f,
138 const char sep =
',') {
139 typedef std::string::size_type size_type;
140 size_type cur_pos, last_pos = 0, length = s.length();
141 while (last_pos < length + 1) {
142 cur_pos = s.find_first_of(sep, last_pos);
143 if (cur_pos == std::string::npos) {
146 if (cur_pos != last_pos) {
147 auto token = std::string(s.data() + last_pos, (size_type)cur_pos - last_pos);
150 last_pos = cur_pos + 1;
155 constexpr
bool debugDefault() {
156 #ifdef HAVE_TPETRA_DEBUG
160 #endif // HAVE_TPETRA_DEBUG
163 constexpr
bool verboseDefault() {
167 constexpr
bool timingDefault() {
171 constexpr
bool assumeMpiIsGPUAwareDefault() {
172 #ifdef TPETRA_ASSUME_GPU_AWARE_MPI
176 #endif // TPETRA_ASSUME_GPU_AWARE_MPI
179 constexpr
bool cudaLaunchBlockingDefault() {
183 constexpr
bool hierarchicalUnpackDefault() {
190 static bool once =
false;
193 const char prefix[] =
"Tpetra::Details::Behavior: ";
195 #if defined(WIN) && (_MSC_VER >= 1900)
196 env = *__p__environ();
200 for (; *env; ++env) {
203 const std::string_view ev(*env);
208 [&](
const std::string &s) {
217 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
218 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
219 BehaviorDetails::RESERVED_PREFIX) {
220 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
221 BehaviorDetails::RECOGNIZED_VARS.end(), name);
222 TEUCHOS_TEST_FOR_EXCEPTION(
223 it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
224 prefix <<
"Environment "
226 << name <<
"\" (prefixed with \""
227 << BehaviorDetails::RESERVED_PREFIX
228 <<
"\") is not a recognized Tpetra variable.");
237 constexpr
bool defaultValue = debugDefault();
239 static bool value_ = defaultValue;
240 static bool initialized_ =
false;
241 return Teuchos::idempotentlyGetEnvironmentVariable(
242 value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
246 if (BehaviorDetails::verboseDisabled_)
249 constexpr
bool defaultValue = verboseDefault();
251 static bool value_ = defaultValue;
252 static bool initialized_ =
false;
253 return Teuchos::idempotentlyGetEnvironmentVariable(
254 value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
258 if (BehaviorDetails::timingDisabled_)
261 constexpr
bool defaultValue = timingDefault();
263 static bool value_ = defaultValue;
264 static bool initialized_ =
false;
265 return Teuchos::idempotentlyGetEnvironmentVariable(
266 value_, initialized_, BehaviorDetails::TIMING, defaultValue);
270 constexpr
bool defaultValue = assumeMpiIsGPUAwareDefault();
272 static bool value_ = defaultValue;
273 static bool initialized_ =
false;
274 return Teuchos::idempotentlyGetEnvironmentVariable(
275 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
280 constexpr
bool defaultValue = cudaLaunchBlockingDefault();
282 static bool value_ = defaultValue;
283 static bool initialized_ =
false;
284 return Teuchos::idempotentlyGetEnvironmentVariable(
285 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
290 constexpr
int _default = 3000;
291 static int value_ = _default;
292 static bool initialized_ =
false;
293 return Teuchos::idempotentlyGetEnvironmentVariable(
294 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
299 constexpr
size_t defaultValue(200);
301 static size_t value_ = defaultValue;
302 static bool initialized_ =
false;
303 return Teuchos::idempotentlyGetEnvironmentVariable(
304 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
309 constexpr
size_t defaultValue(256);
311 static size_t value_ = defaultValue;
312 static bool initialized_ =
false;
313 return Teuchos::idempotentlyGetEnvironmentVariable(
314 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
319 constexpr
bool defaultValue =
false;
321 static bool value_ = defaultValue;
322 static bool initialized_ =
false;
323 return Teuchos::idempotentlyGetEnvironmentVariable(
324 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
329 constexpr
size_t defaultValue(22000);
331 static size_t value_ = defaultValue;
332 static bool initialized_ =
false;
333 return Teuchos::idempotentlyGetEnvironmentVariable(
334 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
339 #ifdef HAVE_TPETRA_INST_CUDA
340 constexpr
size_t defaultValue(16);
342 constexpr
size_t defaultValue(256);
345 static size_t value_ = defaultValue;
346 static bool initialized_ =
false;
347 return Teuchos::idempotentlyGetEnvironmentVariable(
348 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
353 #ifdef HAVE_TPETRA_INST_CUDA
354 const size_t defaultValue(16);
356 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
359 static size_t value_ = defaultValue;
360 static bool initialized_ =
false;
361 return Teuchos::idempotentlyGetEnvironmentVariable(
362 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
367 constexpr
bool defaultValue(
false);
369 static bool value_ = defaultValue;
370 static bool initialized_ =
false;
371 return Teuchos::idempotentlyGetEnvironmentVariable(
372 value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
376 constexpr
bool defaultValue(
false);
378 static bool value_ = defaultValue;
379 static bool initialized_ =
false;
380 return Teuchos::idempotentlyGetEnvironmentVariable(
381 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
386 constexpr
bool defaultValue =
false;
388 static bool initialized_ =
false;
389 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
390 name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
394 if (BehaviorDetails::verboseDisabled_)
397 constexpr
bool defaultValue =
false;
399 static bool initialized_ =
false;
400 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
401 name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
405 BehaviorDetails::verboseDisabled_ =
false;
409 BehaviorDetails::verboseDisabled_ =
true;
413 if (BehaviorDetails::timingDisabled_)
416 constexpr
bool defaultValue =
false;
418 static bool initialized_ =
false;
419 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
420 name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
428 constexpr
bool defaultValue = hierarchicalUnpackDefault();
430 static bool value_ = defaultValue;
431 static bool initialized_ =
false;
432 return Teuchos::idempotentlyGetEnvironmentVariable(
433 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
437 constexpr
bool defaultValue(
false);
439 static bool value_ = defaultValue;
440 static bool initialized_ =
false;
441 return Teuchos::idempotentlyGetEnvironmentVariable(
442 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
447 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
448 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
449 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
450 constexpr
bool defaultValue(
false);
452 constexpr
bool defaultValue(
true);
455 static bool value_ = defaultValue;
456 static bool initialized_ =
false;
457 return Teuchos::idempotentlyGetEnvironmentVariable(
458 value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
462 constexpr
bool defaultValue(
false);
464 static bool value_ = defaultValue;
465 static bool initialized_ =
false;
466 return Teuchos::idempotentlyGetEnvironmentVariable(
467 value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
471 const std::string defaultValue(
"Send");
473 static std::string value_ = defaultValue;
474 static bool initialized_ =
false;
475 return Teuchos::idempotentlyGetEnvironmentVariable(
476 value_, initialized_, BehaviorDetails::DEFAULT_SEND_TYPE, defaultValue);
480 constexpr
bool defaultValue(
false);
482 static bool value_ = defaultValue;
483 static bool initialized_ =
false;
484 return Teuchos::idempotentlyGetEnvironmentVariable(
485 value_, initialized_, BehaviorDetails::GRANULAR_TRANSFERS, defaultValue);
489 constexpr
size_t defaultValue(16);
491 static size_t value_ = defaultValue;
492 static bool initialized_ =
false;
493 return Teuchos::idempotentlyGetEnvironmentVariable(
494 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
499 constexpr
bool defaultValue(
false);
501 static bool value_ = defaultValue;
502 static bool initialized_ =
false;
503 return Teuchos::idempotentlyGetEnvironmentVariable(
504 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
509 constexpr
bool defaultValue(
false);
511 static bool value_ = defaultValue;
512 static bool initialized_ =
false;
513 return Teuchos::idempotentlyGetEnvironmentVariable(
514 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
519 constexpr
bool defaultValue(
false);
521 static bool value_ = defaultValue;
522 static bool initialized_ =
false;
523 return Teuchos::idempotentlyGetEnvironmentVariable(
524 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
529 constexpr
bool defaultValue(
false);
531 static bool value_ = defaultValue;
532 static bool initialized_ =
false;
533 return Teuchos::idempotentlyGetEnvironmentVariable(
534 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
538 constexpr
bool defaultValue(
false);
540 static bool value_ = defaultValue;
541 static bool initialized_ =
false;
542 return Teuchos::idempotentlyGetEnvironmentVariable(
543 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static bool timing()
Whether Tpetra is in timing mode.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t multivectorKernelLocationThreshold()
The threshold for transitioning from device to host.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool debug()
Whether Tpetra is in debug mode.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static void disable_timing()
Disable timing, programmatically.
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static void enable_timing()
Enable timing, programmatically.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static std::string defaultSendType()
Default send type.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool enableGranularTransfers()
Speed up transfers by overlapping computation and communication.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.