19 #include <string_view>
22 #include "Teuchos_OrdinalTraits.hpp"
23 #include "Teuchos_TestForException.hpp"
24 #include "TpetraCore_config.h"
26 #include "KokkosKernels_config.h"
63 #if not(defined(WIN) && (_MSC_VER >= 1900))
65 extern char **environ;
71 namespace BehaviorDetails {
// Name prefix reserved for Tpetra's own environment variables. The
// environment scan later in this file compares the leading characters of each
// variable name against this prefix and raises std::out_of_range (via
// TEUCHOS_TEST_FOR_EXCEPTION) when a prefixed name is not found in
// RECOGNIZED_VARS.
73 constexpr
const std::string_view RESERVED_PREFIX =
"TPETRA_";
// Names of the environment variables consulted by this file, one constant per
// variable. Every constant defined here is also listed in RECOGNIZED_VARS.
// All names carry the "TPETRA_" prefix except CUDA_LAUNCH_BLOCKING and
// MM_TAFC_OptimizationCoreCount, whose values are spelled without it.
74 constexpr
const std::string_view ASSUME_GPU_AWARE_MPI =
75 "TPETRA_ASSUME_GPU_AWARE_MPI";
76 constexpr
const std::string_view CUDA_LAUNCH_BLOCKING =
"CUDA_LAUNCH_BLOCKING";
77 constexpr
const std::string_view MM_TAFC_OptimizationCoreCount =
78 "MM_TAFC_OptimizationCoreCount";
79 constexpr
const std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
80 "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
81 constexpr
const std::string_view ROW_IMBALANCE_THRESHOLD =
82 "TPETRA_ROW_IMBALANCE_THRESHOLD";
83 constexpr
const std::string_view MULTIVECTOR_USE_MERGE_PATH =
84 "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
85 constexpr
const std::string_view VECTOR_DEVICE_THRESHOLD =
86 "TPETRA_VECTOR_DEVICE_THRESHOLD";
87 constexpr
const std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
88 "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
89 constexpr
const std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
90 "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";
91 constexpr
const std::string_view USE_TEUCHOS_TIMERS =
92 "TPETRA_USE_TEUCHOS_TIMERS";
93 constexpr
const std::string_view USE_KOKKOS_PROFILING =
94 "TPETRA_USE_KOKKOS_PROFILING";
95 constexpr
const std::string_view DEBUG =
"TPETRA_DEBUG";
96 constexpr
const std::string_view VERBOSE =
"TPETRA_VERBOSE";
97 constexpr
const std::string_view TIMING =
"TPETRA_TIMING";
98 constexpr
const std::string_view HIERARCHICAL_UNPACK =
99 "TPETRA_HIERARCHICAL_UNPACK";
100 constexpr
const std::string_view SKIP_COPY_AND_PERMUTE =
101 "TPETRA_SKIP_COPY_AND_PERMUTE";
102 constexpr
const std::string_view FUSED_RESIDUAL =
"TPETRA_FUSED_RESIDUAL";
103 constexpr
const std::string_view OVERLAP =
"TPETRA_OVERLAP";
104 constexpr
const std::string_view DEFAULT_SEND_TYPE =
"TPETRA_DEFAULT_SEND_TYPE";
105 constexpr
const std::string_view GRANULAR_TRANSFERS =
"TPETRA_GRANULAR_TRANSFERS";
106 constexpr
const std::string_view SPACES_ID_WARN_LIMIT =
107 "TPETRA_SPACES_ID_WARN_LIMIT";
108 constexpr
const std::string_view TIME_KOKKOS_DEEP_COPY =
109 "TPETRA_TIME_KOKKOS_DEEP_COPY";
110 constexpr
const std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
111 "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
112 constexpr
const std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
113 "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
114 constexpr
const std::string_view TIME_KOKKOS_FENCE =
"TPETRA_TIME_KOKKOS_FENCE";
115 constexpr
const std::string_view TIME_KOKKOS_FUNCTIONS =
116 "TPETRA_TIME_KOKKOS_FUNCTIONS";
// Builds a std::array of std::string_view from the given arguments. The array
// length is sizeof...(Elems), so it is deduced from the number of arguments
// instead of being written out by hand. Each argument is perfectly forwarded
// into the array's initializer list.
120 template <
typename... Elems>
121 constexpr std::array<std::string_view,
sizeof...(Elems)>
122 make_array(Elems &&... elems) {
123 return {std::forward<Elems>(elems)...};
// The list of environment variable names this file accepts, built with
// make_array so its length follows the argument count. The environment scan
// later in this file searches this list with std::find for every variable
// whose name starts with RESERVED_PREFIX. CUDA_LAUNCH_BLOCKING and
// MM_TAFC_OptimizationCoreCount are listed too, although their names do not
// start with that prefix.
126 constexpr
const auto RECOGNIZED_VARS = make_array(
127 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
128 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
129 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
130 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
131 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
132 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
133 DEFAULT_SEND_TYPE, GRANULAR_TRANSFERS,
134 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
135 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
// Two-level lookup table for the "named" boolean queries: the outer key is an
// environment variable name, the inner key is a caller-supplied name or the
// literal "DEFAULT". idempotentlyGetNamedEnvironmentVariableAsBool reads the
// inner map and falls back to its "DEFAULT" entry when the name is absent.
138 std::map<std::string, std::map<std::string, bool> > namedVariableMap_;
// Override flag tested at the top of the verbose queries before they consult
// the environment; elsewhere in this file it is assigned both true and false.
// NOTE(review): the owning function headers are not visible in this listing;
// presumably these are the programmatic enable/disable entry points - confirm.
139 bool verboseDisabled_ =
false;
// Override flag tested at the top of the timing queries before they consult
// the environment. NOTE(review): no assignment to it is visible in this
// listing; presumably it is written by the timing enable/disable entry
// points - confirm.
140 bool timingDisabled_ =
false;
// Classification of an environment variable's value, as produced by
// environmentVariableState.
145 enum EnvironmentVariableState
// Value, compared case-insensitively, is one of "1", "YES", "TRUE", "ON".
147 EnvironmentVariableIsSet_ON,
// Value, compared case-insensitively, is one of "0", "NO", "FALSE", "OFF".
148 EnvironmentVariableIsSet_OFF,
// Value matches none of the ON/OFF spellings above.
149 EnvironmentVariableIsSet,
// NOTE(review): never returned by environmentVariableState in the lines
// visible here; presumably denotes a variable absent from the environment -
// confirm against the callers.
150 EnvironmentVariableIsNotSet
// Upper-cases a string. `s` is taken by value, so the caller's string is not
// modified; std::transform writes the converted characters back into that
// local copy. The lambda parameter is `unsigned char`, so each character is
// converted to unsigned char before it is handed to std::toupper.
// NOTE(review): the return statement is not visible in this listing;
// presumably the transformed `s` is returned.
156 std::string stringToUpper (std::string s)
158 std::transform (s.begin (), s.end (), s.begin (),
159 [] (
unsigned char c) {
return std::toupper (c); });
164 split(
const std::string_view s,
165 std::function<
void(
const std::string&)> f,
168 typedef std::string::size_type size_type;
169 size_type cur_pos, last_pos=0, length=s.length();
170 while(last_pos < length + 1)
172 cur_pos = s.find_first_of(sep, last_pos);
173 if(cur_pos == std::string::npos)
177 if(cur_pos!=last_pos) {
178 auto token = std::string(s.data()+last_pos, (size_type)cur_pos-last_pos);
181 last_pos = cur_pos + 1;
// Classifies the text of an environment variable's value.
//
// The value is upper-cased with stringToUpper first, so the comparison is
// case-insensitive. Returns EnvironmentVariableIsSet_ON for "1", "YES",
// "TRUE" or "ON"; EnvironmentVariableIsSet_OFF for "0", "NO", "FALSE" or
// "OFF"; and EnvironmentVariableIsSet for any other text. Only exact matches
// count: the value is not trimmed, so surrounding whitespace prevents a match.
186 EnvironmentVariableState
187 environmentVariableState(
const std::string& environmentVariableValue)
189 std::string v = stringToUpper(environmentVariableValue);
190 if (v ==
"1" || v ==
"YES" || v ==
"TRUE" || v ==
"ON")
192 return EnvironmentVariableIsSet_ON;
193 else if (v ==
"0" || v ==
"NO" || v ==
"FALSE" || v ==
"OFF")
195 return EnvironmentVariableIsSet_OFF;
// Any other spelling is reported as "set" without an on/off meaning.
197 return EnvironmentVariableIsSet;
201 setEnvironmentVariableMap (
const char environmentVariableName[],
202 std::map<std::string,std::map<std::string, bool> >& valsMap,
203 const bool defaultValue)
211 valsMap[environmentVariableName] = map<string,bool>{{
"DEFAULT", defaultValue}};
213 const char* varVal = getenv (environmentVariableName);
214 if (varVal ==
nullptr) {
221 const string varStr(varVal);
222 vector<string> names;
223 split(varStr, [&](
const string& x){names.push_back(x);});
224 for (
auto const& name: names) {
225 auto state = environmentVariableState(name);
226 if (state == EnvironmentVariableIsSet_ON) {
229 valsMap[environmentVariableName][
"DEFAULT"] =
true;
231 else if (state == EnvironmentVariableIsSet_OFF) {
234 valsMap[environmentVariableName][
"DEFAULT"] =
false;
239 valsMap[environmentVariableName][name] =
true;
246 idempotentlyGetNamedEnvironmentVariableAsBool (
const char name[],
248 const char environmentVariableName[],
249 const bool defaultValue)
251 using BehaviorDetails::namedVariableMap_;
253 setEnvironmentVariableMap (environmentVariableName,
258 auto thisEnvironmentVariableMap = namedVariableMap_[environmentVariableName];
259 auto thisEnvironmentVariable = thisEnvironmentVariableMap.find(name);
260 if (thisEnvironmentVariable != thisEnvironmentVariableMap.end())
261 return thisEnvironmentVariable->second;
262 return thisEnvironmentVariableMap[
"DEFAULT"];
266 template <
typename T>
267 T getEnvironmentVariable(
const std::string_view environmentVariableName,
268 const T defaultValue) {
269 const char prefix[] =
"Tpetra::Details::Behavior: ";
271 const char *varVal = std::getenv(environmentVariableName.data());
272 if (varVal ==
nullptr) {
275 std::stringstream ss(varVal);
279 TEUCHOS_TEST_FOR_EXCEPTION(!ss, std::out_of_range,
280 prefix <<
"Environment "
282 << environmentVariableName
286 <<
" that cannot be parsed as a "
287 <<
typeid(T).name() <<
".");
295 bool getEnvironmentVariable<bool>(
296 const std::string_view environmentVariableName,
const bool defaultValue) {
297 const char *varVal = std::getenv(environmentVariableName.data());
298 bool retVal = defaultValue;
299 if (varVal !=
nullptr) {
300 auto state = environmentVariableState(std::string(varVal));
301 if (state == EnvironmentVariableIsSet_ON)
303 else if (state == EnvironmentVariableIsSet_OFF)
316 getEnvironmentVariable<size_t>(
const std::string_view environmentVariableName,
317 const size_t defaultValue) {
318 const char prefix[] =
"Tpetra::Details::Behavior: ";
320 const char *varVal = std::getenv(environmentVariableName.data());
321 if (varVal ==
nullptr) {
324 long long val = std::stoll(stringToUpper(varVal));
325 if (val < static_cast<long long>(0)) {
327 return std::numeric_limits<size_t>::max();
329 if (
sizeof(
long long) >
sizeof(
size_t)) {
333 constexpr
long long maxSizeT =
334 static_cast<long long>(std::numeric_limits<size_t>::max());
335 TEUCHOS_TEST_FOR_EXCEPTION(
336 val > maxSizeT, std::out_of_range,
337 prefix <<
"Environment "
339 << environmentVariableName
342 << val <<
" larger than the largest size_t value " << maxSizeT
345 return static_cast<size_t>(val);
349 template <
typename T>
350 T idempotentlyGetEnvironmentVariable(
351 T &value,
bool &initialized,
const std::string_view environmentVariableName,
352 const T defaultValue) {
354 value = getEnvironmentVariable<T>(environmentVariableName, defaultValue);
361 constexpr
bool debugDefault () {
362 #ifdef HAVE_TPETRA_DEBUG
366 #endif // HAVE_TPETRA_DEBUG
369 constexpr
bool verboseDefault () {
373 constexpr
bool timingDefault () {
377 constexpr
bool assumeMpiIsGPUAwareDefault () {
378 #ifdef TPETRA_ASSUME_GPU_AWARE_MPI
382 #endif // TPETRA_ASSUME_GPU_AWARE_MPI
385 constexpr
bool cudaLaunchBlockingDefault () {
389 constexpr
bool hierarchicalUnpackDefault () {
398 static bool once =
false;
401 const char prefix[] =
"Tpetra::Details::Behavior: ";
403 #if defined(WIN) && (_MSC_VER >= 1900)
404 env = *__p__environ();
408 for (; *env; ++env) {
412 const std::string_view ev(*env);
417 [&](
const std::string &s) {
426 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
427 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
428 BehaviorDetails::RESERVED_PREFIX) {
429 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
430 BehaviorDetails::RECOGNIZED_VARS.end(), name);
431 TEUCHOS_TEST_FOR_EXCEPTION(
432 it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
433 prefix <<
"Environment "
435 << name <<
"\" (prefixed with \""
436 << BehaviorDetails::RESERVED_PREFIX
437 <<
"\") is not a recognized Tpetra variable.");
446 constexpr
bool defaultValue = debugDefault();
448 static bool value_ = defaultValue;
449 static bool initialized_ =
false;
450 return idempotentlyGetEnvironmentVariable(
451 value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
455 if (BehaviorDetails::verboseDisabled_)
458 constexpr
bool defaultValue = verboseDefault();
460 static bool value_ = defaultValue;
461 static bool initialized_ =
false;
462 return idempotentlyGetEnvironmentVariable(
463 value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
467 if (BehaviorDetails::timingDisabled_)
470 constexpr
bool defaultValue = timingDefault();
472 static bool value_ = defaultValue;
473 static bool initialized_ =
false;
474 return idempotentlyGetEnvironmentVariable(
475 value_, initialized_, BehaviorDetails::TIMING, defaultValue);
479 constexpr
bool defaultValue = assumeMpiIsGPUAwareDefault();
481 static bool value_ = defaultValue;
482 static bool initialized_ =
false;
483 return idempotentlyGetEnvironmentVariable(
484 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
489 constexpr
bool defaultValue = cudaLaunchBlockingDefault();
491 static bool value_ = defaultValue;
492 static bool initialized_ =
false;
493 return idempotentlyGetEnvironmentVariable(
494 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
499 constexpr
int _default = 3000;
500 static int value_ = _default;
501 static bool initialized_ =
false;
502 return idempotentlyGetEnvironmentVariable(
503 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
508 constexpr
size_t defaultValue(200);
510 static size_t value_ = defaultValue;
511 static bool initialized_ =
false;
512 return idempotentlyGetEnvironmentVariable(
513 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
518 constexpr
size_t defaultValue(256);
520 static size_t value_ = defaultValue;
521 static bool initialized_ =
false;
522 return idempotentlyGetEnvironmentVariable(
523 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
528 constexpr
bool defaultValue =
false;
530 static bool value_ = defaultValue;
531 static bool initialized_ =
false;
532 return idempotentlyGetEnvironmentVariable(
533 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
538 constexpr
size_t defaultValue(22000);
540 static size_t value_ = defaultValue;
541 static bool initialized_ =
false;
542 return idempotentlyGetEnvironmentVariable(
543 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
549 #ifdef HAVE_TPETRA_INST_CUDA
550 constexpr
size_t defaultValue(16);
552 constexpr
size_t defaultValue(256);
555 static size_t value_ = defaultValue;
556 static bool initialized_ =
false;
557 return idempotentlyGetEnvironmentVariable(
558 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
563 #ifdef HAVE_TPETRA_INST_CUDA
564 const size_t defaultValue(16);
566 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
569 static size_t value_ = defaultValue;
570 static bool initialized_ =
false;
571 return idempotentlyGetEnvironmentVariable(
572 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
577 constexpr
bool defaultValue(
false);
579 static bool value_ = defaultValue;
580 static bool initialized_ =
false;
581 return idempotentlyGetEnvironmentVariable(
582 value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
586 constexpr
bool defaultValue(
false);
588 static bool value_ = defaultValue;
589 static bool initialized_ =
false;
590 return idempotentlyGetEnvironmentVariable(
591 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
596 constexpr
bool defaultValue =
false;
598 static bool initialized_ =
false;
599 return idempotentlyGetNamedEnvironmentVariableAsBool(
600 name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
604 if (BehaviorDetails::verboseDisabled_)
607 constexpr
bool defaultValue =
false;
609 static bool initialized_ =
false;
610 return idempotentlyGetNamedEnvironmentVariableAsBool(
611 name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
615 BehaviorDetails::verboseDisabled_ =
false;
619 BehaviorDetails::verboseDisabled_ =
true;
623 if (BehaviorDetails::timingDisabled_)
626 constexpr
bool defaultValue =
false;
628 static bool initialized_ =
false;
629 return idempotentlyGetNamedEnvironmentVariableAsBool(
630 name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
638 constexpr
bool defaultValue = hierarchicalUnpackDefault();
640 static bool value_ = defaultValue;
641 static bool initialized_ =
false;
642 return idempotentlyGetEnvironmentVariable(
643 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
647 constexpr
bool defaultValue(
false);
649 static bool value_ = defaultValue;
650 static bool initialized_ =
false;
651 return idempotentlyGetEnvironmentVariable(
652 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
657 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
658 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
659 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
660 constexpr
bool defaultValue(
false);
662 constexpr
bool defaultValue(
true);
665 static bool value_ = defaultValue;
666 static bool initialized_ =
false;
667 return idempotentlyGetEnvironmentVariable(
668 value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
672 constexpr
bool defaultValue(
false);
674 static bool value_ = defaultValue;
675 static bool initialized_ =
false;
676 return idempotentlyGetEnvironmentVariable(
677 value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
681 const std::string defaultValue(
"Send");
683 static std::string value_ = defaultValue;
684 static bool initialized_ =
false;
685 return idempotentlyGetEnvironmentVariable(
686 value_, initialized_, BehaviorDetails::DEFAULT_SEND_TYPE, defaultValue);
690 constexpr
bool defaultValue(
false);
692 static bool value_ = defaultValue;
693 static bool initialized_ =
false;
694 return idempotentlyGetEnvironmentVariable(
695 value_, initialized_, BehaviorDetails::GRANULAR_TRANSFERS, defaultValue);
699 constexpr
size_t defaultValue(16);
701 static size_t value_ = defaultValue;
702 static bool initialized_ =
false;
703 return idempotentlyGetEnvironmentVariable(
704 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
709 constexpr
bool defaultValue(
false);
711 static bool value_ = defaultValue;
712 static bool initialized_ =
false;
713 return idempotentlyGetEnvironmentVariable(
714 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
719 constexpr
bool defaultValue(
false);
721 static bool value_ = defaultValue;
722 static bool initialized_ =
false;
723 return idempotentlyGetEnvironmentVariable(
724 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
729 constexpr
bool defaultValue(
false);
731 static bool value_ = defaultValue;
732 static bool initialized_ =
false;
733 return idempotentlyGetEnvironmentVariable(
734 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
739 constexpr
bool defaultValue(
false);
741 static bool value_ = defaultValue;
742 static bool initialized_ =
false;
743 return idempotentlyGetEnvironmentVariable(
744 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
748 constexpr
bool defaultValue(
false);
750 static bool value_ = defaultValue;
751 static bool initialized_ =
false;
752 return idempotentlyGetEnvironmentVariable(
753 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static bool timing()
Whether Tpetra is in timing mode.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t multivectorKernelLocationThreshold()
Threshold for transitioning from device to host.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool debug()
Whether Tpetra is in debug mode.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static void disable_timing()
Disable timing, programmatically.
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static void enable_timing()
Enable timing, programmatically.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static std::string defaultSendType()
Default send type.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool enableGranularTransfers()
Speed up transfers by overlapping computation and communication.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.