19 #include <string_view>
22 #include "Teuchos_OrdinalTraits.hpp"
23 #include "Teuchos_TestForException.hpp"
24 #include "TpetraCore_config.h"
26 #include "KokkosKernels_config.h"
63 #if not(defined(WIN) && (_MSC_VER >= 1900))
65 extern char **environ;
71 namespace BehaviorDetails {
// Names of the environment variables consulted by Tpetra::Details::Behavior.
//
// Each constant holds the exact spelling that is passed to getenv(). They are
// views over string literals, so the underlying character data is
// NUL-terminated and lives for the duration of the program.

// Prefix reserved for Tpetra. The environment scan in this file rejects any
// variable that starts with this prefix but is not listed in RECOGNIZED_VARS.
constexpr std::string_view RESERVED_PREFIX = "TPETRA_";

constexpr std::string_view ASSUME_GPU_AWARE_MPI = "TPETRA_ASSUME_GPU_AWARE_MPI";
// Note: the next two names do not carry RESERVED_PREFIX.
constexpr std::string_view CUDA_LAUNCH_BLOCKING = "CUDA_LAUNCH_BLOCKING";
constexpr std::string_view MM_TAFC_OptimizationCoreCount =
    "MM_TAFC_OptimizationCoreCount";
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
constexpr std::string_view ROW_IMBALANCE_THRESHOLD =
    "TPETRA_ROW_IMBALANCE_THRESHOLD";
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH =
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
constexpr std::string_view VECTOR_DEVICE_THRESHOLD =
    "TPETRA_VECTOR_DEVICE_THRESHOLD";
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";
constexpr std::string_view USE_TEUCHOS_TIMERS = "TPETRA_USE_TEUCHOS_TIMERS";
constexpr std::string_view USE_KOKKOS_PROFILING = "TPETRA_USE_KOKKOS_PROFILING";
constexpr std::string_view DEBUG = "TPETRA_DEBUG";
constexpr std::string_view VERBOSE = "TPETRA_VERBOSE";
constexpr std::string_view TIMING = "TPETRA_TIMING";
constexpr std::string_view HIERARCHICAL_UNPACK = "TPETRA_HIERARCHICAL_UNPACK";
constexpr std::string_view SKIP_COPY_AND_PERMUTE =
    "TPETRA_SKIP_COPY_AND_PERMUTE";
constexpr std::string_view FUSED_RESIDUAL = "TPETRA_FUSED_RESIDUAL";
constexpr std::string_view OVERLAP = "TPETRA_OVERLAP";
constexpr std::string_view SPACES_ID_WARN_LIMIT = "TPETRA_SPACES_ID_WARN_LIMIT";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY =
    "TPETRA_TIME_KOKKOS_DEEP_COPY";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
constexpr std::string_view TIME_KOKKOS_FENCE = "TPETRA_TIME_KOKKOS_FENCE";
constexpr std::string_view TIME_KOKKOS_FUNCTIONS =
    "TPETRA_TIME_KOKKOS_FUNCTIONS";
// Builds a std::array of std::string_view from its arguments.
//
// The array length is deduced from the number of arguments, so callers do
// not have to keep a separate element count in sync with the list. Every
// argument must be convertible to std::string_view.
template <typename... Elems>
constexpr std::array<std::string_view, sizeof...(Elems)>
make_array(Elems &&... elems) {
  return {std::forward<Elems>(elems)...};
}
124 constexpr
const auto RECOGNIZED_VARS = make_array(
125 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
126 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
127 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
128 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
129 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
130 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
131 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
132 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
// Two-level table of boolean flags: the outer key is an environment
// variable name, the inner key is an entry name. The inner maps written by
// setEnvironmentVariableMap always contain a "DEFAULT" entry.
135 std::map<std::string, std::map<std::string, bool> > namedVariableMap_;
// Flag tested before the TPETRA_VERBOSE lookups and assigned true/false
// elsewhere in this file (presumably by the programmatic enable/disable
// calls -- their signatures are not visible here).
136 bool verboseDisabled_ =
false;
// Flag tested before the TPETRA_TIMING lookups; same role as
// verboseDisabled_ but for timing.
137 bool timingDisabled_ =
false;
// Classification of the text of an environment variable (or of a single
// token of it), as produced by environmentVariableState().
142 enum EnvironmentVariableState
// Text is "1", "YES", "TRUE" or "ON" (compared after upper-casing).
144 EnvironmentVariableIsSet_ON,
// Text is "0", "NO", "FALSE" or "OFF" (compared after upper-casing).
145 EnvironmentVariableIsSet_OFF,
// Any other text.
146 EnvironmentVariableIsSet,
// Not returned by the visible part of environmentVariableState();
// presumably denotes a variable that is absent from the environment.
147 EnvironmentVariableIsNotSet
// Upper-cases the characters of s with std::toupper. s is taken by value,
// so the transformation works on a private copy (the return statement is
// not visible here; presumably that copy is returned).
153 std::string stringToUpper (std::string s)
155 std::transform (s.begin (), s.end (), s.begin (),
// The lambda takes unsigned char so that std::toupper never receives a
// negative value, which it is not defined for.
156 [] (
unsigned char c) {
return std::toupper (c); });
// Tokenizes s on the separator characters in `sep` (declared on a line not
// visible here) and builds a std::string for every non-empty token.
// `f` is the per-token callback; the call itself is on a line not visible
// here -- presumably f(token).
161 split(
const std::string_view s,
162 std::function<
void(
const std::string&)> f,
165 typedef std::string::size_type size_type;
166 size_type cur_pos, last_pos=0, length=s.length();
// `length + 1` lets the loop run once more after the last separator so a
// trailing token is still examined.
167 while(last_pos < length + 1)
// find_first_of: any single character of `sep` ends a token.
169 cur_pos = s.find_first_of(sep, last_pos);
// No further separator (the handling of this case is not visible here).
170 if(cur_pos == std::string::npos)
// Skip empty tokens produced by adjacent separators.
174 if(cur_pos!=last_pos) {
175 auto token = std::string(s.data()+last_pos, (size_type)cur_pos-last_pos);
// Resume just past the separator.
178 last_pos = cur_pos + 1;
// Classifies a piece of environment-variable text, case-insensitively:
//   "1" / "YES" / "TRUE" / "ON"   -> EnvironmentVariableIsSet_ON
//   "0" / "NO"  / "FALSE" / "OFF" -> EnvironmentVariableIsSet_OFF
//   anything else                 -> EnvironmentVariableIsSet
183 EnvironmentVariableState
184 environmentVariableState(
const std::string& environmentVariableValue)
// Normalize once so the comparisons below can use upper-case literals only.
186 std::string v = stringToUpper(environmentVariableValue);
187 if (v ==
"1" || v ==
"YES" || v ==
"TRUE" || v ==
"ON")
189 return EnvironmentVariableIsSet_ON;
190 else if (v ==
"0" || v ==
"NO" || v ==
"FALSE" || v ==
"OFF")
192 return EnvironmentVariableIsSet_OFF;
// Neither an "on" nor an "off" spelling: report only that text is present.
194 return EnvironmentVariableIsSet;
// Rebuilds valsMap[environmentVariableName] from the current environment.
//
// The entry is first reset to {"DEFAULT": defaultValue}. If the variable is
// set, its text is split into tokens (see split) and each token is
// classified with environmentVariableState():
//   - an "on" token sets the "DEFAULT" entry to true;
//   - an "off" token sets the "DEFAULT" entry to false;
//   - any other token is stored as its own key with value true.
198 setEnvironmentVariableMap (
const char environmentVariableName[],
199 std::map<std::string,std::map<std::string, bool> >& valsMap,
200 const bool defaultValue)
// Replace any previous contents for this variable.
208 valsMap[environmentVariableName] = map<string,bool>{{
"DEFAULT", defaultValue}};
210 const char* varVal = getenv (environmentVariableName);
// Variable not set (the body of this branch is not visible here;
// presumably only the "DEFAULT" entry remains).
211 if (varVal ==
nullptr) {
218 const string varStr(varVal);
219 vector<string> names;
220 split(varStr, [&](
const string& x){names.push_back(x);});
221 for (
auto const& name: names) {
222 auto state = environmentVariableState(name);
223 if (state == EnvironmentVariableIsSet_ON) {
226 valsMap[environmentVariableName][
"DEFAULT"] =
true;
228 else if (state == EnvironmentVariableIsSet_OFF) {
231 valsMap[environmentVariableName][
"DEFAULT"] =
false;
// The token is used as the key exactly as written (no case folding).
236 valsMap[environmentVariableName][name] =
true;
// Looks up the flag stored under `name` for the given environment variable
// in BehaviorDetails::namedVariableMap_, falling back to that variable's
// "DEFAULT" entry when `name` has no entry of its own.
// NOTE(review): callers in this file pass an extra `initialized` argument
// between `name` and `environmentVariableName`; that parameter and the
// guard around the setEnvironmentVariableMap call are on lines not visible
// here.
243 idempotentlyGetNamedEnvironmentVariableAsBool (
const char name[],
245 const char environmentVariableName[],
246 const bool defaultValue)
248 using BehaviorDetails::namedVariableMap_;
250 setEnvironmentVariableMap (environmentVariableName,
// NOTE(review): `auto` makes a copy of the inner map on every call, so the
// operator[] below acts on the copy and never inserts into the shared map.
255 auto thisEnvironmentVariableMap = namedVariableMap_[environmentVariableName];
256 auto thisEnvironmentVariable = thisEnvironmentVariableMap.find(name);
257 if (thisEnvironmentVariable != thisEnvironmentVariableMap.end())
258 return thisEnvironmentVariable->second;
259 return thisEnvironmentVariableMap[
"DEFAULT"];
// Generic reader: fetches the named environment variable and parses its
// text as a T through a std::stringstream.
//
// Raises std::out_of_range (via TEUCHOS_TEST_FOR_EXCEPTION) when the stream
// is in a failed state after extraction. The unset-variable branch and the
// extraction statement are on lines not visible here; presumably
// defaultValue is returned when the variable is unset.
263 template <
typename T>
264 T getEnvironmentVariable(
const std::string_view environmentVariableName,
265 const T defaultValue) {
266 const char prefix[] =
"Tpetra::Details::Behavior: ";
// getenv needs a NUL-terminated name, which string_view::data() does not
// guarantee in general; the callers visible in this file pass the
// BehaviorDetails constants, which are views over string literals.
268 const char *varVal = std::getenv(environmentVariableName.data());
269 if (varVal ==
nullptr) {
272 std::stringstream ss(varVal);
276 TEUCHOS_TEST_FOR_EXCEPTION(!ss, std::out_of_range,
277 prefix <<
"Environment "
279 << environmentVariableName
283 <<
" that cannot be parsed as a "
284 <<
typeid(T).name() <<
".");
// bool specialization: starts from defaultValue and, when the variable is
// set, classifies its whole text with environmentVariableState().
// The assignments made in the ON / OFF branches are on lines not visible
// here (presumably true / false); text that is neither "on" nor "off"
// matches neither branch.
292 bool getEnvironmentVariable<bool>(
293 const std::string_view environmentVariableName,
const bool defaultValue) {
// See the generic overload: data() is NUL-terminated for the literal-backed
// names used by the callers visible in this file.
294 const char *varVal = std::getenv(environmentVariableName.data());
295 bool retVal = defaultValue;
296 if (varVal !=
nullptr) {
297 auto state = environmentVariableState(std::string(varVal));
298 if (state == EnvironmentVariableIsSet_ON)
300 else if (state == EnvironmentVariableIsSet_OFF)
// size_t specialization: parses the variable's text as a long long and
// converts it to size_t.
//   - a negative value maps to std::numeric_limits<size_t>::max();
//   - where long long is wider than size_t, a value above the size_t
//     maximum raises std::out_of_range via TEUCHOS_TEST_FOR_EXCEPTION.
// The unset-variable branch is on lines not visible here (presumably it
// returns defaultValue).
313 getEnvironmentVariable<size_t>(
const std::string_view environmentVariableName,
314 const size_t defaultValue) {
315 const char prefix[] =
"Tpetra::Details::Behavior: ";
317 const char *varVal = std::getenv(environmentVariableName.data());
318 if (varVal ==
nullptr) {
// NOTE(review): std::stoll itself throws std::invalid_argument or
// std::out_of_range for text it cannot convert; no handler is visible here.
321 long long val = std::stoll(stringToUpper(varVal));
322 if (val < static_cast<long long>(0)) {
324 return std::numeric_limits<size_t>::max();
// The range check only matters when long long can hold values that size_t
// cannot.
326 if (
sizeof(
long long) >
sizeof(
size_t)) {
330 constexpr
long long maxSizeT =
331 static_cast<long long>(std::numeric_limits<size_t>::max());
332 TEUCHOS_TEST_FOR_EXCEPTION(
333 val > maxSizeT, std::out_of_range,
334 prefix <<
"Environment "
336 << environmentVariableName
339 << val <<
" larger than the largest size_t value " << maxSizeT
342 return static_cast<size_t>(val);
// Reads an environment variable into caller-owned storage.
//   value       - storage that receives the parsed value
//   initialized - caller-owned flag; presumably guards the lookup so it
//                 runs only once (the guard and the return statement are on
//                 lines not visible here)
//   environmentVariableName, defaultValue - forwarded to
//                 getEnvironmentVariable<T>
346 template <
typename T>
347 T idempotentlyGetEnvironmentVariable(
348 T &value,
bool &initialized,
const std::string_view environmentVariableName,
349 const T defaultValue) {
351 value = getEnvironmentVariable<T>(environmentVariableName, defaultValue);
// Compile-time default used for the TPETRA_DEBUG lookup. The result is
// selected by the HAVE_TPETRA_DEBUG build macro (the two return statements
// are on lines not visible here).
358 constexpr
bool debugDefault () {
359 #ifdef HAVE_TPETRA_DEBUG
363 #endif // HAVE_TPETRA_DEBUG
// Compile-time default used for the TPETRA_VERBOSE lookup (the returned
// value is on a line not visible here).
366 constexpr
bool verboseDefault () {
// Compile-time default used for the TPETRA_TIMING lookup (the returned
// value is on a line not visible here).
370 constexpr
bool timingDefault () {
// Compile-time default used for the TPETRA_ASSUME_GPU_AWARE_MPI lookup.
// The result is selected by the TPETRA_ASSUME_GPU_AWARE_MPI build macro
// (the two return statements are on lines not visible here).
374 constexpr
bool assumeMpiIsGPUAwareDefault () {
375 #ifdef TPETRA_ASSUME_GPU_AWARE_MPI
379 #endif // TPETRA_ASSUME_GPU_AWARE_MPI
// Compile-time default used for the CUDA_LAUNCH_BLOCKING lookup (the
// returned value is on a line not visible here).
382 constexpr
bool cudaLaunchBlockingDefault () {
// Compile-time default used for the TPETRA_HIERARCHICAL_UNPACK lookup (the
// returned value is on a line not visible here).
386 constexpr
bool hierarchicalUnpackDefault () {
// Body of the environment scan that rejects unknown TPETRA_ variables
// (the signature line is not visible here; presumably this is
// Behavior::reject_unrecognized_env_vars -- confirm).
// `once` presumably limits the scan to the first call; the test on it is
// not visible here.
395 static bool once =
false;
398 const char prefix[] =
"Tpetra::Details::Behavior: ";
// On the MSVC configuration selected here the environment block is
// obtained through __p__environ().
400 #if defined(WIN) && (_MSC_VER >= 1900)
401 env = *__p__environ();
// Walk the NULL-terminated array of environment strings.
405 for (; *env; ++env) {
409 const std::string_view ev(*env);
// Callback handed to a call whose opening line is not visible here
// (presumably split(), used to pull the name out of `ev`).
414 [&](
const std::string &s) {
// Only names that begin with RESERVED_PREFIX are checked.
423 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
424 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
425 BehaviorDetails::RESERVED_PREFIX) {
// Linear search over the list of recognized names.
426 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
427 BehaviorDetails::RECOGNIZED_VARS.end(), name);
// A prefixed name that is not in the list raises std::out_of_range.
428 TEUCHOS_TEST_FOR_EXCEPTION(
429 it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
430 prefix <<
"Environment "
432 << name <<
"\" (prefixed with \""
433 << BehaviorDetails::RESERVED_PREFIX
434 <<
"\") is not a recognized Tpetra variable.");
443 constexpr
bool defaultValue = debugDefault();
445 static bool value_ = defaultValue;
446 static bool initialized_ =
false;
447 return idempotentlyGetEnvironmentVariable(
448 value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
452 if (BehaviorDetails::verboseDisabled_)
455 constexpr
bool defaultValue = verboseDefault();
457 static bool value_ = defaultValue;
458 static bool initialized_ =
false;
459 return idempotentlyGetEnvironmentVariable(
460 value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
464 if (BehaviorDetails::timingDisabled_)
467 constexpr
bool defaultValue = timingDefault();
469 static bool value_ = defaultValue;
470 static bool initialized_ =
false;
471 return idempotentlyGetEnvironmentVariable(
472 value_, initialized_, BehaviorDetails::TIMING, defaultValue);
476 constexpr
bool defaultValue = assumeMpiIsGPUAwareDefault();
478 static bool value_ = defaultValue;
479 static bool initialized_ =
false;
480 return idempotentlyGetEnvironmentVariable(
481 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
486 constexpr
bool defaultValue = cudaLaunchBlockingDefault();
488 static bool value_ = defaultValue;
489 static bool initialized_ =
false;
490 return idempotentlyGetEnvironmentVariable(
491 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
496 constexpr
int _default = 3000;
497 static int value_ = _default;
498 static bool initialized_ =
false;
499 return idempotentlyGetEnvironmentVariable(
500 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
505 constexpr
size_t defaultValue(200);
507 static size_t value_ = defaultValue;
508 static bool initialized_ =
false;
509 return idempotentlyGetEnvironmentVariable(
510 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
515 constexpr
size_t defaultValue(256);
517 static size_t value_ = defaultValue;
518 static bool initialized_ =
false;
519 return idempotentlyGetEnvironmentVariable(
520 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
525 constexpr
bool defaultValue =
false;
527 static bool value_ = defaultValue;
528 static bool initialized_ =
false;
529 return idempotentlyGetEnvironmentVariable(
530 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
535 constexpr
size_t defaultValue(22000);
537 static size_t value_ = defaultValue;
538 static bool initialized_ =
false;
539 return idempotentlyGetEnvironmentVariable(
540 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
546 #ifdef HAVE_TPETRA_INST_CUDA
547 constexpr
size_t defaultValue(16);
549 constexpr
size_t defaultValue(256);
552 static size_t value_ = defaultValue;
553 static bool initialized_ =
false;
554 return idempotentlyGetEnvironmentVariable(
555 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
560 #ifdef HAVE_TPETRA_INST_CUDA
561 const size_t defaultValue(16);
563 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
566 static size_t value_ = defaultValue;
567 static bool initialized_ =
false;
568 return idempotentlyGetEnvironmentVariable(
569 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
574 constexpr
bool defaultValue(
false);
576 static bool value_ = defaultValue;
577 static bool initialized_ =
false;
578 return idempotentlyGetEnvironmentVariable(
579 value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
583 constexpr
bool defaultValue(
false);
585 static bool value_ = defaultValue;
586 static bool initialized_ =
false;
587 return idempotentlyGetEnvironmentVariable(
588 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
593 constexpr
bool defaultValue =
false;
595 static bool initialized_ =
false;
596 return idempotentlyGetNamedEnvironmentVariableAsBool(
597 name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
601 if (BehaviorDetails::verboseDisabled_)
604 constexpr
bool defaultValue =
false;
606 static bool initialized_ =
false;
607 return idempotentlyGetNamedEnvironmentVariableAsBool(
608 name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
612 BehaviorDetails::verboseDisabled_ =
false;
616 BehaviorDetails::verboseDisabled_ =
true;
620 if (BehaviorDetails::timingDisabled_)
623 constexpr
bool defaultValue =
false;
625 static bool initialized_ =
false;
626 return idempotentlyGetNamedEnvironmentVariableAsBool(
627 name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
635 constexpr
bool defaultValue = hierarchicalUnpackDefault();
637 static bool value_ = defaultValue;
638 static bool initialized_ =
false;
639 return idempotentlyGetEnvironmentVariable(
640 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
644 constexpr
bool defaultValue(
false);
646 static bool value_ = defaultValue;
647 static bool initialized_ =
false;
648 return idempotentlyGetEnvironmentVariable(
649 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
654 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
655 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
656 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
657 constexpr
bool defaultValue(
false);
659 constexpr
bool defaultValue(
true);
662 static bool value_ = defaultValue;
663 static bool initialized_ =
false;
664 return idempotentlyGetEnvironmentVariable(
665 value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
669 constexpr
bool defaultValue(
false);
671 static bool value_ = defaultValue;
672 static bool initialized_ =
false;
673 return idempotentlyGetEnvironmentVariable(
674 value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
678 constexpr
size_t defaultValue(16);
680 static size_t value_ = defaultValue;
681 static bool initialized_ =
false;
682 return idempotentlyGetEnvironmentVariable(
683 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
688 constexpr
bool defaultValue(
false);
690 static bool value_ = defaultValue;
691 static bool initialized_ =
false;
692 return idempotentlyGetEnvironmentVariable(
693 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
698 constexpr
bool defaultValue(
false);
700 static bool value_ = defaultValue;
701 static bool initialized_ =
false;
702 return idempotentlyGetEnvironmentVariable(
703 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
708 constexpr
bool defaultValue(
false);
710 static bool value_ = defaultValue;
711 static bool initialized_ =
false;
712 return idempotentlyGetEnvironmentVariable(
713 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
718 constexpr
bool defaultValue(
false);
720 static bool value_ = defaultValue;
721 static bool initialized_ =
false;
722 return idempotentlyGetEnvironmentVariable(
723 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
727 constexpr
bool defaultValue(
false);
729 static bool value_ = defaultValue;
730 static bool initialized_ =
false;
731 return idempotentlyGetEnvironmentVariable(
732 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static bool timing()
Whether Tpetra is in timing mode.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t multivectorKernelLocationThreshold()
The threshold for transitioning from device to host.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool debug()
Whether Tpetra is in debug mode.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static void disable_timing()
Disable timing, programmatically.
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static void enable_timing()
Enable timing, programmatically.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.