50 #include <string_view>
53 #include "Teuchos_OrdinalTraits.hpp"
54 #include "Teuchos_TestForException.hpp"
55 #include "TpetraCore_config.h"
57 #include "KokkosKernels_config.h"
94 #if not(defined(WIN) && (_MSC_VER >= 1900))
96 extern char **environ;
102 namespace BehaviorDetails {
// Names of the environment variables consulted by Tpetra::Details::Behavior.
//
// RESERVED_PREFIX is the prefix Tpetra reserves for its own variables; every
// name below carries it except CUDA_LAUNCH_BLOCKING and
// MM_TAFC_OptimizationCoreCount.  `constexpr` already makes each object
// const, so no separate `const` qualifier is spelled out.
constexpr std::string_view RESERVED_PREFIX = "TPETRA_";
constexpr std::string_view ASSUME_GPU_AWARE_MPI =
    "TPETRA_ASSUME_GPU_AWARE_MPI";
constexpr std::string_view CUDA_LAUNCH_BLOCKING = "CUDA_LAUNCH_BLOCKING";
constexpr std::string_view MM_TAFC_OptimizationCoreCount =
    "MM_TAFC_OptimizationCoreCount";
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
constexpr std::string_view ROW_IMBALANCE_THRESHOLD =
    "TPETRA_ROW_IMBALANCE_THRESHOLD";
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH =
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
constexpr std::string_view VECTOR_DEVICE_THRESHOLD =
    "TPETRA_VECTOR_DEVICE_THRESHOLD";
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";
constexpr std::string_view USE_TEUCHOS_TIMERS = "TPETRA_USE_TEUCHOS_TIMERS";
constexpr std::string_view USE_KOKKOS_PROFILING =
    "TPETRA_USE_KOKKOS_PROFILING";
constexpr std::string_view DEBUG = "TPETRA_DEBUG";
constexpr std::string_view VERBOSE = "TPETRA_VERBOSE";
constexpr std::string_view TIMING = "TPETRA_TIMING";
constexpr std::string_view HIERARCHICAL_UNPACK = "TPETRA_HIERARCHICAL_UNPACK";
constexpr std::string_view SKIP_COPY_AND_PERMUTE =
    "TPETRA_SKIP_COPY_AND_PERMUTE";
constexpr std::string_view FUSED_RESIDUAL = "TPETRA_FUSED_RESIDUAL";
constexpr std::string_view OVERLAP = "TPETRA_OVERLAP";
constexpr std::string_view SPACES_ID_WARN_LIMIT =
    "TPETRA_SPACES_ID_WARN_LIMIT";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY =
    "TPETRA_TIME_KOKKOS_DEEP_COPY";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
constexpr std::string_view TIME_KOKKOS_FENCE = "TPETRA_TIME_KOKKOS_FENCE";
constexpr std::string_view TIME_KOKKOS_FUNCTIONS =
    "TPETRA_TIME_KOKKOS_FUNCTIONS";
// Build a std::array of std::string_view from any number of arguments.
//
// Each argument is perfectly forwarded into one array element, so every
// argument must be convertible to std::string_view.  The array length is the
// number of arguments, and the function is usable in constant expressions.
template <typename... Elems>
constexpr std::array<std::string_view, sizeof...(Elems)>
make_array(Elems &&... elems) {
  return {std::forward<Elems>(elems)...};
}
155 constexpr
const auto RECOGNIZED_VARS = make_array(
156 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
157 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
158 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
159 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
160 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
161 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
162 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
163 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
// Table of named overrides: the outer key is an environment variable name,
// the inner key is a name taken from that variable's value, and the inner key
// "DEFAULT" holds the fallback answer (filled by setEnvironmentVariableMap).
std::map<std::string, std::map<std::string, bool> > namedVariableMap_;

// Programmatic switches, both initially off.  They are assigned elsewhere in
// this file and tested by the verbose and timing accessors before any
// environment variable is consulted.
bool verboseDisabled_ = false;
bool timingDisabled_ = false;
// How the text of an environment variable (or one token of it) was
// classified by environmentVariableState().
enum EnvironmentVariableState
{
  EnvironmentVariableIsSet_ON,   // text is one of 1 / YES / TRUE / ON
  EnvironmentVariableIsSet_OFF,  // text is one of 0 / NO / FALSE / OFF
  EnvironmentVariableIsSet,      // text is anything else
  EnvironmentVariableIsNotSet    // not produced by environmentVariableState()
};
// Return a copy of `s` with every character converted by std::toupper.
//
// The argument is taken by value, modified in place and returned.  Each
// character goes through `unsigned char` first because std::toupper is only
// defined for values representable as unsigned char (or EOF); the int result
// is narrowed back to char explicitly.
std::string stringToUpper (std::string s)
{
  for (char& ch : s) {
    ch = static_cast<char> (std::toupper (static_cast<unsigned char> (ch)));
  }
  return s;
}
// Walk through `s`, cutting it at each occurrence of `sep`, and build a
// std::string token for every non-empty piece.  `f` is the per-token callback
// supplied by the caller.
//
// NOTE(review): this listing omits the return type, the declaration of `sep`
// (the visible two-argument call suggests it has a default), the statement
// executed when no separator is found, and the use of `token`.  Presumably
// cur_pos is clamped to `length` and `f(token)` is invoked -- confirm against
// the full source.
192 split(
const std::string_view s,
193 std::function<
void(
const std::string&)> f,
196 typedef std::string::size_type size_type;
197 size_type cur_pos, last_pos=0, length=s.length();
198 while(last_pos < length + 1)
// Next separator at or after last_pos (npos if there is none).
200 cur_pos = s.find_first_of(sep, last_pos);
201 if(cur_pos == std::string::npos)
// Equal positions mean an empty field; those are skipped.
205 if(cur_pos!=last_pos) {
// Copy the characters in [last_pos, cur_pos) into an owning string.
206 auto token = std::string(s.data()+last_pos, (size_type)cur_pos-last_pos);
// Resume scanning just past the separator.
209 last_pos = cur_pos + 1;
214 EnvironmentVariableState
215 environmentVariableState(
const std::string& environmentVariableValue)
217 std::string v = stringToUpper(environmentVariableValue);
218 if (v ==
"1" || v ==
"YES" || v ==
"TRUE" || v ==
"ON")
220 return EnvironmentVariableIsSet_ON;
221 else if (v ==
"0" || v ==
"NO" || v ==
"FALSE" || v ==
"OFF")
223 return EnvironmentVariableIsSet_OFF;
225 return EnvironmentVariableIsSet;
// Fill `valsMap[environmentVariableName]` from the current value of that
// environment variable.
//
// The entry is first reset to a map holding only {"DEFAULT", defaultValue}.
// The variable's text is then tokenized with split(); each token is
// classified with environmentVariableState().  An ON token sets "DEFAULT" to
// true, an OFF token sets "DEFAULT" to false.
//
// NOTE(review): this listing omits the return type, the using-declarations
// that make `map`, `string`, `vector` and `getenv` visible, the body of the
// `varVal == nullptr` test (presumably an early return) and the branch that
// encloses the final assignment (presumably an `else`, so that any other
// token is recorded as a named entry set to true) -- confirm against the full
// source.
229 setEnvironmentVariableMap (
const char environmentVariableName[],
230 std::map<std::string,std::map<std::string, bool> >& valsMap,
231 const bool defaultValue)
// Start from a fresh inner map; anything stored earlier under this name is replaced.
239 valsMap[environmentVariableName] = map<string,bool>{{
"DEFAULT", defaultValue}};
241 const char* varVal = getenv (environmentVariableName);
242 if (varVal ==
nullptr) {
249 const string varStr(varVal);
250 vector<string> names;
// Collect the tokens of the variable's value.
251 split(varStr, [&](
const string& x){names.push_back(x);});
252 for (
auto const& name: names) {
253 auto state = environmentVariableState(name);
254 if (state == EnvironmentVariableIsSet_ON) {
257 valsMap[environmentVariableName][
"DEFAULT"] =
true;
259 else if (state == EnvironmentVariableIsSet_OFF) {
262 valsMap[environmentVariableName][
"DEFAULT"] =
false;
267 valsMap[environmentVariableName][name] =
true;
// Look up `name` in the table stored for `environmentVariableName` in
// BehaviorDetails::namedVariableMap_.  Returns the value stored under `name`
// if present, otherwise the value stored under "DEFAULT".
//
// NOTE(review): this listing omits the return type, one parameter (call sites
// pass a bool flag as the second argument), the remaining arguments of the
// setEnvironmentVariableMap call and whatever guard surrounds it.  Presumably
// the table is built only on the first call -- confirm against the full
// source.
//
// NOTE(review): `auto thisEnvironmentVariableMap = ...` deduces a value type,
// so the inner map is copied on every call, and the final operator[] lookup
// of "DEFAULT" acts on that copy.  Binding a reference would avoid the copy.
274 idempotentlyGetNamedEnvironmentVariableAsBool (
const char name[],
276 const char environmentVariableName[],
277 const bool defaultValue)
279 using BehaviorDetails::namedVariableMap_;
281 setEnvironmentVariableMap (environmentVariableName,
286 auto thisEnvironmentVariableMap = namedVariableMap_[environmentVariableName];
287 auto thisEnvironmentVariable = thisEnvironmentVariableMap.find(name);
288 if (thisEnvironmentVariable != thisEnvironmentVariableMap.end())
289 return thisEnvironmentVariable->second;
290 return thisEnvironmentVariableMap[
"DEFAULT"];
// Generic reader: fetch the environment variable `environmentVariableName`
// and interpret its text as a T through a std::stringstream.
//
// Throws std::out_of_range (via TEUCHOS_TEST_FOR_EXCEPTION) when the stream
// is in a failed state after parsing.
//
// `environmentVariableName.data()` is handed to std::getenv, which needs a
// null-terminated string; the names passed in this file are string_views over
// string literals, which satisfy that.
//
// NOTE(review): this listing omits the body of the `varVal == nullptr` test
// (presumably `return defaultValue;`), the extraction from `ss` and the final
// return -- confirm against the full source.
294 template <
typename T>
295 T getEnvironmentVariable(
const std::string_view environmentVariableName,
296 const T defaultValue) {
// Prepended to the exception message below.
297 const char prefix[] =
"Tpetra::Details::Behavior: ";
299 const char *varVal = std::getenv(environmentVariableName.data());
300 if (varVal ==
nullptr) {
303 std::stringstream ss(varVal);
307 TEUCHOS_TEST_FOR_EXCEPTION(!ss, std::out_of_range,
308 prefix <<
"Environment "
310 << environmentVariableName
314 <<
" that cannot be parsed as a "
315 <<
typeid(T).name() <<
".");
// bool specialization: the variable's text is classified with
// environmentVariableState() rather than parsed through a stream.  The result
// starts out as `defaultValue` and is only reconsidered when the variable is
// set.
//
// NOTE(review): this listing omits the `template <>` header, the statements
// under the ON and OFF tests (presumably retVal = true / false) and the final
// return -- confirm against the full source.
323 bool getEnvironmentVariable<bool>(
324 const std::string_view environmentVariableName,
const bool defaultValue) {
325 const char *varVal = std::getenv(environmentVariableName.data());
326 bool retVal = defaultValue;
327 if (varVal !=
nullptr) {
328 auto state = environmentVariableState(std::string(varVal));
329 if (state == EnvironmentVariableIsSet_ON)
331 else if (state == EnvironmentVariableIsSet_OFF)
// size_t specialization: the variable's text is parsed as a long long with
// std::stoll.  A negative value yields std::numeric_limits<size_t>::max().
// Where long long is wider than size_t, a value above the largest size_t
// raises std::out_of_range via TEUCHOS_TEST_FOR_EXCEPTION.  Otherwise the
// value is cast to size_t and returned.
//
// std::stoll itself throws std::invalid_argument or std::out_of_range on text
// it cannot convert, so malformed input surfaces as a standard-library
// exception without the "Tpetra::Details::Behavior: " prefix.
//
// NOTE(review): this listing omits the `template <>` header and return type,
// the body of the `varVal == nullptr` test (presumably `return
// defaultValue;`) and parts of the exception message -- confirm against the
// full source.
344 getEnvironmentVariable<size_t>(
const std::string_view environmentVariableName,
345 const size_t defaultValue) {
346 const char prefix[] =
"Tpetra::Details::Behavior: ";
348 const char *varVal = std::getenv(environmentVariableName.data());
349 if (varVal ==
nullptr) {
352 long long val = std::stoll(stringToUpper(varVal));
353 if (val < static_cast<long long>(0)) {
355 return std::numeric_limits<size_t>::max();
// The range check only applies where long long can hold values above the largest size_t.
357 if (
sizeof(
long long) >
sizeof(
size_t)) {
361 constexpr
long long maxSizeT =
362 static_cast<long long>(std::numeric_limits<size_t>::max());
363 TEUCHOS_TEST_FOR_EXCEPTION(
364 val > maxSizeT, std::out_of_range,
365 prefix <<
"Environment "
367 << environmentVariableName
370 << val <<
" larger than the largest size_t value " << maxSizeT
373 return static_cast<size_t>(val);
// Store into the caller-owned `value` the result of
// getEnvironmentVariable<T>(environmentVariableName, defaultValue).  Callers
// in this file pass function-local statics for `value` and `initialized`.
//
// NOTE(review): this listing omits the statement guarding the assignment, any
// update of `initialized` and the return.  Presumably the environment is read
// only while `initialized` is false and `value` is returned -- confirm
// against the full source.
377 template <
typename T>
378 T idempotentlyGetEnvironmentVariable(
379 T &value,
bool &initialized,
const std::string_view environmentVariableName,
380 const T defaultValue) {
382 value = getEnvironmentVariable<T>(environmentVariableName, defaultValue);
// Compile-time defaults used by the accessors when the matching environment
// variable is not set.
//
// NOTE(review): the return statements and closing braces of all six functions
// are absent from this listing, so the actual default values cannot be read
// from here.

// Default for debug mode; depends on whether HAVE_TPETRA_DEBUG is defined.
389 constexpr
bool debugDefault () {
390 #ifdef HAVE_TPETRA_DEBUG
394 #endif // HAVE_TPETRA_DEBUG
// Default for verbose mode.
397 constexpr
bool verboseDefault () {
// Default for timing mode.
401 constexpr
bool timingDefault () {
// Default for the GPU-aware-MPI assumption; depends on whether the macro
// TPETRA_ASSUME_GPU_AWARE_MPI is defined.
405 constexpr
bool assumeMpiIsGPUAwareDefault () {
406 #ifdef TPETRA_ASSUME_GPU_AWARE_MPI
410 #endif // TPETRA_ASSUME_GPU_AWARE_MPI
// Default for CUDA launch blocking.
413 constexpr
bool cudaLaunchBlockingDefault () {
// Default for hierarchical unpacking.
417 constexpr
bool hierarchicalUnpackDefault () {
426 static bool once =
false;
429 const char prefix[] =
"Tpetra::Details::Behavior: ";
431 #if defined(WIN) && (_MSC_VER >= 1900)
432 env = *__p__environ();
436 for (; *env; ++env) {
440 const std::string_view ev(*env);
445 [&](
const std::string &s) {
454 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
455 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
456 BehaviorDetails::RESERVED_PREFIX) {
457 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
458 BehaviorDetails::RECOGNIZED_VARS.end(), name);
459 TEUCHOS_TEST_FOR_EXCEPTION(
460 it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
461 prefix <<
"Environment "
463 << name <<
"\" (prefixed with \""
464 << BehaviorDetails::RESERVED_PREFIX
465 <<
"\") is not a recognized Tpetra variable.");
474 constexpr
bool defaultValue = debugDefault();
476 static bool value_ = defaultValue;
477 static bool initialized_ =
false;
478 return idempotentlyGetEnvironmentVariable(
479 value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
483 if (BehaviorDetails::verboseDisabled_)
486 constexpr
bool defaultValue = verboseDefault();
488 static bool value_ = defaultValue;
489 static bool initialized_ =
false;
490 return idempotentlyGetEnvironmentVariable(
491 value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
495 if (BehaviorDetails::timingDisabled_)
498 constexpr
bool defaultValue = timingDefault();
500 static bool value_ = defaultValue;
501 static bool initialized_ =
false;
502 return idempotentlyGetEnvironmentVariable(
503 value_, initialized_, BehaviorDetails::TIMING, defaultValue);
507 constexpr
bool defaultValue = assumeMpiIsGPUAwareDefault();
509 static bool value_ = defaultValue;
510 static bool initialized_ =
false;
511 return idempotentlyGetEnvironmentVariable(
512 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
517 constexpr
bool defaultValue = cudaLaunchBlockingDefault();
519 static bool value_ = defaultValue;
520 static bool initialized_ =
false;
521 return idempotentlyGetEnvironmentVariable(
522 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
527 constexpr
int _default = 3000;
528 static int value_ = _default;
529 static bool initialized_ =
false;
530 return idempotentlyGetEnvironmentVariable(
531 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
536 constexpr
size_t defaultValue(200);
538 static size_t value_ = defaultValue;
539 static bool initialized_ =
false;
540 return idempotentlyGetEnvironmentVariable(
541 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
546 constexpr
size_t defaultValue(256);
548 static size_t value_ = defaultValue;
549 static bool initialized_ =
false;
550 return idempotentlyGetEnvironmentVariable(
551 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
556 constexpr
bool defaultValue =
false;
558 static bool value_ = defaultValue;
559 static bool initialized_ =
false;
560 return idempotentlyGetEnvironmentVariable(
561 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
566 constexpr
size_t defaultValue(22000);
568 static size_t value_ = defaultValue;
569 static bool initialized_ =
false;
570 return idempotentlyGetEnvironmentVariable(
571 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
577 #ifdef HAVE_TPETRA_INST_CUDA
578 constexpr
size_t defaultValue(16);
580 constexpr
size_t defaultValue(256);
583 static size_t value_ = defaultValue;
584 static bool initialized_ =
false;
585 return idempotentlyGetEnvironmentVariable(
586 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
591 #ifdef HAVE_TPETRA_INST_CUDA
592 const size_t defaultValue(16);
594 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
597 static size_t value_ = defaultValue;
598 static bool initialized_ =
false;
599 return idempotentlyGetEnvironmentVariable(
600 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
605 constexpr
bool defaultValue(
false);
607 static bool value_ = defaultValue;
608 static bool initialized_ =
false;
609 return idempotentlyGetEnvironmentVariable(
610 value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
614 constexpr
bool defaultValue(
false);
616 static bool value_ = defaultValue;
617 static bool initialized_ =
false;
618 return idempotentlyGetEnvironmentVariable(
619 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
624 constexpr
bool defaultValue =
false;
626 static bool initialized_ =
false;
627 return idempotentlyGetNamedEnvironmentVariableAsBool(
628 name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
632 if (BehaviorDetails::verboseDisabled_)
635 constexpr
bool defaultValue =
false;
637 static bool initialized_ =
false;
638 return idempotentlyGetNamedEnvironmentVariableAsBool(
639 name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
643 BehaviorDetails::verboseDisabled_ =
false;
647 BehaviorDetails::verboseDisabled_ =
true;
651 if (BehaviorDetails::timingDisabled_)
654 constexpr
bool defaultValue =
false;
656 static bool initialized_ =
false;
657 return idempotentlyGetNamedEnvironmentVariableAsBool(
658 name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
666 constexpr
bool defaultValue = hierarchicalUnpackDefault();
668 static bool value_ = defaultValue;
669 static bool initialized_ =
false;
670 return idempotentlyGetEnvironmentVariable(
671 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
675 constexpr
bool defaultValue(
false);
677 static bool value_ = defaultValue;
678 static bool initialized_ =
false;
679 return idempotentlyGetEnvironmentVariable(
680 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
685 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
686 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
687 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
688 constexpr
bool defaultValue(
false);
690 constexpr
bool defaultValue(
true);
693 static bool value_ = defaultValue;
694 static bool initialized_ =
false;
695 return idempotentlyGetEnvironmentVariable(
696 value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
700 constexpr
bool defaultValue(
false);
702 static bool value_ = defaultValue;
703 static bool initialized_ =
false;
704 return idempotentlyGetEnvironmentVariable(
705 value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
709 constexpr
size_t defaultValue(16);
711 static size_t value_ = defaultValue;
712 static bool initialized_ =
false;
713 return idempotentlyGetEnvironmentVariable(
714 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
719 constexpr
bool defaultValue(
false);
721 static bool value_ = defaultValue;
722 static bool initialized_ =
false;
723 return idempotentlyGetEnvironmentVariable(
724 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
729 constexpr
bool defaultValue(
false);
731 static bool value_ = defaultValue;
732 static bool initialized_ =
false;
733 return idempotentlyGetEnvironmentVariable(
734 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
739 constexpr
bool defaultValue(
false);
741 static bool value_ = defaultValue;
742 static bool initialized_ =
false;
743 return idempotentlyGetEnvironmentVariable(
744 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
749 constexpr
bool defaultValue(
false);
751 static bool value_ = defaultValue;
752 static bool initialized_ =
false;
753 return idempotentlyGetEnvironmentVariable(
754 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
758 constexpr
bool defaultValue(
false);
760 static bool value_ = defaultValue;
761 static bool initialized_ =
false;
762 return idempotentlyGetEnvironmentVariable(
763 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static bool timing()
Whether Tpetra is in timing mode.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t multivectorKernelLocationThreshold()
The threshold for transitioning from device to host.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool debug()
Whether Tpetra is in debug mode.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static void disable_timing()
Disable timing, programmatically.
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static void enable_timing()
Enable timing, programmatically.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.