50 #include <string_view>
53 #include "Teuchos_OrdinalTraits.hpp"
54 #include "Teuchos_TestForException.hpp"
55 #include "TpetraCore_config.h"
93 #if not(defined(WIN) && (_MSC_VER >= 1900))
95 extern char **environ;
101 namespace BehaviorDetails {
// Names of the environment variables consulted by this file.
//
// Every view is built from a string literal, so .data() yields a
// null-terminated C string; the std::getenv calls in this file pass
// .data() directly and depend on that.

// Prefix compared against environment entries by the
// "is not a recognized Tpetra variable" check in this file.
constexpr std::string_view RESERVED_PREFIX{"TPETRA_"};

// Note: the next three names are listed as recognized, but two of them
// (CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount) do not carry the
// reserved prefix.
constexpr std::string_view ASSUME_GPU_AWARE_MPI{"TPETRA_ASSUME_GPU_AWARE_MPI"};
constexpr std::string_view CUDA_LAUNCH_BLOCKING{"CUDA_LAUNCH_BLOCKING"};
constexpr std::string_view MM_TAFC_OptimizationCoreCount{
    "MM_TAFC_OptimizationCoreCount"};

// Thresholds and sizes.
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD{
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD"};
constexpr std::string_view ROW_IMBALANCE_THRESHOLD{
    "TPETRA_ROW_IMBALANCE_THRESHOLD"};
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH{
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH"};
constexpr std::string_view VECTOR_DEVICE_THRESHOLD{
    "TPETRA_VECTOR_DEVICE_THRESHOLD"};
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE{
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE"};
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE{
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE"};

// Profiling back ends.
constexpr std::string_view USE_TEUCHOS_TIMERS{"TPETRA_USE_TEUCHOS_TIMERS"};
constexpr std::string_view USE_KOKKOS_PROFILING{"TPETRA_USE_KOKKOS_PROFILING"};

// Debug / verbose / timing switches.
constexpr std::string_view DEBUG{"TPETRA_DEBUG"};
constexpr std::string_view VERBOSE{"TPETRA_VERBOSE"};
constexpr std::string_view TIMING{"TPETRA_TIMING"};

// Algorithmic switches.
constexpr std::string_view HIERARCHICAL_UNPACK{"TPETRA_HIERARCHICAL_UNPACK"};
constexpr std::string_view SKIP_COPY_AND_PERMUTE{
    "TPETRA_SKIP_COPY_AND_PERMUTE"};
constexpr std::string_view FUSED_RESIDUAL{"TPETRA_FUSED_RESIDUAL"};
constexpr std::string_view OVERLAP{"TPETRA_OVERLAP"};
constexpr std::string_view SPACES_ID_WARN_LIMIT{"TPETRA_SPACES_ID_WARN_LIMIT"};

// Kokkos timing switches.
constexpr std::string_view TIME_KOKKOS_DEEP_COPY{
    "TPETRA_TIME_KOKKOS_DEEP_COPY"};
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1{
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1"};
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2{
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2"};
constexpr std::string_view TIME_KOKKOS_FENCE{"TPETRA_TIME_KOKKOS_FENCE"};
constexpr std::string_view TIME_KOKKOS_FUNCTIONS{
    "TPETRA_TIME_KOKKOS_FUNCTIONS"};
// Packs the given arguments into a std::array of std::string_view whose
// length equals the number of arguments. Each argument is perfectly
// forwarded into the array's brace initializer, so anything convertible to
// std::string_view is accepted.
template <class... Elems>
constexpr auto make_array(Elems &&... elems)
    -> std::array<std::string_view, sizeof...(Elems)>
{
  return {std::forward<Elems>(elems)...};
}
154 constexpr
const auto RECOGNIZED_VARS = make_array(
155 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
156 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
157 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
158 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
159 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
160 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
161 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
162 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
// Outer key: environment variable name. Inner map: entry name -> flag; the
// inner map is seeded with a "DEFAULT" entry when it is populated.
std::map<std::string, std::map<std::string, bool>> namedVariableMap_;

// Consulted first by the verbose queries; toggled by the verbose
// enable/disable calls in this file.
bool verboseDisabled_{false};

// Consulted first by the timing queries.
bool timingDisabled_{false};
// Classification of an environment variable's value, as produced by
// environmentVariableState().
enum EnvironmentVariableState {
  // Value is one of "1", "YES", "TRUE", "ON" (after upper-casing).
  EnvironmentVariableIsSet_ON,
  // Value is one of "0", "NO", "FALSE", "OFF" (after upper-casing).
  EnvironmentVariableIsSet_OFF,
  // Value is any other text.
  EnvironmentVariableIsSet,
  // NOTE(review): no producer of this state is visible in this excerpt;
  // presumably it marks an unset variable -- confirm against callers.
  EnvironmentVariableIsNotSet
};
// Returns a copy of s in which every character has been passed through
// std::toupper. The argument is taken by value, so the caller's string is
// left untouched.
std::string stringToUpper (std::string s)
{
  for (char& ch : s) {
    // std::toupper must be given a value representable as unsigned char.
    const unsigned char asUnsigned = ch;
    ch = std::toupper (asUnsigned);
  }
  return s;
}
191 split(
const std::string_view s,
192 std::function<
void(
const std::string&)> f,
195 typedef std::string::size_type size_type;
196 size_type cur_pos, last_pos=0, length=s.length();
197 while(last_pos < length + 1)
199 cur_pos = s.find_first_of(sep, last_pos);
200 if(cur_pos == std::string::npos)
204 if(cur_pos!=last_pos) {
205 auto token = std::string(s.data()+last_pos, (size_type)cur_pos-last_pos);
208 last_pos = cur_pos + 1;
213 EnvironmentVariableState
214 environmentVariableState(
const std::string& environmentVariableValue)
216 std::string v = stringToUpper(environmentVariableValue);
217 if (v ==
"1" || v ==
"YES" || v ==
"TRUE" || v ==
"ON")
219 return EnvironmentVariableIsSet_ON;
220 else if (v ==
"0" || v ==
"NO" || v ==
"FALSE" || v ==
"OFF")
222 return EnvironmentVariableIsSet_OFF;
224 return EnvironmentVariableIsSet;
228 setEnvironmentVariableMap (
const char environmentVariableName[],
229 std::map<std::string,std::map<std::string, bool> >& valsMap,
230 const bool defaultValue)
238 valsMap[environmentVariableName] = map<string,bool>{{
"DEFAULT", defaultValue}};
240 const char* varVal = getenv (environmentVariableName);
241 if (varVal ==
nullptr) {
248 const string varStr(varVal);
249 vector<string> names;
250 split(varStr, [&](
const string& x){names.push_back(x);});
251 for (
auto const& name: names) {
252 auto state = environmentVariableState(name);
253 if (state == EnvironmentVariableIsSet_ON) {
256 valsMap[environmentVariableName][
"DEFAULT"] =
true;
258 else if (state == EnvironmentVariableIsSet_OFF) {
261 valsMap[environmentVariableName][
"DEFAULT"] =
false;
266 valsMap[environmentVariableName][name] =
true;
273 idempotentlyGetNamedEnvironmentVariableAsBool (
const char name[],
275 const char environmentVariableName[],
276 const bool defaultValue)
278 using BehaviorDetails::namedVariableMap_;
280 setEnvironmentVariableMap (environmentVariableName,
285 auto thisEnvironmentVariableMap = namedVariableMap_[environmentVariableName];
286 auto thisEnvironmentVariable = thisEnvironmentVariableMap.find(name);
287 if (thisEnvironmentVariable != thisEnvironmentVariableMap.end())
288 return thisEnvironmentVariable->second;
289 return thisEnvironmentVariableMap[
"DEFAULT"];
293 template <
typename T>
294 T getEnvironmentVariable(
const std::string_view environmentVariableName,
295 const T defaultValue) {
296 const char prefix[] =
"Tpetra::Details::Behavior: ";
298 const char *varVal = std::getenv(environmentVariableName.data());
299 if (varVal ==
nullptr) {
302 std::stringstream ss(varVal);
306 TEUCHOS_TEST_FOR_EXCEPTION(!ss, std::out_of_range,
307 prefix <<
"Environment "
309 << environmentVariableName
313 <<
" that cannot be parsed as a "
314 <<
typeid(T).name() <<
".");
322 bool getEnvironmentVariable<bool>(
323 const std::string_view environmentVariableName,
const bool defaultValue) {
324 const char *varVal = std::getenv(environmentVariableName.data());
325 bool retVal = defaultValue;
326 if (varVal !=
nullptr) {
327 auto state = environmentVariableState(std::string(varVal));
328 if (state == EnvironmentVariableIsSet_ON)
330 else if (state == EnvironmentVariableIsSet_OFF)
343 getEnvironmentVariable<size_t>(
const std::string_view environmentVariableName,
344 const size_t defaultValue) {
345 const char prefix[] =
"Tpetra::Details::Behavior: ";
347 const char *varVal = std::getenv(environmentVariableName.data());
348 if (varVal ==
nullptr) {
351 long long val = std::stoll(stringToUpper(varVal));
352 if (val < static_cast<long long>(0)) {
354 return std::numeric_limits<size_t>::max();
356 if (
sizeof(
long long) >
sizeof(
size_t)) {
360 constexpr
long long maxSizeT =
361 static_cast<long long>(std::numeric_limits<size_t>::max());
362 TEUCHOS_TEST_FOR_EXCEPTION(
363 val > maxSizeT, std::out_of_range,
364 prefix <<
"Environment "
366 << environmentVariableName
369 << val <<
" larger than the largest size_t value " << maxSizeT
372 return static_cast<size_t>(val);
376 template <
typename T>
377 T idempotentlyGetEnvironmentVariable(
378 T &value,
bool &initialized,
const std::string_view environmentVariableName,
379 const T defaultValue) {
381 value = getEnvironmentVariable<T>(environmentVariableName, defaultValue);
388 constexpr
bool debugDefault () {
389 #ifdef HAVE_TPETRA_DEBUG
393 #endif // HAVE_TPETRA_DEBUG
396 constexpr
bool verboseDefault () {
400 constexpr
bool timingDefault () {
404 constexpr
bool assumeMpiIsGPUAwareDefault () {
405 #ifdef TPETRA_ASSUME_GPU_AWARE_MPI
409 #endif // TPETRA_ASSUME_GPU_AWARE_MPI
412 constexpr
bool cudaLaunchBlockingDefault () {
416 constexpr
bool hierarchicalUnpackDefault () {
425 static bool once =
false;
428 const char prefix[] =
"Tpetra::Details::Behavior: ";
430 #if defined(WIN) && (_MSC_VER >= 1900)
431 env = *__p__environ();
435 for (; *env; ++env) {
439 const std::string_view ev(*env);
444 [&](
const std::string &s) {
453 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
454 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
455 BehaviorDetails::RESERVED_PREFIX) {
456 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
457 BehaviorDetails::RECOGNIZED_VARS.end(), name);
458 TEUCHOS_TEST_FOR_EXCEPTION(
459 it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
460 prefix <<
"Environment "
462 << name <<
"\" (prefixed with \""
463 << BehaviorDetails::RESERVED_PREFIX
464 <<
"\") is not a recognized Tpetra variable.");
473 constexpr
bool defaultValue = debugDefault();
475 static bool value_ = defaultValue;
476 static bool initialized_ =
false;
477 return idempotentlyGetEnvironmentVariable(
478 value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
482 if (BehaviorDetails::verboseDisabled_)
485 constexpr
bool defaultValue = verboseDefault();
487 static bool value_ = defaultValue;
488 static bool initialized_ =
false;
489 return idempotentlyGetEnvironmentVariable(
490 value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
494 if (BehaviorDetails::timingDisabled_)
497 constexpr
bool defaultValue = timingDefault();
499 static bool value_ = defaultValue;
500 static bool initialized_ =
false;
501 return idempotentlyGetEnvironmentVariable(
502 value_, initialized_, BehaviorDetails::TIMING, defaultValue);
506 constexpr
bool defaultValue = assumeMpiIsGPUAwareDefault();
508 static bool value_ = defaultValue;
509 static bool initialized_ =
false;
510 return idempotentlyGetEnvironmentVariable(
511 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
516 constexpr
bool defaultValue = cudaLaunchBlockingDefault();
518 static bool value_ = defaultValue;
519 static bool initialized_ =
false;
520 return idempotentlyGetEnvironmentVariable(
521 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
526 constexpr
int _default = 3000;
527 static int value_ = _default;
528 static bool initialized_ =
false;
529 return idempotentlyGetEnvironmentVariable(
530 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
535 constexpr
size_t defaultValue(200);
537 static size_t value_ = defaultValue;
538 static bool initialized_ =
false;
539 return idempotentlyGetEnvironmentVariable(
540 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
545 constexpr
size_t defaultValue(256);
547 static size_t value_ = defaultValue;
548 static bool initialized_ =
false;
549 return idempotentlyGetEnvironmentVariable(
550 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
555 constexpr
bool defaultValue =
false;
557 static bool value_ = defaultValue;
558 static bool initialized_ =
false;
559 return idempotentlyGetEnvironmentVariable(
560 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
565 constexpr
size_t defaultValue(22000);
567 static size_t value_ = defaultValue;
568 static bool initialized_ =
false;
569 return idempotentlyGetEnvironmentVariable(
570 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
576 #ifdef HAVE_TPETRA_INST_CUDA
577 constexpr
size_t defaultValue(16);
579 constexpr
size_t defaultValue(256);
582 static size_t value_ = defaultValue;
583 static bool initialized_ =
false;
584 return idempotentlyGetEnvironmentVariable(
585 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
590 #ifdef HAVE_TPETRA_INST_CUDA
591 const size_t defaultValue(16);
593 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
596 static size_t value_ = defaultValue;
597 static bool initialized_ =
false;
598 return idempotentlyGetEnvironmentVariable(
599 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
604 constexpr
bool defaultValue(
false);
606 static bool value_ = defaultValue;
607 static bool initialized_ =
false;
608 return idempotentlyGetEnvironmentVariable(
609 value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
613 constexpr
bool defaultValue(
false);
615 static bool value_ = defaultValue;
616 static bool initialized_ =
false;
617 return idempotentlyGetEnvironmentVariable(
618 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
623 constexpr
bool defaultValue =
false;
625 static bool initialized_ =
false;
626 return idempotentlyGetNamedEnvironmentVariableAsBool(
627 name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
631 if (BehaviorDetails::verboseDisabled_)
634 constexpr
bool defaultValue =
false;
636 static bool initialized_ =
false;
637 return idempotentlyGetNamedEnvironmentVariableAsBool(
638 name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
642 BehaviorDetails::verboseDisabled_ =
false;
646 BehaviorDetails::verboseDisabled_ =
true;
650 if (BehaviorDetails::timingDisabled_)
653 constexpr
bool defaultValue =
false;
655 static bool initialized_ =
false;
656 return idempotentlyGetNamedEnvironmentVariableAsBool(
657 name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
665 constexpr
bool defaultValue = hierarchicalUnpackDefault();
667 static bool value_ = defaultValue;
668 static bool initialized_ =
false;
669 return idempotentlyGetEnvironmentVariable(
670 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
674 constexpr
bool defaultValue(
false);
676 static bool value_ = defaultValue;
677 static bool initialized_ =
false;
678 return idempotentlyGetEnvironmentVariable(
679 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
684 constexpr
bool defaultValue(
true);
686 static bool value_ = defaultValue;
687 static bool initialized_ =
false;
688 return idempotentlyGetEnvironmentVariable(
689 value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
693 constexpr
bool defaultValue(
false);
695 static bool value_ = defaultValue;
696 static bool initialized_ =
false;
697 return idempotentlyGetEnvironmentVariable(
698 value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
702 constexpr
size_t defaultValue(16);
704 static size_t value_ = defaultValue;
705 static bool initialized_ =
false;
706 return idempotentlyGetEnvironmentVariable(
707 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
712 constexpr
bool defaultValue(
false);
714 static bool value_ = defaultValue;
715 static bool initialized_ =
false;
716 return idempotentlyGetEnvironmentVariable(
717 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
722 constexpr
bool defaultValue(
false);
724 static bool value_ = defaultValue;
725 static bool initialized_ =
false;
726 return idempotentlyGetEnvironmentVariable(
727 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
732 constexpr
bool defaultValue(
false);
734 static bool value_ = defaultValue;
735 static bool initialized_ =
false;
736 return idempotentlyGetEnvironmentVariable(
737 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
742 constexpr
bool defaultValue(
false);
744 static bool value_ = defaultValue;
745 static bool initialized_ =
false;
746 return idempotentlyGetEnvironmentVariable(
747 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
751 constexpr
bool defaultValue(
false);
753 static bool value_ = defaultValue;
754 static bool initialized_ =
false;
755 return idempotentlyGetEnvironmentVariable(
756 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static bool timing()
Whether Tpetra is in timing mode.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool debug()
Whether Tpetra is in debug mode.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static void disable_timing()
Disable timing, programmatically.
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static void enable_timing()
Enable timing, programmatically.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.