Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HostScaling.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include <string>
11 #include <iostream>
12 #include <cstdlib>
13 
14 #include "Kokkos_Core.hpp"
15 
18 
19 #include "TestStochastic.hpp"
20 
22 
23 // Algorithms
// Number of selectable algorithms; handed to the "alg" command line option
// in main() together with sg_alg_values and sg_alg_names.
25 const int num_sg_alg = 2;
// Human-readable algorithm names. main() passes them to the "alg" option and
// indexes this array with the chosen SG_Alg value to label the CSV header.
27 const char *sg_alg_names[] = { "Original Matrix-Free", "Product CRS" };
28 
29 std::vector<double>
30 run_test(const size_t num_cpu, const size_t num_core_per_cpu,
31  const size_t num_threads_per_core,
32  const size_t p, const size_t d, const size_t nGrid, const size_t nIter,
33  const bool symmetric, SG_Alg sg_alg,
34  const std::vector<double>& perf1 = std::vector<double>())
35 {
36  typedef double Scalar;
37  typedef Kokkos::Threads Device;
38  const size_t team_count = num_cpu * num_core_per_cpu;
39  const size_t threads_per_team = num_threads_per_core;
40  Kokkos::InitializationSettings init_args;
41  init_args.set_num_threads(team_count*threads_per_team);
42  Kokkos::initialize( init_args );
43 
44  std::vector<int> var_degree( d , p );
45 
46  std::vector<double> perf;
47  if (sg_alg == PROD_CRS)
48  perf =
49  unit_test::test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(var_degree , nGrid , nIter , symmetric );
50  else if (sg_alg == ORIG_MAT_FREE)
51  perf =
52  unit_test::test_original_matrix_free_vec<Scalar,Device,Stokhos::DefaultMultiply>(
53  var_degree , nGrid , nIter , true , symmetric );
54 
55  Kokkos::finalize();
56 
57  double speed_up;
58  if (perf1.size() > 0)
59  speed_up = perf1[1] / perf[1];
60  else
61  speed_up = perf[1] / perf[1];
62  double efficiency = speed_up / team_count;
63 
64  std::cout << team_count << " , "
65  << nGrid << " , "
66  << d << " , "
67  << p << " , "
68  << perf[1] << " , "
69  << perf[2] << " , "
70  << speed_up << " , "
71  << 100.0 * efficiency << " , "
72  << std::endl;
73 
74  return perf;
75 }
76 
// Benchmark driver: reads the command line options, queries the hardware
// topology through Kokkos::hwloc, prints a CSV header and then calls
// run_test() for a baseline configuration followed by increasingly parallel
// configurations.
// Returns 0 on success and -1 when the catch macro cleared `success`.
// NOTE(review): the declaration of `CLP` is not visible in this listing
// (the line between the "Setup" comment and `int p` is absent) -- confirm
// against the original file.
77 int main(int argc, char *argv[])
78 {
79  bool success = true;
80 
81  try {
82  // Setup command line options
84  int p = 3;
85  CLP.setOption("p", &p, "Polynomial order");
86  int d = 4;
87  CLP.setOption("d", &d, "Stochastic dimension");
88  int nGrid = 64;
89  CLP.setOption("n", &nGrid, "Number of spatial grid points in each dimension");
90  int nIter = 1;
91  CLP.setOption("niter", &nIter, "Number of iterations");
92  int n_thread_per_core = 1;
93  CLP.setOption("nthread", &n_thread_per_core, "Number of threads per core to use");
 // n_hyperthreads is registered as an option but is not read anywhere else
 // in the visible code.
94  int n_hyperthreads = 2;
95  CLP.setOption("nht", &n_hyperthreads, "Number of hyperthreads per core available");
96  SG_Alg sg_alg = PROD_CRS;
97  CLP.setOption("alg", &sg_alg, num_sg_alg, sg_alg_values, sg_alg_names,
98  "SG Mat-Vec Algorithm");
99  bool symmetric = true;
100  CLP.setOption("symmetric", "asymmetric", &symmetric, "Use symmetric PDF");
 // The return value of parse() is not inspected here.
101  CLP.parse( argc, argv );
102 
103  // Detect number of CPUs and number of cores
104  const size_t num_cpu = Kokkos::hwloc::get_available_numa_count();
105  const size_t num_core_per_cpu = Kokkos::hwloc::get_available_cores_per_numa();
106  const size_t core_capacity = Kokkos::hwloc::get_available_threads_per_core();
 // Clamp the requested threads per core to what hwloc reports as available.
107  if (static_cast<size_t>(n_thread_per_core) > core_capacity )
108  n_thread_per_core = core_capacity;
109 
110  // Print header
 // Column order must match the row printed by run_test().
111  std::cout << std::endl
112  << "\"#nCore\" , "
113  << "\"#nGrid\" , "
114  << "\"#Variable\" , "
115  << "\"PolyDegree\" , "
116  << "\"" << sg_alg_names[sg_alg] << " MXV Time\" , "
117  << "\"" << sg_alg_names[sg_alg] << " MXV GFLOPS\" , "
118  << "\"" << sg_alg_names[sg_alg] << " MXV Speedup\" , "
119  << "\"" << sg_alg_names[sg_alg] << " MXV Efficiency\" , "
120  << std::endl ;
121 
122  // Do a serial run to base speedup & efficiency from
 // No perf1 argument is passed, so run_test() uses its default (empty) baseline.
123  const std::vector<double> perf1 =
124  run_test(1, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg);
125 
126  // First do 1 core per cpu
 // Starts at 2 because the 1 x 1 x 1 configuration is the baseline run above.
127  for (size_t n=2; n<=num_cpu; ++n) {
128  const std::vector<double> perf =
129  run_test(n, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
130  }
131 
132  // Now do all cpus, increasing number of cores
 // Starts at 2: one core per CPU on all CPUs was the last pass of the loop above
 // (when num_cpu > 1) or the baseline run (when num_cpu == 1).
133  for (size_t n=2; n<=num_core_per_cpu; ++n) {
134  const std::vector<double> perf =
135  run_test(num_cpu, n, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
136  }
137 
138  // Now do all cpus, all cores, with nthreads/core
 // The returned vector is not used afterwards; run_test() already printed the row.
139  const std::vector<double> perf =
140  run_test(num_cpu, num_core_per_cpu, n_thread_per_core, p, d, nGrid,
141  nIter, symmetric, sg_alg, perf1);
142 
143 
144  }
 // The macro is handed `success` as its status flag; the check below reads it.
145  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
146 
147  if (!success)
148  return -1;
149  return 0 ;
150 }
const int num_sg_alg
Definition: HostScaling.cpp:25
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
SG_Alg
Definition: HostScaling.cpp:24
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
const SG_Alg sg_alg_values[]
Definition: HostScaling.cpp:26
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
std::vector< double > run_test(const size_t num_cpu, const size_t num_core_per_cpu, const size_t num_threads_per_core, const size_t p, const size_t d, const size_t nGrid, const size_t nIter, const bool symmetric, SG_Alg sg_alg, const std::vector< double > &perf1=std::vector< double >())
Definition: HostScaling.cpp:30
int main(int argc, char **argv)
const char * sg_alg_names[]
Definition: HostScaling.cpp:27
int n