Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HostScaling.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
#include <string>
#include <iostream>
#include <cstdlib>
#include <stdexcept>
#include <vector>

#include "Kokkos_Core.hpp"

#include "TestStochastic.hpp"
52 
54 
// Algorithms
// Stochastic Galerkin mat-vec algorithms selectable through the "alg"
// command line option.  main() indexes sg_alg_names directly with an SG_Alg
// value, so the enumerators, sg_alg_values and sg_alg_names must all stay in
// the same order, and num_sg_alg must equal the number of entries.
enum SG_Alg { ORIG_MAT_FREE, PROD_CRS };
const int num_sg_alg = 2;
const SG_Alg sg_alg_values[] = { ORIG_MAT_FREE, PROD_CRS };
const char *sg_alg_names[] = { "Original Matrix-Free", "Product CRS" };
60 
61 std::vector<double>
62 run_test(const size_t num_cpu, const size_t num_core_per_cpu,
63  const size_t num_threads_per_core,
64  const size_t p, const size_t d, const size_t nGrid, const size_t nIter,
65  const bool symmetric, SG_Alg sg_alg,
66  const std::vector<double>& perf1 = std::vector<double>())
67 {
68  typedef double Scalar;
69  typedef Kokkos::Threads Device;
70  const size_t team_count = num_cpu * num_core_per_cpu;
71  const size_t threads_per_team = num_threads_per_core;
72  Kokkos::InitArguments init_args;
73  init_args.num_threads = team_count*threads_per_team;
74  Kokkos::initialize( init_args );
75 
76  std::vector<int> var_degree( d , p );
77 
78  std::vector<double> perf;
79  if (sg_alg == PROD_CRS)
80  perf =
81  unit_test::test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(var_degree , nGrid , nIter , symmetric );
82  else if (sg_alg == ORIG_MAT_FREE)
83  perf =
84  unit_test::test_original_matrix_free_vec<Scalar,Device,Stokhos::DefaultMultiply>(
85  var_degree , nGrid , nIter , true , symmetric );
86 
87  Kokkos::finalize();
88 
89  double speed_up;
90  if (perf1.size() > 0)
91  speed_up = perf1[1] / perf[1];
92  else
93  speed_up = perf[1] / perf[1];
94  double efficiency = speed_up / team_count;
95 
96  std::cout << team_count << " , "
97  << nGrid << " , "
98  << d << " , "
99  << p << " , "
100  << perf[1] << " , "
101  << perf[2] << " , "
102  << speed_up << " , "
103  << 100.0 * efficiency << " , "
104  << std::endl;
105 
106  return perf;
107 }
108 
109 int main(int argc, char *argv[])
110 {
111  bool success = true;
112 
113  try {
114  // Setup command line options
116  int p = 3;
117  CLP.setOption("p", &p, "Polynomial order");
118  int d = 4;
119  CLP.setOption("d", &d, "Stochastic dimension");
120  int nGrid = 64;
121  CLP.setOption("n", &nGrid, "Number of spatial grid points in each dimension");
122  int nIter = 1;
123  CLP.setOption("niter", &nIter, "Number of iterations");
124  int n_thread_per_core = 1;
125  CLP.setOption("nthread", &n_thread_per_core, "Number of threads per core to use");
126  int n_hyperthreads = 2;
127  CLP.setOption("nht", &n_hyperthreads, "Number of hyperthreads per core available");
128  SG_Alg sg_alg = PROD_CRS;
129  CLP.setOption("alg", &sg_alg, num_sg_alg, sg_alg_values, sg_alg_names,
130  "SG Mat-Vec Algorithm");
131  bool symmetric = true;
132  CLP.setOption("symmetric", "asymmetric", &symmetric, "Use symmetric PDF");
133  CLP.parse( argc, argv );
134 
135  // Detect number of CPUs and number of cores
136  const size_t num_cpu = Kokkos::hwloc::get_available_numa_count();
137  const size_t num_core_per_cpu = Kokkos::hwloc::get_available_cores_per_numa();
138  const size_t core_capacity = Kokkos::hwloc::get_available_threads_per_core();
139  if (static_cast<size_t>(n_thread_per_core) > core_capacity )
140  n_thread_per_core = core_capacity;
141 
142  // Print header
143  std::cout << std::endl
144  << "\"#nCore\" , "
145  << "\"#nGrid\" , "
146  << "\"#Variable\" , "
147  << "\"PolyDegree\" , "
148  << "\"" << sg_alg_names[sg_alg] << " MXV Time\" , "
149  << "\"" << sg_alg_names[sg_alg] << " MXV GFLOPS\" , "
150  << "\"" << sg_alg_names[sg_alg] << " MXV Speedup\" , "
151  << "\"" << sg_alg_names[sg_alg] << " MXV Efficiency\" , "
152  << std::endl ;
153 
154  // Do a serial run to base speedup & efficiency from
155  const std::vector<double> perf1 =
156  run_test(1, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg);
157 
158  // First do 1 core per cpu
159  for (size_t n=2; n<=num_cpu; ++n) {
160  const std::vector<double> perf =
161  run_test(n, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
162  }
163 
164  // Now do all cpus, increasing number of cores
165  for (size_t n=2; n<=num_core_per_cpu; ++n) {
166  const std::vector<double> perf =
167  run_test(num_cpu, n, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
168  }
169 
170  // Now do all cpus, all cores, with nthreads/core
171  const std::vector<double> perf =
172  run_test(num_cpu, num_core_per_cpu, n_thread_per_core, p, d, nGrid,
173  nIter, symmetric, sg_alg, perf1);
174 
175 
176  }
177  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
178 
179  if (!success)
180  return -1;
181  return 0 ;
182 }
const int num_sg_alg
Definition: HostScaling.cpp:57
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
SG_Alg
Definition: HostScaling.cpp:56
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
const SG_Alg sg_alg_values[]
Definition: HostScaling.cpp:58
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
std::vector< double > run_test(const size_t num_cpu, const size_t num_core_per_cpu, const size_t num_threads_per_core, const size_t p, const size_t d, const size_t nGrid, const size_t nIter, const bool symmetric, SG_Alg sg_alg, const std::vector< double > &perf1=std::vector< double >())
Definition: HostScaling.cpp:62
int main(int argc, char **argv)
const char * sg_alg_names[]
Definition: HostScaling.cpp:59
int n