Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
view/TestAssembly.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include <iostream>
11 
12 // Tests
13 #include "TestAssembly.hpp"
14 
15 // Devices
16 #include "Kokkos_Core.hpp"
17 
18 // Utilities
21 #ifdef KOKKOS_ENABLE_CUDA
22 #include "cuda_runtime_api.h"
23 #endif
24 
25 // For vtune
26 #include <sys/types.h>
27 #include <unistd.h>
28 
29 int main(int argc, char *argv[])
30 {
31  bool success = true;
32  bool verbose = false;
33  try {
34 
35  const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
36  const size_t num_cores_per_socket =
37  Kokkos::hwloc::get_available_cores_per_numa();
38  const size_t num_threads_per_core =
39  Kokkos::hwloc::get_available_threads_per_core();
40 
41  // Setup command line options
43  CLP.setDocString(
44  "This test performance of MP::Vector FEM assembly.\n");
45  int nGrid = 0;
46  CLP.setOption("n", &nGrid, "Number of mesh points in each direction. Set to zero to use a range");
47  int nGridBegin = 8;
48  CLP.setOption("n-begin", &nGridBegin, "Beginning number of mesh points in each direction.");
49  int nGridEnd = 48;
50  CLP.setOption("n-end", &nGridEnd, "Ending number of mesh points in each direction.");
51  int nGridStep = 8;
52  CLP.setOption("n-step", &nGridStep, "Increment in number of mesh points in each direction.");
53  int nIter = 10;
54  CLP.setOption("ni", &nIter, "Number of assembly iterations");
55  bool print = false;
56  CLP.setOption("print", "no-print", &print, "Print debugging output");
57  bool check = false;
58  CLP.setOption("check", "no-check", &check, "Check correctness");
59  bool quadratic = false;
60  CLP.setOption("quadratic", "linear", &quadratic, "Use quadratic basis functions");
61  int num_cores = num_cores_per_socket * num_sockets;
62  CLP.setOption("cores", &num_cores,
63  "Number of CPU cores to use (defaults to all)");
64  int num_hyper_threads = num_threads_per_core;
65  CLP.setOption("hyperthreads", &num_hyper_threads,
66  "Number of hyper threads per core to use (defaults to all)");
67 #ifdef KOKKOS_ENABLE_THREADS
68  bool threads = true;
69  CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
70 #endif
71 #ifdef KOKKOS_ENABLE_OPENMP
72  bool openmp = true;
73  CLP.setOption("openmp", "no-openmp", &openmp, "Enable OpenMP device");
74 #endif
75 #ifdef KOKKOS_ENABLE_CUDA
76  bool cuda = true;
77  CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
78  int device_id = 0;
79  CLP.setOption("device", &device_id, "CUDA device ID.");
80 #endif
81  bool vtune = false;
82  CLP.setOption("vtune", "no-vtune", &vtune, "connect to vtune");
83  CLP.parse( argc, argv );
84 
85  if (nGrid > 0) {
86  nGridBegin = nGrid;
87  nGridEnd = nGrid;
88  }
89 
90  // Connect to VTune if requested
91  if (vtune) {
92  std::stringstream cmd;
93  pid_t my_os_pid=getpid();
94  const std::string vtune_loc =
95  "amplxe-cl";
96  const std::string output_dir = "./vtune/vtune.0";
97  cmd << vtune_loc
98  << " -collect hotspots -result-dir " << output_dir
99  << " -target-pid " << my_os_pid << " &";
100  std::cout << cmd.str() << std::endl;
101  system(cmd.str().c_str());
102  system("sleep 10");
103  }
104 
105  Kokkos::initialize(argc,argv);
106 #ifdef KOKKOS_ENABLE_THREADS
107  if (threads) {
108  typedef Kokkos::Threads Device;
109 
110  std::cout << std::endl
111  << "Threads performance with " << num_cores*num_hyper_threads
112  << " threads:" << std::endl;
113 
114  performance_test_driver<Device>(
115  print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
116  }
117 #endif
118 
119 #ifdef KOKKOS_ENABLE_OPENMP
120  if (openmp) {
121  typedef Kokkos::OpenMP Device;
122 
123  std::cout << std::endl
124  << "OpenMP performance with " << num_cores*num_hyper_threads
125  << " threads:" << std::endl;
126 
127  performance_test_driver<Device>(
128  print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
129  }
130 #endif
131 
132 #ifdef KOKKOS_ENABLE_CUDA
133  if (cuda) {
134  typedef Kokkos::Cuda Device;
135 
136  cudaDeviceProp deviceProp;
137  cudaGetDeviceProperties(&deviceProp, device_id);
138  std::cout << std::endl
139  << "CUDA performance performance with device " << device_id
140  << " ("
141  << deviceProp.name << "):"
142  << std::endl;
143 
144  performance_test_driver<Device>(
145  print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
146 
147  }
148 #endif
149  Kokkos::finalize();
150  }
151  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
152 
153  if (success)
154  return 0;
155  return -1;
156 }
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
int main()
Definition: ad_example.cpp:171
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void setDocString(const char doc_string[])