Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestAssembly.cpp
Go to the documentation of this file.
// @HEADER
// *****************************************************************************
// Stokhos Package
//
// Copyright 2009 NTESS and the Stokhos contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#include <chrono>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>
#include <thread>

// Tests
#include "TestAssembly.hpp"

// Devices
#include "Kokkos_Core.hpp"

// Utilities
#include "Teuchos_CommandLineProcessor.hpp"
#include "Teuchos_StandardCatchMacros.hpp"
#ifdef KOKKOS_ENABLE_CUDA
#include "cuda_runtime_api.h"
#endif

// For vtune
#include <sys/types.h>
#include <unistd.h>

29 int main(int argc, char *argv[])
30 {
31  bool success = true;
32  bool verbose = false;
33  try {
34 
35  // Setup command line options
37  CLP.setDocString(
38  "This test performance of MP::Vector FEM assembly.\n");
39  int nGrid = 0;
40  CLP.setOption("n", &nGrid, "Number of mesh points in each direction. Set to zero to use a range");
41  int nGridBegin = 8;
42  CLP.setOption("n-begin", &nGridBegin, "Beginning number of mesh points in each direction.");
43  int nGridEnd = 48;
44  CLP.setOption("n-end", &nGridEnd, "Ending number of mesh points in each direction.");
45  int nGridStep = 8;
46  CLP.setOption("n-step", &nGridStep, "Increment in number of mesh points in each direction.");
47  int nIter = 10;
48  CLP.setOption("ni", &nIter, "Number of assembly iterations");
49  bool print = false;
50  CLP.setOption("print", "no-print", &print, "Print debugging output");
51  bool check = false;
52  CLP.setOption("check", "no-check", &check, "Check correctness");
53  bool quadratic = false;
54  CLP.setOption("quadratic", "linear", &quadratic, "Use quadratic basis functions");
55  int num_cores = -1;
56  CLP.setOption("cores", &num_cores,
57  "Number of CPU cores to use (defaults to all)");
58  int num_hyper_threads = -1;
59  CLP.setOption("hyperthreads", &num_hyper_threads,
60  "Number of hyper threads per core to use (defaults to all)");
61 #ifdef KOKKOS_ENABLE_THREADS
62  bool threads = true;
63  CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
64 #endif
65 #ifdef KOKKOS_ENABLE_OPENMP
66  bool openmp = true;
67  CLP.setOption("openmp", "no-openmp", &openmp, "Enable OpenMP device");
68 #endif
69 #ifdef KOKKOS_ENABLE_CUDA
70  bool cuda = true;
71  CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
72  int device_id = 0;
73  CLP.setOption("device", &device_id, "CUDA device ID.");
74 #endif
75  bool vtune = false;
76  CLP.setOption("vtune", "no-vtune", &vtune, "connect to vtune");
77  CLP.parse( argc, argv );
78 
79  if (nGrid > 0) {
80  nGridBegin = nGrid;
81  nGridEnd = nGrid;
82  }
83 
84  // Connect to VTune if requested
85  if (vtune) {
86  std::stringstream cmd;
87  pid_t my_os_pid=getpid();
88  const std::string vtune_loc =
89  "amplxe-cl";
90  const std::string output_dir = "./vtune/vtune.0";
91  cmd << vtune_loc
92  << " -collect hotspots -result-dir " << output_dir
93  << " -target-pid " << my_os_pid << " &";
94  std::cout << cmd.str() << std::endl;
95  system(cmd.str().c_str());
96  system("sleep 10");
97  }
98 
99  Kokkos::initialize(argc,argv);
100 #ifdef KOKKOS_ENABLE_THREADS
101  if (threads) {
102  typedef Kokkos::Threads Device;
103 
104  std::cout << std::endl
105  << "Threads performance with " << Kokkos::Threads().concurrency()
106  << " threads:" << std::endl;
107 
108  performance_test_driver<Device>(
109  print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
110  }
111 #endif
112 
113 #ifdef KOKKOS_ENABLE_OPENMP
114  if (openmp) {
115  typedef Kokkos::OpenMP Device;
116 
117  std::cout << std::endl
118  << "OpenMP performance with " << Kokkos::OpenMP().concurrency()
119  << " threads:" << std::endl;
120 
121  performance_test_driver<Device>(
122  print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
123 
124  }
125 #endif
126 
127 #ifdef KOKKOS_ENABLE_CUDA
128  if (cuda) {
129  typedef Kokkos::Cuda Device;
130 
131  cudaDeviceProp deviceProp;
132  cudaGetDeviceProperties(&deviceProp, device_id);
133  std::cout << std::endl
134  << "CUDA performance performance with device " << device_id
135  << " ("
136  << deviceProp.name << "):"
137  << std::endl;
138 
139  performance_test_driver<Device>(
140  print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
141 
142  }
143 #endif
144  Kokkos::finalize();
145  }
146  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
147 
148  if (success)
149  return 0;
150  return -1;
151 }
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
int main()
Definition: ad_example.cpp:171
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void setDocString(const char doc_string[])