Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
view/TestAssembly.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include <iostream>
43 
44 // Tests
45 #include "TestAssembly.hpp"
46 
47 // Devices
48 #include "Kokkos_Core.hpp"
49 
50 // Utilities
53 #ifdef KOKKOS_ENABLE_CUDA
54 #include "cuda_runtime_api.h"
55 #endif
56 
57 // For vtune
58 #include <sys/types.h>
59 #include <unistd.h>
60 
// Command-line driver for the assembly performance test.
//
// Registers and parses the command-line options, optionally launches a VTune
// collector against this process, initializes Kokkos, and then calls
// performance_test_driver<Device>() once for each Kokkos device that is both
// compiled in (KOKKOS_ENABLE_THREADS / _OPENMP / _CUDA) and enabled on the
// command line.
//
// Returns 0 when `success` is still true at the end, -1 otherwise.
//
// NOTE(review): this listing carries embedded source line numbers that skip
// 74, and `CLP` is used below without a visible declaration -- the line that
// declares it appears to have been dropped from this view; confirm against
// the original file.
61 int main(int argc, char *argv[])
62 {
    // `success` is handed to TEUCHOS_STANDARD_CATCH_STATEMENTS at the end of
    // the function (presumably cleared there when an exception is caught --
    // confirm against the macro). `verbose` is never changed from false in
    // this function.
63  bool success = true;
64  bool verbose = false;
65  try {
66 
    // Hardware topology as reported by Kokkos' hwloc wrapper; used only to
    // compute the defaults of the "cores" and "hyperthreads" options below.
67  const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
68  const size_t num_cores_per_socket =
69  Kokkos::hwloc::get_available_cores_per_numa();
70  const size_t num_threads_per_core =
71  Kokkos::hwloc::get_available_threads_per_core();
72 
73  // Setup command line options
75  CLP.setDocString(
76  "This test performance of MP::Vector FEM assembly.\n");
    // Mesh size: either a single size ("n" > 0) or a range described by
    // n-begin / n-end / n-step, which is forwarded to performance_test_driver.
77  int nGrid = 0;
78  CLP.setOption("n", &nGrid, "Number of mesh points in each direction. Set to zero to use a range");
79  int nGridBegin = 8;
80  CLP.setOption("n-begin", &nGridBegin, "Beginning number of mesh points in each direction.");
81  int nGridEnd = 48;
82  CLP.setOption("n-end", &nGridEnd, "Ending number of mesh points in each direction.");
83  int nGridStep = 8;
84  CLP.setOption("n-step", &nGridStep, "Increment in number of mesh points in each direction.");
85  int nIter = 10;
86  CLP.setOption("ni", &nIter, "Number of assembly iterations");
87  bool print = false;
88  CLP.setOption("print", "no-print", &print, "Print debugging output");
89  bool check = false;
90  CLP.setOption("check", "no-check", &check, "Check correctness");
91  bool quadratic = false;
92  CLP.setOption("quadratic", "linear", &quadratic, "Use quadratic basis functions");
    // NOTE(review): in the code visible here, num_cores and num_hyper_threads
    // are only used to print the "... performance with N threads" banners;
    // they are not passed to Kokkos::initialize or to the test driver.
    // The size_t values are narrowed to int on initialization.
93  int num_cores = num_cores_per_socket * num_sockets;
94  CLP.setOption("cores", &num_cores,
95  "Number of CPU cores to use (defaults to all)");
96  int num_hyper_threads = num_threads_per_core;
97  CLP.setOption("hyperthreads", &num_hyper_threads,
98  "Number of hyper threads per core to use (defaults to all)");
    // Per-device enable switches exist only when the corresponding Kokkos
    // backend macro is defined; each defaults to true.
99 #ifdef KOKKOS_ENABLE_THREADS
100  bool threads = true;
101  CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
102 #endif
103 #ifdef KOKKOS_ENABLE_OPENMP
104  bool openmp = true;
105  CLP.setOption("openmp", "no-openmp", &openmp, "Enable OpenMP device");
106 #endif
107 #ifdef KOKKOS_ENABLE_CUDA
108  bool cuda = true;
109  CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
110  int device_id = 0;
111  CLP.setOption("device", &device_id, "CUDA device ID.");
112 #endif
113  bool vtune = false;
114  CLP.setOption("vtune", "no-vtune", &vtune, "connect to vtune");
    // NOTE(review): the value returned by parse() is discarded here.
115  CLP.parse( argc, argv );
116 
    // A positive "n" overrides the range: begin and end collapse to that
    // single mesh size (nGridStep is left as given).
117  if (nGrid > 0) {
118  nGridBegin = nGrid;
119  nGridEnd = nGrid;
120  }
121 
122  // Connect to VTune if requested
    // Builds the shell command
    //   amplxe-cl -collect hotspots -result-dir ./vtune/vtune.0 -target-pid <pid> &
    // echoes it to stdout, runs it in the background through system(), and
    // then runs "sleep 10" (presumably to give the collector time to attach
    // before the timed work starts). The results of both system() calls are
    // ignored.
123  if (vtune) {
124  std::stringstream cmd;
125  pid_t my_os_pid=getpid();
126  const std::string vtune_loc =
127  "amplxe-cl";
128  const std::string output_dir = "./vtune/vtune.0";
129  cmd << vtune_loc
130  << " -collect hotspots -result-dir " << output_dir
131  << " -target-pid " << my_os_pid << " &";
132  std::cout << cmd.str() << std::endl;
133  system(cmd.str().c_str());
134  system("sleep 10");
135  }
136 
    // Kokkos receives the raw argc/argv; every device run below happens
    // between this call and Kokkos::finalize().
137  Kokkos::initialize(argc,argv);
138 #ifdef KOKKOS_ENABLE_THREADS
139  if (threads) {
140  typedef Kokkos::Threads Device;
141 
142  std::cout << std::endl
143  << "Threads performance with " << num_cores*num_hyper_threads
144  << " threads:" << std::endl;
145 
146  performance_test_driver<Device>(
147  print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
148  }
149 #endif
150 
151 #ifdef KOKKOS_ENABLE_OPENMP
152  if (openmp) {
153  typedef Kokkos::OpenMP Device;
154 
155  std::cout << std::endl
156  << "OpenMP performance with " << num_cores*num_hyper_threads
157  << " threads:" << std::endl;
158 
159  performance_test_driver<Device>(
160  print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
161  }
162 #endif
163 
164 #ifdef KOKKOS_ENABLE_CUDA
165  if (cuda) {
166  typedef Kokkos::Cuda Device;
167 
    // NOTE(review): the status returned by cudaGetDeviceProperties is not
    // checked, so deviceProp.name is printed even if the query failed (for
    // example with an invalid --device value). In the code visible here,
    // device_id is used only for this query and the banner. The banner text
    // also contains a duplicated word ("performance performance").
168  cudaDeviceProp deviceProp;
169  cudaGetDeviceProperties(&deviceProp, device_id);
170  std::cout << std::endl
171  << "CUDA performance performance with device " << device_id
172  << " ("
173  << deviceProp.name << "):"
174  << std::endl;
175 
176  performance_test_driver<Device>(
177  print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
178 
179  }
180 #endif
    // NOTE(review): finalize() sits inside the try block, so it is skipped
    // whenever an exception propagates out of any statement above it.
181  Kokkos::finalize();
182  }
183  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
184 
    // Map the success flag onto the process exit status.
185  if (success)
186  return 0;
187  return -1;
188 }
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
int main()
Definition: ad_example.cpp:191
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void setDocString(const char doc_string[])