Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestAssembly.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include <iostream>
11 
12 // Utilities
15 
16 // FENL
17 #include <BoxElemFixture.hpp>
18 #include <fenl_functors.hpp>
19 
20 struct Perf {
23  double fill_time ;
24 
27  fill_time(0) {}
28 
29  void increment(const Perf& p) {
32  fill_time += p.fill_time;
33  }
34 
35  void scale(double s) {
36  fill_time *= s;
37  }
38 };
39 
40 template <typename Scalar, typename Device,
44  const int use_print ,
45  const int use_trials ,
46  const int use_nodes[] ,
47  Kokkos::View< Scalar* , Device >& residual,
49 {
50  using Teuchos::RCP;
51  using Teuchos::rcp;
52  using Teuchos::rcpFromRef;
53  using Teuchos::arrayView;
54 
56 
58  typedef typename LocalMatrixType::StaticCrsGraphType LocalGraphType ;
59 
61 
63 
64  typedef typename ElementComputationType::vector_type VectorType ;
65 
66  //------------------------------------
67 
68  // Decompose by node to avoid parallel communication in assembly
69 
70  const double bubble_x = 1.0 ;
71  const double bubble_y = 1.0 ;
72  const double bubble_z = 1.0 ;
73 
74  const FixtureType fixture( Kokkos::Example::BoxElemPart::DecomposeNode ,
75  1 , 0 ,
76  use_nodes[0] , use_nodes[1] , use_nodes[2] ,
77  bubble_x , bubble_y , bubble_z );
78 
79  //------------------------------------
80 
81  Kokkos::Timer wall_clock ;
82 
83  Perf perf_stats = Perf() ;
84 
85  for ( int itrial = 0 ; itrial < use_trials ; ++itrial ) {
86 
87  Perf perf = Perf() ;
88 
89  perf.global_elem_count = fixture.elem_count_global();
90  perf.global_node_count = fixture.node_count_global();
91 
92  //----------------------------------
93  // Create the local sparse matrix graph and element-to-graph map
94  // from the element->to->node identifier array.
95  // The graph only has rows for the owned nodes.
96 
97  typename NodeNodeGraphType::Times graph_times;
98  const NodeNodeGraphType
99  mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
100  graph_times );
101 
102  // Create the local sparse matrix from the graph:
103  jacobian = LocalMatrixType( mesh_to_graph.graph );
104 
105  //----------------------------------
106 
107  // Allocate solution vector for each node in the mesh and residual vector for each owned node
108  VectorType solution( "solution" , fixture.node_count() );
109  residual = VectorType( "residual" , fixture.node_count_owned() );
110 
111  // Create element computation functor
112  const ElementComputationType elemcomp( fixture , solution ,
113  mesh_to_graph.elem_graph ,
114  jacobian , residual );
115 
116  Kokkos::deep_copy( solution , Scalar(1.2345) );
117 
118  //--------------------------------
119  // Element contributions to residual and jacobian
120 
121  Kokkos::deep_copy( residual , Scalar(0) );
122  Kokkos::deep_copy( jacobian.coeff , Scalar(0) );
123 
124  wall_clock.reset();
125 
126  elemcomp.apply();
127 
128  Device().fence();
129  perf.fill_time = wall_clock.seconds();
130 
131  //--------------------------------
132 
133  perf_stats.increment(perf);
134 
135  }
136 
137  return perf_stats ;
138 }
139 
140 template<class ValueType>
141 bool compareValues(const ValueType& a1,
142  const std::string& a1_name,
143  const ValueType&a2,
144  const std::string& a2_name,
145  const ValueType& rel_tol, const ValueType& abs_tol,
147 {
148  bool success = true;
149 
150  ValueType err = std::abs(a1 - a2);
151  ValueType tol = abs_tol + rel_tol*std::max(std::abs(a1),std::abs(a2));
152  if (err > tol) {
153  out << "\nError, relErr(" << a1_name <<","
154  << a2_name << ") = relErr(" << a1 <<"," << a2 <<") = "
155  << err << " <= tol = " << tol << ": failed!\n";
156  success = false;
157  }
158 
159  return success;
160 }
161 
162 template <typename VectorType, typename MatrixType>
163 bool check_assembly(const VectorType& analytic_residual,
164  const MatrixType& analytic_jacobian,
165  const VectorType& fad_residual,
166  const MatrixType& fad_jacobian,
167  const std::string& test_name)
168 {
169  const double tol = 1e-14;
170  bool success = true;
172  Teuchos::VerboseObjectBase::getDefaultOStream();
173  std::stringstream buf;
174  Teuchos::FancyOStream fbuf(Teuchos::rcp(&buf,false));
175 
176  typename VectorType::HostMirror host_analytic_residual =
177  Kokkos::create_mirror_view(analytic_residual);
178  typename VectorType::HostMirror host_fad_residual =
179  Kokkos::create_mirror_view(fad_residual);
180  Kokkos::deep_copy( host_analytic_residual, analytic_residual );
181  Kokkos::deep_copy( host_fad_residual, fad_residual );
182 
183  fbuf << test_name << ":" << std::endl;
184 
185  if (host_analytic_residual.extent(0) != host_fad_residual.extent(0)) {
186  fbuf << "Analytic residual dimension "
187  << host_analytic_residual.extent(0)
188  << " does not match Fad residual dimension "
189  << host_fad_residual.extent(0) << std::endl;
190  success = false;
191  }
192  else {
193  const size_t num_node = host_analytic_residual.extent(0);
194  for (size_t i=0; i<num_node; ++i) {
195  success = success && compareValues(
196  host_analytic_residual(i), "analytic residual",
197  host_fad_residual(i), "Fad residual",
198  tol, tol, fbuf );
199  }
200  }
201 
202  typename MatrixType::HostMirror host_analytic_jacobian =
203  Kokkos::create_mirror_view(analytic_jacobian);
204  typename MatrixType::HostMirror host_fad_jacobian =
205  Kokkos::create_mirror_view(fad_jacobian);
206  Kokkos::deep_copy( host_analytic_jacobian, analytic_jacobian );
207  Kokkos::deep_copy( host_fad_jacobian, fad_jacobian );
208 
209  if (host_analytic_jacobian.extent(0) != host_fad_jacobian.extent(0)) {
210  fbuf << "Analytic Jacobian dimension "
211  << host_analytic_jacobian.extent(0)
212  << " does not match Fad Jacobian dimension "
213  << host_fad_jacobian.extent(0) << std::endl;
214  success = false;
215  }
216  else {
217  const size_t num_entry = host_analytic_jacobian.extent(0);
218  for (size_t i=0; i<num_entry; ++i) {
219  success = success && compareValues(
220  host_analytic_jacobian(i), "analytic Jacobian",
221  host_fad_jacobian(i), "Fad Jacobian",
222  tol, tol, fbuf );
223  }
224  }
225 
226  if (!success)
227  *out << buf.str();
228 
229  return success;
230 }
231 
232 template <class Device>
234  const int use_print ,
235  const int use_trials ,
236  const int n_begin ,
237  const int n_end ,
238  const int n_step ,
239  const bool quadratic ,
240  const bool check )
241 {
247 
248  std::cout.precision(8);
249  std::cout << std::endl
250  << "\"Grid Size\" , "
251  << "\"FEM Size\" , "
252  << "\"Analytic Fill Time\" , "
253  << "\"Fad Element Fill Slowdown\" , "
254  << "\"Fad Optimized Element Fill Slowdown\" , "
255  << "\"Fad QP Fill Slowdown\" , "
256  << std::endl;
257 
258  typedef Kokkos::View< double* , Device > vector_type ;
260  vector_type analytic_residual, fad_residual, fad_opt_residual,
261  fad_qp_residual;
262  matrix_type analytic_jacobian, fad_jacobian, fad_opt_jacobian,
263  fad_qp_jacobian;
264 
265  for (int n=n_begin; n<=n_end; n+=n_step) {
266  const int use_nodes[] = { n, n, n };
267  Perf perf_analytic, perf_fad, perf_fad_opt, perf_fad_qp;
268 
269  if (quadratic) {
270  perf_analytic =
271  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,Analytic>(
272  use_print, use_trials, use_nodes,
273  analytic_residual, analytic_jacobian );
274 
275  perf_fad =
276  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElement>(
277  use_print, use_trials, use_nodes,
278  fad_residual, fad_jacobian);
279 
280  perf_fad_opt =
281  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElementOptimized>(
282  use_print, use_trials, use_nodes,
283  fad_opt_residual, fad_opt_jacobian);
284 
285  perf_fad_qp =
286  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadQuadPoint>(
287  use_print, use_trials, use_nodes,
288  fad_qp_residual, fad_qp_jacobian);
289  }
290  else {
291  perf_analytic =
292  fenl_assembly<double,Device,BoxElemPart::ElemLinear,Analytic>(
293  use_print, use_trials, use_nodes,
294  analytic_residual, analytic_jacobian );
295 
296  perf_fad =
297  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElement>(
298  use_print, use_trials, use_nodes,
299  fad_residual, fad_jacobian);
300 
301  perf_fad_opt =
302  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElementOptimized>(
303  use_print, use_trials, use_nodes,
304  fad_opt_residual, fad_opt_jacobian);
305 
306  perf_fad_qp =
307  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadQuadPoint>(
308  use_print, use_trials, use_nodes,
309  fad_qp_residual, fad_qp_jacobian);
310  }
311  if (check) {
312  check_assembly( analytic_residual, analytic_jacobian.coeff,
313  fad_residual, fad_jacobian.coeff,
314  "Fad" );
315  check_assembly( analytic_residual, analytic_jacobian.coeff,
316  fad_opt_residual, fad_opt_jacobian.coeff,
317  "Optimized Fad" );
318  check_assembly( analytic_residual, analytic_jacobian.coeff,
319  fad_qp_residual, fad_qp_jacobian.coeff,
320  "QP Fad" );
321  }
322 
323  double s =
324  1000.0 / ( use_trials * perf_analytic.global_elem_count );
325  perf_analytic.scale(s);
326  perf_fad.scale(s);
327  perf_fad_opt.scale(s);
328  perf_fad_qp.scale(s);
329 
330  std::cout.precision(3);
331  std::cout << n << " , "
332  << perf_analytic.global_node_count << " , "
333  << std::setw(2)
334  << std::scientific
335  << perf_analytic.fill_time << " , "
336  << std::fixed << std::setw(6)
337  << perf_fad.fill_time / perf_analytic.fill_time << " , "
338  << perf_fad_opt.fill_time / perf_analytic.fill_time << " , "
339  << perf_fad_qp.fill_time / perf_analytic.fill_time << " , "
340  << std::endl;
341  }
342 }
const char * p
Perf fenl_assembly(const int use_print, const int use_trials, const int use_nodes[], Kokkos::View< Scalar *, Device > &residual, Kokkos::Example::FENL::CrsMatrix< Scalar, Device > &jacobian)
size_t global_node_count
abs(expr.val())
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
bool compareValues(const ValueType &a1, const std::string &a1_name, const ValueType &a2, const std::string &a2_name, const ValueType &rel_tol, const ValueType &abs_tol, Teuchos::FancyOStream &out)
double fill_time
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
size_t global_elem_count
void performance_test_driver(const int use_print, const int use_trials, const int n_begin, const int n_end, const int n_step, const bool quadratic, const bool check)
void increment(const Perf &p)
void scale(double s)
#define Method
const double tol
bool check_assembly(const VectorType &analytic_residual, const MatrixType &analytic_jacobian, const VectorType &fad_residual, const MatrixType &fad_jacobian, const std::string &test_name)
SimpleFad< ValueT > max(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
Partition a box of hexahedral elements among subdomains.
Definition: BoxElemPart.hpp:83
int n
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...