Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
view/TestAssembly.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include <iostream>
11 
12 // Utilities
15 
16 // FENL
17 #include <BoxElemFixture.hpp>
18 #include <fenl_functors.hpp>
19 
20 struct Perf {
21  size_t global_elem_count ;
22  size_t global_node_count ;
23  double fill_time ;
24 
27  fill_time(0) {}
28 
29  void increment(const Perf& p) {
32  fill_time += p.fill_time;
33  }
34 
35  void scale(double s) {
36  fill_time *= s;
37  }
38 };
39 
40 template <typename Scalar, typename Device,
44  const int use_print ,
45  const int use_trials ,
46  const int use_nodes[] ,
47  Kokkos::View< Scalar* , Device >& residual,
49 {
50  using Teuchos::RCP;
51  using Teuchos::rcp;
52  using Teuchos::rcpFromRef;
53  using Teuchos::arrayView;
54 
56 
58  typedef typename LocalMatrixType::StaticCrsGraphType LocalGraphType ;
59 
61 
63 
64  typedef typename ElementComputationType::vector_type VectorType ;
65 
66  //------------------------------------
67 
68  // Decompose by node to avoid parallel communication in assembly
69 
70  const double bubble_x = 1.0 ;
71  const double bubble_y = 1.0 ;
72  const double bubble_z = 1.0 ;
73 
74  const FixtureType fixture( Kokkos::Example::BoxElemPart::DecomposeNode ,
75  1 , 0 ,
76  use_nodes[0] , use_nodes[1] , use_nodes[2] ,
77  bubble_x , bubble_y , bubble_z );
78 
79  //------------------------------------
80 
81  Kokkos::Timer wall_clock ;
82 
83  Perf perf_stats = Perf() ;
84 
85  for ( int itrial = 0 ; itrial < use_trials ; ++itrial ) {
86 
87  Perf perf = Perf() ;
88 
89  perf.global_elem_count = fixture.elem_count_global();
90  perf.global_node_count = fixture.node_count_global();
91 
92  //----------------------------------
93  // Create the local sparse matrix graph and element-to-graph map
94  // from the element->to->node identifier array.
95  // The graph only has rows for the owned nodes.
96 
97  typename NodeNodeGraphType::Times graph_times;
98  const NodeNodeGraphType
99  mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
100  graph_times );
101 
102  // Create the local sparse matrix from the graph:
103  jacobian = LocalMatrixType( mesh_to_graph.graph );
104 
105  //----------------------------------
106 
107  // Allocate solution vector for each node in the mesh and residual vector for each owned node
108  VectorType solution( "solution" , fixture.node_count() );
109  residual = VectorType( "residual" , fixture.node_count_owned() );
110 
111  // Create element computation functor
112  const ElementComputationType elemcomp( fixture , solution ,
113  mesh_to_graph.elem_graph ,
114  jacobian , residual );
115 
116  Kokkos::deep_copy( solution , Scalar(1.2345) );
117 
118  //--------------------------------
119  // Element contributions to residual and jacobian
120 
121  Kokkos::deep_copy( residual , Scalar(0) );
122  Kokkos::deep_copy( jacobian.coeff , Scalar(0) );
123 
124  wall_clock.reset();
125 
126  elemcomp.apply();
127 
128  Device().fence();
129  perf.fill_time = wall_clock.seconds();
130 
131  //--------------------------------
132 
133  perf_stats.increment(perf);
134 
135  }
136 
137  return perf_stats ;
138 }
139 
140 template<class ValueType>
141 bool compareValues(const ValueType& a1,
142  const std::string& a1_name,
143  const ValueType&a2,
144  const std::string& a2_name,
145  const ValueType& rel_tol, const ValueType& abs_tol,
147 {
148  bool success = true;
149 
150  ValueType err = std::abs(a1 - a2);
151  ValueType tol = abs_tol + rel_tol*std::max(std::abs(a1),std::abs(a2));
152  if (err > tol) {
153  out << "\nError, relErr(" << a1_name <<","
154  << a2_name << ") = relErr(" << a1 <<"," << a2 <<") = "
155  << err << " <= tol = " << tol << ": failed!\n";
156  success = false;
157  }
158 
159  return success;
160 }
161 
162 template <typename VectorType, typename MatrixType>
163 bool check_assembly(const VectorType& analytic_residual,
164  const MatrixType& analytic_jacobian,
165  const VectorType& fad_residual,
166  const MatrixType& fad_jacobian,
167  const std::string& test_name)
168 {
169  const double tol = 1e-14;
170  bool success = true;
172  Teuchos::VerboseObjectBase::getDefaultOStream();
173  std::stringstream buf;
174  Teuchos::FancyOStream fbuf(Teuchos::rcp(&buf,false));
175 
176  typename VectorType::HostMirror host_analytic_residual =
177  Kokkos::create_mirror_view(analytic_residual);
178  typename VectorType::HostMirror host_fad_residual =
179  Kokkos::create_mirror_view(fad_residual);
180  Kokkos::deep_copy( host_analytic_residual, analytic_residual );
181  Kokkos::deep_copy( host_fad_residual, fad_residual );
182 
183  fbuf << test_name << ":" << std::endl;
184 
185  if (host_analytic_residual.extent(0) != host_fad_residual.extent(0)) {
186  fbuf << "Analytic residual dimension "
187  << host_analytic_residual.extent(0)
188  << " does not match Fad residual dimension "
189  << host_fad_residual.extent(0) << std::endl;
190  success = false;
191  }
192  else {
193  const size_t num_node = host_analytic_residual.extent(0);
194  for (size_t i=0; i<num_node; ++i) {
195  success = success && compareValues(
196  host_analytic_residual(i), "analytic residual",
197  host_fad_residual(i), "Fad residual",
198  tol, tol, fbuf );
199  }
200  }
201 
202  typename MatrixType::HostMirror host_analytic_jacobian =
203  Kokkos::create_mirror_view(analytic_jacobian);
204  typename MatrixType::HostMirror host_fad_jacobian =
205  Kokkos::create_mirror_view(fad_jacobian);
206  Kokkos::deep_copy( host_analytic_jacobian, analytic_jacobian );
207  Kokkos::deep_copy( host_fad_jacobian, fad_jacobian );
208 
209  if (host_analytic_jacobian.extent(0) != host_fad_jacobian.extent(0)) {
210  fbuf << "Analytic Jacobian dimension "
211  << host_analytic_jacobian.extent(0)
212  << " does not match Fad Jacobian dimension "
213  << host_fad_jacobian.extent(0) << std::endl;
214  success = false;
215  }
216  else {
217  const size_t num_entry = host_analytic_jacobian.extent(0);
218  for (size_t i=0; i<num_entry; ++i) {
219  success = success && compareValues(
220  host_analytic_jacobian(i), "analytic Jacobian",
221  host_fad_jacobian(i), "Fad Jacobian",
222  tol, tol, fbuf );
223  }
224  }
225 
226  if (!success)
227  *out << buf.str();
228 
229  return success;
230 }
231 
232 template <class Device>
234  const int use_print ,
235  const int use_trials ,
236  const int n_begin ,
237  const int n_end ,
238  const int n_step ,
239  const bool quadratic ,
240  const bool check )
241 {
247 
248  std::cout.precision(8);
249  std::cout << std::endl
250  << "\"Grid Size\" , "
251  << "\"FEM Size\" , "
252  << "\"Analytic Fill Time\" , "
253  << "\"Fad Element Fill Slowdown\" , "
254  << "\"Fad Optimized Element Fill Slowdown\" , "
255  << "\"Fad QP Fill Slowdown\" , "
256  << std::endl;
257 
258  typedef Kokkos::View< double* , Device > vector_type ;
260  vector_type analytic_residual, fad_residual, fad_opt_residual,
261  fad_qp_residual;
262  matrix_type analytic_jacobian, fad_jacobian, fad_opt_jacobian,
263  fad_qp_jacobian;
264 
265  for (int n=n_begin; n<=n_end; n+=n_step) {
266  const int use_nodes[] = { n, n, n };
267  Perf perf_analytic, perf_fad, perf_fad_opt, perf_fad_qp;
268 
269  if (quadratic) {
270  perf_analytic =
271  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,Analytic>(
272  use_print, use_trials, use_nodes,
273  analytic_residual, analytic_jacobian );
274 
275  perf_fad =
276  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElement>(
277  use_print, use_trials, use_nodes,
278  fad_residual, fad_jacobian);
279 
280  perf_fad_opt =
281  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElementOptimized>(
282  use_print, use_trials, use_nodes,
283  fad_opt_residual, fad_opt_jacobian);
284 
285  perf_fad_qp =
286  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadQuadPoint>(
287  use_print, use_trials, use_nodes,
288  fad_qp_residual, fad_qp_jacobian);
289  }
290  else {
291  perf_analytic =
292  fenl_assembly<double,Device,BoxElemPart::ElemLinear,Analytic>(
293  use_print, use_trials, use_nodes,
294  analytic_residual, analytic_jacobian );
295 
296  perf_fad =
297  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElement>(
298  use_print, use_trials, use_nodes,
299  fad_residual, fad_jacobian);
300 
301  perf_fad_opt =
302  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElementOptimized>(
303  use_print, use_trials, use_nodes,
304  fad_opt_residual, fad_opt_jacobian);
305 
306  perf_fad_qp =
307  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadQuadPoint>(
308  use_print, use_trials, use_nodes,
309  fad_qp_residual, fad_qp_jacobian);
310  }
311  if (check) {
312  check_assembly( analytic_residual, analytic_jacobian.coeff,
313  fad_residual, fad_jacobian.coeff,
314  "Fad" );
315  check_assembly( analytic_residual, analytic_jacobian.coeff,
316  fad_opt_residual, fad_opt_jacobian.coeff,
317  "Optimized Fad" );
318  check_assembly( analytic_residual, analytic_jacobian.coeff,
319  fad_qp_residual, fad_qp_jacobian.coeff,
320  "QP Fad" );
321  }
322 
323  double s =
324  1000.0 / ( use_trials * perf_analytic.global_elem_count );
325  perf_analytic.scale(s);
326  perf_fad.scale(s);
327  perf_fad_opt.scale(s);
328  perf_fad_qp.scale(s);
329 
330  std::cout.precision(3);
331  std::cout << n << " , "
332  << perf_analytic.global_node_count << " , "
333  << std::setw(2)
334  << std::scientific
335  << perf_analytic.fill_time << " , "
336  << std::fixed << std::setw(6)
337  << perf_fad.fill_time / perf_analytic.fill_time << " , "
338  << perf_fad_opt.fill_time / perf_analytic.fill_time << " , "
339  << perf_fad_qp.fill_time / perf_analytic.fill_time << " , "
340  << std::endl;
341  }
342 }
const char * p
Perf fenl_assembly(const int use_print, const int use_trials, const int use_nodes[], Kokkos::View< Scalar *, Device > &residual, Kokkos::Example::FENL::CrsMatrix< Scalar, Device > &jacobian)
size_t global_node_count
abs(expr.val())
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
bool compareValues(const ValueType &a1, const std::string &a1_name, const ValueType &a2, const std::string &a2_name, const ValueType &rel_tol, const ValueType &abs_tol, Teuchos::FancyOStream &out)
double fill_time
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
size_t global_elem_count
void performance_test_driver(const int use_print, const int use_trials, const int n_begin, const int n_end, const int n_step, const bool quadratic, const bool check)
void increment(const Perf &p)
void scale(double s)
#define Method
const double tol
bool check_assembly(const VectorType &analytic_residual, const MatrixType &analytic_jacobian, const VectorType &fad_residual, const MatrixType &fad_jacobian, const std::string &test_name)
SimpleFad< ValueT > max(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
Partition a box of hexahedral elements among subdomains.
Definition: BoxElemPart.hpp:83
int n
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...