Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestAssembly.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <iostream>
42 
43 // Utilities
46 
47 // FENL
48 #include <BoxElemFixture.hpp>
49 #include <fenl_functors.hpp>
50 
51 struct Perf {
54  double fill_time ;
55 
58  fill_time(0) {}
59 
60  void increment(const Perf& p) {
63  fill_time += p.fill_time;
64  }
65 
66  void scale(double s) {
67  fill_time *= s;
68  }
69 };
70 
71 template <typename Scalar, typename Device,
75  const int use_print ,
76  const int use_trials ,
77  const int use_nodes[] ,
78  Kokkos::View< Scalar* , Device >& residual,
80 {
81  using Teuchos::RCP;
82  using Teuchos::rcp;
83  using Teuchos::rcpFromRef;
84  using Teuchos::arrayView;
85 
87 
89  typedef typename LocalMatrixType::StaticCrsGraphType LocalGraphType ;
90 
92 
94 
95  typedef typename ElementComputationType::vector_type VectorType ;
96 
97  //------------------------------------
98 
99  // Decompose by node to avoid parallel communication in assembly
100 
101  const double bubble_x = 1.0 ;
102  const double bubble_y = 1.0 ;
103  const double bubble_z = 1.0 ;
104 
105  const FixtureType fixture( Kokkos::Example::BoxElemPart::DecomposeNode ,
106  1 , 0 ,
107  use_nodes[0] , use_nodes[1] , use_nodes[2] ,
108  bubble_x , bubble_y , bubble_z );
109 
110  //------------------------------------
111 
112  Kokkos::Impl::Timer wall_clock ;
113 
114  Perf perf_stats = Perf() ;
115 
116  for ( int itrial = 0 ; itrial < use_trials ; ++itrial ) {
117 
118  Perf perf = Perf() ;
119 
120  perf.global_elem_count = fixture.elem_count_global();
121  perf.global_node_count = fixture.node_count_global();
122 
123  //----------------------------------
124  // Create the local sparse matrix graph and element-to-graph map
125  // from the element->to->node identifier array.
126  // The graph only has rows for the owned nodes.
127 
128  typename NodeNodeGraphType::Times graph_times;
129  const NodeNodeGraphType
130  mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
131  graph_times );
132 
133  // Create the local sparse matrix from the graph:
134  jacobian = LocalMatrixType( mesh_to_graph.graph );
135 
136  //----------------------------------
137 
138  // Allocate solution vector for each node in the mesh and residual vector for each owned node
139  VectorType solution( "solution" , fixture.node_count() );
140  residual = VectorType( "residual" , fixture.node_count_owned() );
141 
142  // Create element computation functor
143  const ElementComputationType elemcomp( fixture , solution ,
144  mesh_to_graph.elem_graph ,
145  jacobian , residual );
146 
147  Kokkos::deep_copy( solution , Scalar(1.2345) );
148 
149  //--------------------------------
150  // Element contributions to residual and jacobian
151 
152  Kokkos::deep_copy( residual , Scalar(0) );
153  Kokkos::deep_copy( jacobian.coeff , Scalar(0) );
154 
155  wall_clock.reset();
156 
157  elemcomp.apply();
158 
159  Device::fence();
160  perf.fill_time = wall_clock.seconds();
161 
162  //--------------------------------
163 
164  perf_stats.increment(perf);
165 
166  }
167 
168  return perf_stats ;
169 }
170 
171 template<class ValueType>
172 bool compareValues(const ValueType& a1,
173  const std::string& a1_name,
174  const ValueType&a2,
175  const std::string& a2_name,
176  const ValueType& rel_tol, const ValueType& abs_tol,
178 {
179  bool success = true;
180 
181  ValueType err = std::abs(a1 - a2);
182  ValueType tol = abs_tol + rel_tol*std::max(std::abs(a1),std::abs(a2));
183  if (err > tol) {
184  out << "\nError, relErr(" << a1_name <<","
185  << a2_name << ") = relErr(" << a1 <<"," << a2 <<") = "
186  << err << " <= tol = " << tol << ": failed!\n";
187  success = false;
188  }
189 
190  return success;
191 }
192 
193 template <typename VectorType, typename MatrixType>
194 bool check_assembly(const VectorType& analytic_residual,
195  const MatrixType& analytic_jacobian,
196  const VectorType& fad_residual,
197  const MatrixType& fad_jacobian,
198  const std::string& test_name)
199 {
200  const double tol = 1e-14;
201  bool success = true;
203  Teuchos::VerboseObjectBase::getDefaultOStream();
204  std::stringstream buf;
205  Teuchos::FancyOStream fbuf(Teuchos::rcp(&buf,false));
206 
207  typename VectorType::HostMirror host_analytic_residual =
208  Kokkos::create_mirror_view(analytic_residual);
209  typename VectorType::HostMirror host_fad_residual =
210  Kokkos::create_mirror_view(fad_residual);
211  Kokkos::deep_copy( host_analytic_residual, analytic_residual );
212  Kokkos::deep_copy( host_fad_residual, fad_residual );
213 
214  fbuf << test_name << ":" << std::endl;
215 
216  if (host_analytic_residual.extent(0) != host_fad_residual.extent(0)) {
217  fbuf << "Analytic residual dimension "
218  << host_analytic_residual.extent(0)
219  << " does not match Fad residual dimension "
220  << host_fad_residual.extent(0) << std::endl;
221  success = false;
222  }
223  else {
224  const size_t num_node = host_analytic_residual.extent(0);
225  for (size_t i=0; i<num_node; ++i) {
226  success = success && compareValues(
227  host_analytic_residual(i), "analytic residual",
228  host_fad_residual(i), "Fad residual",
229  tol, tol, fbuf );
230  }
231  }
232 
233  typename MatrixType::HostMirror host_analytic_jacobian =
234  Kokkos::create_mirror_view(analytic_jacobian);
235  typename MatrixType::HostMirror host_fad_jacobian =
236  Kokkos::create_mirror_view(fad_jacobian);
237  Kokkos::deep_copy( host_analytic_jacobian, analytic_jacobian );
238  Kokkos::deep_copy( host_fad_jacobian, fad_jacobian );
239 
240  if (host_analytic_jacobian.extent(0) != host_fad_jacobian.extent(0)) {
241  fbuf << "Analytic Jacobian dimension "
242  << host_analytic_jacobian.extent(0)
243  << " does not match Fad Jacobian dimension "
244  << host_fad_jacobian.extent(0) << std::endl;
245  success = false;
246  }
247  else {
248  const size_t num_entry = host_analytic_jacobian.extent(0);
249  for (size_t i=0; i<num_entry; ++i) {
250  success = success && compareValues(
251  host_analytic_jacobian(i), "analytic Jacobian",
252  host_fad_jacobian(i), "Fad Jacobian",
253  tol, tol, fbuf );
254  }
255  }
256 
257  if (!success)
258  *out << buf.str();
259 
260  return success;
261 }
262 
263 template <class Device>
265  const int use_print ,
266  const int use_trials ,
267  const int n_begin ,
268  const int n_end ,
269  const int n_step ,
270  const bool quadratic ,
271  const bool check )
272 {
278 
279  std::cout.precision(8);
280  std::cout << std::endl
281  << "\"Grid Size\" , "
282  << "\"FEM Size\" , "
283  << "\"Analytic Fill Time\" , "
284  << "\"Fad Element Fill Slowdown\" , "
285  << "\"Fad Optimized Element Fill Slowdown\" , "
286  << "\"Fad QP Fill Slowdown\" , "
287  << std::endl;
288 
289  typedef Kokkos::View< double* , Device > vector_type ;
291  vector_type analytic_residual, fad_residual, fad_opt_residual,
292  fad_qp_residual;
293  matrix_type analytic_jacobian, fad_jacobian, fad_opt_jacobian,
294  fad_qp_jacobian;
295 
296  for (int n=n_begin; n<=n_end; n+=n_step) {
297  const int use_nodes[] = { n, n, n };
298  Perf perf_analytic, perf_fad, perf_fad_opt, perf_fad_qp;
299 
300  if (quadratic) {
301  perf_analytic =
302  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,Analytic>(
303  use_print, use_trials, use_nodes,
304  analytic_residual, analytic_jacobian );
305 
306  perf_fad =
307  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElement>(
308  use_print, use_trials, use_nodes,
309  fad_residual, fad_jacobian);
310 
311  perf_fad_opt =
312  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElementOptimized>(
313  use_print, use_trials, use_nodes,
314  fad_opt_residual, fad_opt_jacobian);
315 
316  perf_fad_qp =
317  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadQuadPoint>(
318  use_print, use_trials, use_nodes,
319  fad_qp_residual, fad_qp_jacobian);
320  }
321  else {
322  perf_analytic =
323  fenl_assembly<double,Device,BoxElemPart::ElemLinear,Analytic>(
324  use_print, use_trials, use_nodes,
325  analytic_residual, analytic_jacobian );
326 
327  perf_fad =
328  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElement>(
329  use_print, use_trials, use_nodes,
330  fad_residual, fad_jacobian);
331 
332  perf_fad_opt =
333  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElementOptimized>(
334  use_print, use_trials, use_nodes,
335  fad_opt_residual, fad_opt_jacobian);
336 
337  perf_fad_qp =
338  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadQuadPoint>(
339  use_print, use_trials, use_nodes,
340  fad_qp_residual, fad_qp_jacobian);
341  }
342  if (check) {
343  check_assembly( analytic_residual, analytic_jacobian.coeff,
344  fad_residual, fad_jacobian.coeff,
345  "Fad" );
346  check_assembly( analytic_residual, analytic_jacobian.coeff,
347  fad_opt_residual, fad_opt_jacobian.coeff,
348  "Optimized Fad" );
349  check_assembly( analytic_residual, analytic_jacobian.coeff,
350  fad_qp_residual, fad_qp_jacobian.coeff,
351  "QP Fad" );
352  }
353 
354  double s =
355  1000.0 / ( use_trials * perf_analytic.global_elem_count );
356  perf_analytic.scale(s);
357  perf_fad.scale(s);
358  perf_fad_opt.scale(s);
359  perf_fad_qp.scale(s);
360 
361  std::cout.precision(3);
362  std::cout << n << " , "
363  << perf_analytic.global_node_count << " , "
364  << std::setw(2)
365  << std::scientific
366  << perf_analytic.fill_time << " , "
367  << std::fixed << std::setw(6)
368  << perf_fad.fill_time / perf_analytic.fill_time << " , "
369  << perf_fad_opt.fill_time / perf_analytic.fill_time << " , "
370  << perf_fad_qp.fill_time / perf_analytic.fill_time << " , "
371  << std::endl;
372  }
373 }
Perf fenl_assembly(const int use_print, const int use_trials, const int use_nodes[], Kokkos::View< Scalar *, Device > &residual, Kokkos::Example::FENL::CrsMatrix< Scalar, Device > &jacobian)
size_t global_node_count
abs(expr.val())
bool compareValues(const ValueType &a1, const std::string &a1_name, const ValueType &a2, const std::string &a2_name, const ValueType &rel_tol, const ValueType &abs_tol, Teuchos::FancyOStream &out)
double fill_time
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
size_t global_elem_count
void performance_test_driver(const int use_print, const int use_trials, const int n_begin, const int n_end, const int n_step, const bool quadratic, const bool check)
void increment(const Perf &p)
void scale(double s)
const double tol
bool check_assembly(const VectorType &analytic_residual, const MatrixType &analytic_jacobian, const VectorType &fad_residual, const MatrixType &fad_jacobian, const std::string &test_name)
SimpleFad< ValueT > max(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
Partition a box of hexahedral elements among subdomains.
int n
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...