// Fragment of a function template that assembles a finite element residual
// and Jacobian and, per the timer and trial loop below, appears to time it.
// NOTE(review): presumably this is fenl_assembly; the leading integers on each
// line look like listing line numbers, and the gaps between them suggest that
// lines are missing from this excerpt. Confirm against the full file.
//
// Visible parameters:
//   use_trials - upper bound of the trial loop below.
//   use_nodes  - array indexed at 0, 1 and 2 below (one entry per axis,
//                presumably node counts; confirm).
//   residual   - Kokkos view of Scalar on Device; reassigned inside the loop,
//                so the caller sees the vector allocated by the last trial.
40 template <
typename Scalar,
typename Device,
45 const int use_trials ,
46 const int use_nodes[] ,
47 Kokkos::View< Scalar* , Device >& residual,
52 using Teuchos::rcpFromRef;
53 using Teuchos::arrayView;
58 typedef typename LocalMatrixType::StaticCrsGraphType LocalGraphType ;
64 typedef typename ElementComputationType::vector_type VectorType ;
// All three bubble parameters are 1.0. They are passed together with the
// three use_nodes entries to a call whose head is not visible here
// (presumably the mesh fixture constructor; confirm).
70 const double bubble_x = 1.0 ;
71 const double bubble_y = 1.0 ;
72 const double bubble_z = 1.0 ;
76 use_nodes[0] , use_nodes[1] , use_nodes[2] ,
77 bubble_x , bubble_y , bubble_z );
81 Kokkos::Timer wall_clock ;
// Every trial repeats the full setup: graph construction, matrix and vector
// allocation, and initialization of the values.
85 for (
int itrial = 0 ; itrial < use_trials ; ++itrial ) {
97 typename NodeNodeGraphType::Times graph_times;
// Build the node-to-node graph from the fixture element-node data and the
// owned node count; remaining constructor arguments are not visible here.
98 const NodeNodeGraphType
99 mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
// The Jacobian is recreated from the freshly built graph.
103 jacobian = LocalMatrixType( mesh_to_graph.graph );
// solution is sized by node_count(), residual by node_count_owned().
108 VectorType solution(
"solution" , fixture.node_count() );
109 residual = VectorType(
"residual" , fixture.node_count_owned() );
// The element computation object is given the fixture, the solution, the
// element graph and the output Jacobian and residual.
112 const ElementComputationType elemcomp( fixture , solution ,
113 mesh_to_graph.elem_graph ,
114 jacobian , residual );
// Fill the solution with the constant 1.2345.
116 Kokkos::deep_copy( solution , Scalar(1.2345) );
// Zero the residual and the Jacobian coefficients (presumably so the
// assembly step, not visible here, starts from zero; confirm).
121 Kokkos::deep_copy( residual , Scalar(0) );
122 Kokkos::deep_copy( jacobian.coeff , Scalar(0) );
// Fragment of a value-comparison helper template.
// NOTE(review): presumably this is compareValues; several parameters and the
// computation of err and tol are missing from this excerpt. Confirm against
// the full file.
//
// Visible parameters:
//   a1_name, a2_name - labels printed in the failure message for a1 and a2.
//   rel_tol, abs_tol - tolerances of type ValueType; how they are combined
//                      into tol is not visible here.
140 template<
class ValueType>
142 const std::string& a1_name,
144 const std::string& a2_name,
145 const ValueType& rel_tol,
const ValueType& abs_tol,
// Failure report written to out: the two labels, the two values, then err and
// tol. The condition that guards this statement is not visible here.
153 out <<
"\nError, relErr(" << a1_name <<
","
154 << a2_name <<
") = relErr(" << a1 <<
"," << a2 <<
") = "
155 << err <<
" <= tol = " << tol <<
": failed!\n";
// Fragment of a routine that compares an analytic residual and Jacobian with
// their Fad counterparts entry by entry.
// NOTE(review): presumably this is check_assembly; the first parameter, the
// declaration of fbuf, the comparison calls and the return paths are missing
// from this excerpt. Confirm against the full file.
//
// Visible parameters:
//   analytic_jacobian - reference Jacobian values.
//   fad_residual      - residual to be checked.
//   fad_jacobian      - Jacobian values to be checked.
//   test_name         - label written at the start of the report.
162 template <
typename VectorType,
typename MatrixType>
164 const MatrixType& analytic_jacobian,
165 const VectorType& fad_residual,
166 const MatrixType& fad_jacobian,
167 const std::string& test_name)
// Tolerance constant; how it is handed to the comparisons is not visible.
169 const double tol = 1e-14;
172 Teuchos::VerboseObjectBase::getDefaultOStream();
173 std::stringstream buf;
// Create host mirrors of both residuals and copy the data into them so the
// entries can be read in the plain loop below.
176 typename VectorType::HostMirror host_analytic_residual =
177 Kokkos::create_mirror_view(analytic_residual);
178 typename VectorType::HostMirror host_fad_residual =
179 Kokkos::create_mirror_view(fad_residual);
180 Kokkos::deep_copy( host_analytic_residual, analytic_residual );
181 Kokkos::deep_copy( host_fad_residual, fad_residual );
// fbuf is declared on a line not visible here (presumably a stream that
// writes into buf; confirm).
183 fbuf << test_name <<
":" << std::endl;
// Report a mismatch in the leading extent of the two residuals.
185 if (host_analytic_residual.extent(0) != host_fad_residual.extent(0)) {
186 fbuf <<
"Analytic residual dimension "
187 << host_analytic_residual.extent(0)
188 <<
" does not match Fad residual dimension "
189 << host_fad_residual.extent(0) << std::endl;
// Walk the residual entries; each pair of values is passed with its label to
// a call whose head is not visible (presumably compareValues; confirm).
193 const size_t num_node = host_analytic_residual.extent(0);
194 for (
size_t i=0;
i<num_node; ++
i) {
196 host_analytic_residual(
i),
"analytic residual",
197 host_fad_residual(
i),
"Fad residual",
// Same mirror-and-copy sequence for the Jacobian values. Both are indexed
// with a single index below.
202 typename MatrixType::HostMirror host_analytic_jacobian =
203 Kokkos::create_mirror_view(analytic_jacobian);
204 typename MatrixType::HostMirror host_fad_jacobian =
205 Kokkos::create_mirror_view(fad_jacobian);
206 Kokkos::deep_copy( host_analytic_jacobian, analytic_jacobian );
207 Kokkos::deep_copy( host_fad_jacobian, fad_jacobian );
// Report a mismatch in the leading extent of the two Jacobian value arrays.
209 if (host_analytic_jacobian.extent(0) != host_fad_jacobian.extent(0)) {
210 fbuf <<
"Analytic Jacobian dimension "
211 << host_analytic_jacobian.extent(0)
212 <<
" does not match Fad Jacobian dimension "
213 << host_fad_jacobian.extent(0) << std::endl;
// Walk the Jacobian entries in the same way as the residual entries above.
217 const size_t num_entry = host_analytic_jacobian.extent(0);
218 for (
size_t i=0;
i<num_entry; ++
i) {
220 host_analytic_jacobian(
i),
"analytic Jacobian",
221 host_fad_jacobian(
i),
"Fad Jacobian",
// Fragment of the driver that runs the assembly variants over a range of grid
// sizes and prints a comma-separated table.
// NOTE(review): presumably this is performance_test_driver; several
// parameters, the branch that selects between the quadratic and linear runs,
// the check calls and the definition of s are missing from this excerpt.
// Confirm against the full file.
//
// Visible parameters:
//   use_print  - forwarded unchanged to every fenl_assembly call.
//   use_trials - forwarded unchanged to every fenl_assembly call.
//   quadratic  - not read in the visible lines (presumably chooses between
//                the ElemQuadratic and ElemLinear groups below; confirm).
232 template <
class Device>
234 const int use_print ,
235 const int use_trials ,
239 const bool quadratic ,
// Table header: quoted column titles separated by commas.
248 std::cout.precision(8);
249 std::cout << std::endl
250 <<
"\"Grid Size\" , "
252 <<
"\"Analytic Fill Time\" , "
253 <<
"\"Fad Element Fill Slowdown\" , "
254 <<
"\"Fad Optimized Element Fill Slowdown\" , "
255 <<
"\"Fad QP Fill Slowdown\" , "
258 typedef Kokkos::View< double* , Device > vector_type ;
// Result holders are declared outside the size loop and passed to every
// assembly call below.
260 vector_type analytic_residual, fad_residual, fad_opt_residual,
262 matrix_type analytic_jacobian, fad_jacobian, fad_opt_jacobian,
// Sweep n from n_begin to n_end inclusive in steps of n_step; the same n is
// used for all three entries of use_nodes.
265 for (
int n=n_begin;
n<=n_end;
n+=n_step) {
266 const int use_nodes[] = {
n,
n, n };
267 Perf perf_analytic, perf_fad, perf_fad_opt, perf_fad_qp;
// ElemQuadratic group: one call per method (Analytic, FadElement,
// FadElementOptimized, FadQuadPoint), each writing its own residual and
// Jacobian.
271 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,Analytic>(
272 use_print, use_trials, use_nodes,
273 analytic_residual, analytic_jacobian );
276 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElement>(
277 use_print, use_trials, use_nodes,
278 fad_residual, fad_jacobian);
281 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElementOptimized>(
282 use_print, use_trials, use_nodes,
283 fad_opt_residual, fad_opt_jacobian);
286 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadQuadPoint>(
287 use_print, use_trials, use_nodes,
288 fad_qp_residual, fad_qp_jacobian);
// ElemLinear group: the same four methods with the same output objects.
292 fenl_assembly<double,Device,BoxElemPart::ElemLinear,Analytic>(
293 use_print, use_trials, use_nodes,
294 analytic_residual, analytic_jacobian );
297 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElement>(
298 use_print, use_trials, use_nodes,
299 fad_residual, fad_jacobian);
302 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElementOptimized>(
303 use_print, use_trials, use_nodes,
304 fad_opt_residual, fad_opt_jacobian);
307 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadQuadPoint>(
308 use_print, use_trials, use_nodes,
309 fad_qp_residual, fad_qp_jacobian);
// Each Fad residual and Jacobian coefficient array is passed to a call whose
// head is not visible (presumably check_assembly against the analytic
// results; confirm).
313 fad_residual, fad_jacobian.coeff,
316 fad_opt_residual, fad_opt_jacobian.coeff,
319 fad_qp_residual, fad_qp_jacobian.coeff,
// Scale the timing records by s; the definition of s is not visible here.
325 perf_analytic.
scale(s);
327 perf_fad_opt.
scale(s);
328 perf_fad_qp.
scale(s);
// Data row: lower precision, starting with the grid size n.
330 std::cout.precision(3);
331 std::cout <<
n <<
" , "
336 << std::fixed << std::setw(6)
Perf fenl_assembly(const int use_print, const int use_trials, const int use_nodes[], Kokkos::View< Scalar *, Device > &residual, Kokkos::Example::FENL::CrsMatrix< Scalar, Device > &jacobian)
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
bool compareValues(const ValueType &a1, const std::string &a1_name, const ValueType &a2, const std::string &a2_name, const ValueType &rel_tol, const ValueType &abs_tol, Teuchos::FancyOStream &out)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void performance_test_driver(const int use_print, const int use_trials, const int n_begin, const int n_end, const int n_step, const bool quadratic, const bool check)
void increment(const Perf &p)
bool check_assembly(const VectorType &analytic_residual, const MatrixType &analytic_jacobian, const VectorType &fad_residual, const MatrixType &fad_jacobian, const std::string &test_name)
SimpleFad< ValueT > max(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
Partition a box of hexahedral elements among subdomains.
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...