17 #include "Sacado_mpl_range_c.hpp"
18 #include "Sacado_mpl_for_each.hpp"
19 #include "Sacado_mpl_integral_c.hpp"
22 #include <Kokkos_UnorderedMap.hpp>
23 #include <Kokkos_StaticCrsGraph.hpp>
24 #include <Kokkos_Timer.hpp>
80 template <
typename Scalar,
typename Device>
84 const int use_trials ,
85 const int use_nodes[] ,
87 Kokkos::View< Scalar* , Kokkos::LayoutLeft, Device >& nodal_residual)
91 using Teuchos::rcpFromRef;
92 using Teuchos::arrayView;
99 typedef typename LocalMatrixType::StaticCrsGraphType
108 ElementComputationType ;
111 DirichletComputationType ;
113 typedef typename ElementComputationType::vector_type VectorType ;
116 typename FixtureType::comm_list_type ,
117 typename FixtureType::send_nodeid_type ,
118 VectorType > ImportType ;
122 const int print_flag = use_print && std::is_same< Kokkos::HostSpace , typename Device::memory_space >::value ;
124 const int comm_rank = comm->getRank();
125 const int comm_size = comm->getSize();
129 const double bubble_x = 1.0 ;
130 const double bubble_y = 1.0 ;
131 const double bubble_z = 1.0 ;
134 comm_size , comm_rank ,
135 use_nodes[0] , use_nodes[1] , use_nodes[2] ,
136 bubble_x , bubble_y , bubble_z );
138 if (
maximum(*comm, ( fixture.ok() ? 0 : 1 ) ) ) {
139 throw std::runtime_error(std::string(
"Problem fixture setup failed"));
144 const ImportType comm_nodal_import(
146 fixture.recv_node() ,
147 fixture.send_node() ,
148 fixture.send_nodeid() ,
149 fixture.node_count_owned() ,
150 fixture.node_count() - fixture.node_count_owned() );
154 const double bc_lower_value = 1 ;
155 const double bc_upper_value = 2 ;
157 CoeffFunctionType diffusion_coefficient( 1.0, 0.1, 1.0, 5 );
163 std::cout <<
"ElemNode {" << std::endl ;
164 for (
unsigned ielem = 0 ; ielem < fixture.elem_count() ; ++ielem ) {
165 std::cout <<
" elem[" << ielem <<
"]{" ;
166 for (
unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
167 std::cout <<
" " << fixture.elem_node(ielem,inode);
169 std::cout <<
" }" << std::endl ;
171 std::cout <<
"}" << std::endl ;
176 Kokkos::Timer wall_clock ;
180 for (
int itrial = 0 ; itrial < use_trials ; ++itrial ) {
192 typename NodeNodeGraphType::Times graph_times;
193 const NodeNodeGraphType
194 mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
198 LocalMatrixType jacobian( mesh_to_graph.graph );
203 VectorType nodal_solution(
"nodal_solution" , fixture.node_count() );
204 nodal_residual = VectorType(
"nodal_residual" , fixture.node_count_owned() );
212 const ElementComputationType elemcomp( fixture , diffusion_coefficient ,
214 mesh_to_graph.elem_graph ,
215 jacobian , nodal_residual ,
219 const DirichletComputationType dirichlet(
220 fixture , nodal_solution , jacobian , nodal_residual ,
233 comm_nodal_import( nodal_solution );
263 perf_stats.
min(perf);
270 template <
typename ScalarViewType,
typename EnsembleViewType>
272 const EnsembleViewType& ensemble_residual)
274 const double tol = 1e-14;
277 Teuchos::VerboseObjectBase::getDefaultOStream();
278 std::stringstream buf;
281 typename ScalarViewType::HostMirror host_scalar_residual =
283 typename EnsembleViewType::HostMirror host_ensemble_residual =
289 host_ensemble_residual.extent(0), fbuf, success );
291 const size_t num_node = host_scalar_residual.extent(0);
293 for (
size_t i=0; i<num_node; ++i) {
294 for (
size_t j=0;
j<num_ensemble; ++
j) {
297 tol, fbuf, success );
307 template <
class Storage>
320 const int use_print_ ,
321 const int use_trials_ ,
322 const int use_nodes_[] ,
332 template <
typename ArgT>
334 const int ensemble = ArgT::value;
335 typedef typename Storage::template apply_N<ensemble> NewStorageApply;
339 typedef Kokkos::View< Scalar* , Kokkos::LayoutLeft, Device > scalar_vector_type ;
340 typedef Kokkos::View< mp_vector_type* , Kokkos::LayoutLeft, Device > ensemble_vector_type ;
342 scalar_vector_type scalar_residual;
345 fenl_assembly<Scalar,Device>(
347 scalar_dev_config, scalar_residual );
349 ensemble_vector_type ensemble_residual;
351 #if defined( KOKKOS_ENABLE_CUDA )
352 const bool is_cuda = std::is_same<Device,Kokkos::Cuda>::value;
354 const bool is_cuda = false ;
359 ensemble_dev_config.
block_dim.
y = block_size/ensemble;
362 fenl_assembly<mp_vector_type,Device>(
364 ensemble_dev_config, ensemble_residual);
372 if (comm->getRank() == 0) {
373 std::cout.precision(3);
374 std::cout << use_nodes[0] <<
" , "
376 << std::setw(2) << ensemble <<
" , "
380 << std::fixed << std::setw(6)
385 << std::fixed << std::setw(6)
392 template <
class Storage,
int entry_min,
int entry_max,
int entry_step>
394 const int use_print ,
395 const int use_trials ,
396 const int use_nodes[] ,
400 if (comm->getRank() == 0) {
401 std::cout.precision(8);
402 std::cout << std::endl
403 <<
"\"Grid Size\" , "
405 <<
"\"Ensemble Size\" , "
406 <<
"\"Scalar Import Time (ms)\" , "
407 <<
"\"Ensemble Import Time (ms)\" , "
408 <<
"\"Ensemble Import Speedup\" , "
409 <<
"\"Scalar Fill Time (ms)\" , "
410 <<
"\"Ensemble Fill Time (ms)\" , "
411 <<
"\"Ensemble Fill Speedup\" , "
416 typedef Sacado::mpl::range_c< int, entry_min, entry_max+1, entry_step > Range;
418 use_nodes, check, dev_config);
419 Sacado::mpl::for_each_no_kokkos<Range>
f(op);
Perf fenl_assembly(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], Kokkos::Example::FENL::DeviceConfig dev_config, Kokkos::View< Scalar *, Kokkos::LayoutLeft, Device > &nodal_residual)
bool check_residuals(const ScalarViewType &scalar_residual, const EnsembleViewType &ensemble_residual)
Stokhos::StandardStorage< int, double > storage_type
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
#define TEUCHOS_TEST_FLOATING_EQUALITY(v1, v2, tol, out, success)
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P...> >::value, unsigned >::type dimension_scalar(const View< T, P...> &view)
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void increment(const Perf &p)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j)-expr2.val(j)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
#define TEUCHOS_TEST_EQUALITY(v1, v2, out, success)
double maximum(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, double local)
ScalarType f(const Teuchos::Array< ScalarType > &x, double a, double b)
void reduceMax(const Teuchos::Comm< int > &comm)
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)