34 #include "impl/Kokkos_Timer.hpp" 
   36 template<
typename FluxView, 
typename WgbView, 
typename SrcView,
 
   37          typename WbsView, 
typename ResidualView>
 
   39                   const SrcView& src, 
const WbsView& wbs,
 
   40                   const ResidualView& residual)
 
   42   typedef typename ResidualView::execution_space execution_space;
 
   43   typedef typename ResidualView::non_const_value_type scalar_type;
 
   45   const size_t num_cells = wgb.extent(0);
 
   46   const int num_basis    = wgb.extent(1);
 
   47   const int num_points   = wgb.extent(2);
 
   48   const int num_dim      = wgb.extent(3);
 
   50   Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
 
   51                        KOKKOS_LAMBDA (
const size_t cell)
 
   53     scalar_type value, value2;
 
   54     for (
int basis=0; basis<num_basis; ++basis) {
 
   57       for (
int qp=0; qp<num_points; ++qp) {
 
   58         for (
int dim=0; dim<num_dim; ++dim)
 
   59           value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
 
   60         value2 += src(cell,qp)*wbs(cell,basis,qp);
 
   62       residual(cell,basis) = value+value2;
 
   67 template<
typename FluxView, 
typename WgbView, 
typename SrcView,
 
   68          typename WbsView, 
typename ResidualView>
 
   70                      const SrcView& src, 
const WbsView& wbs,
 
   71                      const ResidualView& residual)
 
   73   typedef typename ResidualView::execution_space execution_space;
 
   74   typedef typename ResidualView::non_const_value_type scalar_type;
 
   75   typedef Kokkos::TeamPolicy<execution_space> policy_type;
 
   76   typedef typename policy_type::member_type team_member;
 
   77   typedef Kokkos::View<scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
 
   79   const size_t num_cells = wgb.extent(0);
 
   80   const int num_basis    = wgb.extent(1);
 
   81   const int num_points   = wgb.extent(2);
 
   82   const int num_dim      = wgb.extent(3);
 
   84   const int vector_size  = 1;
 
   86   const int fad_size     = Kokkos::dimension_scalar(residual);
 
   87   const size_t range     = (num_cells+team_size-1)/team_size;
 
   88   const size_t bytes     = 2*tmp_scratch_type::shmem_size(team_size,fad_size);
 
   89   policy_type policy(range,team_size,vector_size);
 
   91   Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(bytes)),
 
   92                        KOKKOS_LAMBDA (
const team_member& team)
 
   94     const int team_rank = team.team_rank();
 
   95     tmp_scratch_type value(team.team_scratch(0), team_size, fad_size);
 
   96     tmp_scratch_type value2(team.team_scratch(0), team_size, fad_size);
 
   97     const size_t cell = team.league_rank()*team_size + team_rank;
 
   98     if (cell < num_cells) {
 
   99       for (
int basis=0; basis<num_basis; ++basis) {
 
  100         value(team_rank) = 0.0;
 
  101         value2(team_rank) = 0.0;
 
  102         for (
int qp=0; qp<num_points; ++qp) {
 
  103           for (
int dim=0; dim<num_dim; ++dim)
 
  104             value(team_rank) += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
 
  105           value2(team_rank) += src(cell,qp)*wbs(cell,basis,qp);
 
  107         residual(cell,basis) = value(team_rank)+value2(team_rank);
 
  113 template<
int N, 
typename FluxView, 
typename WgbView, 
typename SrcView,
 
  114          typename WbsView, 
typename ResidualView>
 
  116                        const SrcView& src, 
const WbsView& wbs,
 
  117                        const ResidualView& residual)
 
  119   typedef typename ResidualView::execution_space execution_space;
 
  120   typedef typename ResidualView::non_const_value_type scalar_type;
 
  122   const size_t num_cells = wgb.extent(0);
 
  123   const int num_basis    = wgb.extent(1);
 
  124   const int num_points   = wgb.extent(2);
 
  125   const int num_dim      = wgb.extent(3);
 
  127   Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
 
  128                        KOKKOS_LAMBDA (
const size_t cell)
 
  130     scalar_type value[
N+1],value2[
N+1];
 
  131     for (
int basis=0; basis<num_basis; ++basis) {
 
  132       for (
int k=0; k<
N+1; ++k) {
 
  136       for (
int qp=0; qp<num_points; ++qp) {
 
  137         for (
int dim=0; dim<num_dim; ++dim) {
 
  138           const scalar_type flux_val = flux(cell,qp,dim,N);
 
  139           const scalar_type wgb_val = wgb(cell,basis,qp,dim,N);
 
  140           value[
N] += flux_val*wgb_val;
 
  141           for(
int k=0; k<
N; k++)
 
  143               flux_val*wgb(cell,basis,qp,dim,k)+flux(cell,qp,dim,k)*wgb_val;
 
  145         const scalar_type src_val = src(cell,qp,N);
 
  146         const scalar_type wbs_val = wbs(cell,basis,qp,N);
 
  147         value2[
N] += src_val*wbs_val;
 
  148         for(
int k=0; k<
N; k++)
 
  149           value2[k] += src_val*wbs(cell,basis,qp,k)+src(cell,qp,k)*wbs_val;
 
  151       for(
int k=0; k<=
N; k++)
 
  152         residual(cell,basis,k) = value[k]+value2[k];
 
  157 template<
int N, 
typename FluxView, 
typename WgbView, 
typename SrcView,
 
  158          typename WbsView, 
typename ResidualView>
 
  160                        const SrcView& src, 
const WbsView& wbs,
 
  161                        const ResidualView& residual)
 
  163   typedef typename ResidualView::execution_space execution_space;
 
  164   typedef typename ResidualView::non_const_value_type scalar_type;
 
  165   typedef Kokkos::TeamPolicy<execution_space> policy_type;
 
  166   typedef typename policy_type::member_type team_member;
 
  167   typedef Kokkos::View<scalar_type[N+1], typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
 
  169   const size_t num_cells = wgb.extent(0);
 
  170   const int num_basis    = wgb.extent(1);
 
  171    int num_points   = wgb.extent(2);
 
  172    int num_dim      = wgb.extent(3);
 
  174   const size_t bytes     = 2*tmp_scratch_type::shmem_size();
 
  175   policy_type policy(num_cells,num_basis,32);
 
  176   Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerThread(bytes)),
 
  177                        KOKKOS_LAMBDA (
const team_member& team)
 
  179     tmp_scratch_type value(team.thread_scratch(0));
 
  180     tmp_scratch_type value2(team.thread_scratch(0));
 
  181     const size_t cell = team.league_rank();
 
  182     Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_basis),
 
  183                          [&] (
const int& basis)
 
  185       Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N+1),
 
  191       for (
int qp=0; qp<num_points; ++qp) {
 
  192         for (
int dim=0; dim<num_dim; ++dim) {
 
  193           const scalar_type flux_val = flux(cell,qp,dim,
N);
 
  194           const scalar_type wgb_val = wgb(cell,basis,qp,dim,
N);
 
  195           Kokkos::single(Kokkos::PerThread(team), [&] () {
 
  196             value[
N] += flux_val*wgb_val;
 
  198           Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N),
 
  202               flux_val*wgb(cell,basis,qp,dim,k)+flux(cell,qp,dim,k)*wgb_val;
 
  205         const scalar_type src_val = src(cell,qp,
N);
 
  206         const scalar_type wbs_val = wbs(cell,basis,qp,
N);
 
  207         Kokkos::single(Kokkos::PerThread(team), [&] () {
 
  208           value2[
N] += src_val*wbs_val;
 
  210         Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N),
 
  213           value2[k] += src_val*wbs(cell,basis,qp,k)+src(cell,qp,k)*wbs_val;
 
  216       Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N+1),
 
  219         residual(cell,basis,k) = value[k]+value2[k];
 
  225 template <
typename FadType, 
int N, 
typename ExecSpace>
 
  227                      int ntrial, 
bool check)
 
  229   typedef Kokkos::View<FadType****,ExecSpace> t_4DView;
 
  230   typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
 
  231   typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
 
  233   t_4DView wgb(
"",ncells,num_basis,num_points,ndim,
N+1);
 
  234   t_3DView flux(
"",ncells,num_points,ndim,
N+1);
 
  235   t_3DView wbs(
"",ncells,num_basis,num_points,
N+1);
 
  236   t_2DView src(
"",ncells,num_points,
N+1);
 
  237   t_2DView residual(
"",ncells,num_basis,
N+1);
 
  238   init_fad(wgb, wbs, flux, src, residual);
 
  245   Kokkos::Impl::Timer timer;
 
  246   for (
int i=0; i<ntrial; ++i)
 
  249   double time = timer.seconds() / ntrial / ncells;
 
  258 template <
typename FadType, 
int N, 
typename ExecSpace>
 
  260                         int ntrial, 
bool check)
 
  262   typedef Kokkos::View<FadType****,ExecSpace> t_4DView;
 
  263   typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
 
  264   typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
 
  266   t_4DView wgb(
"",ncells,num_basis,num_points,ndim,
N+1);
 
  267   t_3DView flux(
"",ncells,num_points,ndim,
N+1);
 
  268   t_3DView wbs(
"",ncells,num_basis,num_points,
N+1);
 
  269   t_2DView src(
"",ncells,num_points,
N+1);
 
  270   t_2DView residual(
"",ncells,num_basis,
N+1);
 
  271   init_fad(wgb, wbs, flux, src, residual);
 
  278   Kokkos::Impl::Timer timer;
 
  279   for (
int i=0; i<ntrial; ++i)
 
  282   double time = timer.seconds() / ntrial / ncells;
 
  291 template <
int N, 
typename ExecSpace>
 
  293                           int ntrial, 
bool check)
 
  295   typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
 
  296   typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
 
  297   typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
 
  299   t_4DView wgb(
"",ncells,num_basis,num_points,ndim);
 
  300   t_3DView flux(
"",ncells,num_points,ndim);
 
  301   t_3DView wbs(
"",ncells,num_basis,num_points);
 
  302   t_2DView src(
"",ncells,num_points);
 
  303   t_2DView residual(
"",ncells,num_basis);
 
  307   run_analytic_flat<N>(flux, wgb, src, wbs, residual);
 
  311   Kokkos::Impl::Timer timer;
 
  312   for (
int i=0; i<ntrial; ++i)
 
  313     run_analytic_flat<N>(flux, wgb, src, wbs, residual);
 
  315   double time = timer.seconds() / ntrial / ncells;
 
  324 template <
int N, 
typename ExecSpace>
 
  326                            int ntrial, 
bool check)
 
  328   typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
 
  329   typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
 
  330   typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
 
  331   typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
 
  333   t_4DView wgb(
"",ncells,num_basis,num_points,ndim);
 
  334   t_3DView flux(
"",ncells,num_points,ndim);
 
  335   t_3DView wbs(
"",ncells,num_basis,num_points);
 
  336   t_2DView src(
"",ncells,num_points);
 
  337   t_2DView residual(
"",ncells,num_basis);
 
  340   t_3DView_const flux_const = flux;
 
  343   run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
 
  347   Kokkos::Impl::Timer timer;
 
  348   for (
int i=0; i<ntrial; ++i)
 
  349     run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
 
  351   double time = timer.seconds() / ntrial / ncells;
 
  360 template <
int N, 
typename ExecSpace>
 
  362                           int ntrial, 
bool check)
 
  364   typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
 
  365   typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
 
  366   typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
 
  367   typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
 
  369   t_4DView wgb(
"",ncells,num_basis,num_points,ndim);
 
  370   t_3DView flux(
"",ncells,num_points,ndim);
 
  371   t_3DView wbs(
"",ncells,num_basis,num_points);
 
  372   t_2DView src(
"",ncells,num_points);
 
  373   t_2DView residual(
"",ncells,num_basis);
 
  376   t_3DView_const flux_const = flux;
 
  379   run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
 
  383   Kokkos::Impl::Timer timer;
 
  384   for (
int i=0; i<ntrial; ++i)
 
  385     run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
 
  387   double time = timer.seconds() / ntrial / ncells;
 
  396 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \ 
  397   template double time_fad_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \ 
  398   template double time_fad_scratch< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); 
  400 #define INST_FUNC_N_DEV(N,DEV) \ 
  401   INST_FUNC_FAD_N_DEV(SFadType,N,DEV) \ 
  402   INST_FUNC_FAD_N_DEV(SLFadType,N,DEV) \ 
  403   INST_FUNC_FAD_N_DEV(DFadType,N,DEV) \ 
  404   template double time_analytic_flat< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \ 
  405   template double time_analytic_const< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \ 
  406   template double time_analytic_team< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); 
  408 #define INST_FUNC_DEV(DEV) \ 
  409   INST_FUNC_N_DEV( fad_dim, DEV ) 
  411 #ifdef KOKKOS_ENABLE_SERIAL 
  415 #ifdef KOKKOS_ENABLE_OPENMP 
  419 #ifdef KOKKOS_ENABLE_THREADS 
  423 #ifdef KOKKOS_ENABLE_CUDA 
void run_fad_scratch(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_analytic_const(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
double time_fad_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_analytic_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void run_analytic_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void init_array(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
#define INST_FUNC_DEV(DEV)
void run_fad_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_analytic_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)