9 #ifndef _COMPADRE_APPLY_TARGET_EVALUATIONS_HPP_
10 #define _COMPADRE_APPLY_TARGET_EVALUATIONS_HPP_
21 template <
typename SolutionData>
22 KOKKOS_INLINE_FUNCTION
25 const int target_index = data._initial_index_for_batch + teamMember.league_rank();
27 #if defined(COMPADRE_USE_CUDA)
68 const auto n_evaluation_sites_per_target = data.additional_number_of_neighbors_list(target_index) + 1;
69 const auto nn = data.number_of_neighbors_list(target_index);
70 auto alphas = data._d_ss._alphas;
71 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,
72 nn + data._d_ss._added_alpha_size), [&] (
const int i) {
73 for (
int e=0; e<n_evaluation_sites_per_target; ++e) {
74 for (
int j=0; j<(int)data.operations_size; ++j) {
75 for (
int k=0; k<data._d_ss._lro_output_tile_size[j]; ++k) {
76 for (
int m=0; m<data._d_ss._lro_input_tile_size[j]; ++m) {
77 const int offset_index_jmke = data._d_ss.getTargetOffsetIndex(j,m,k,e);
78 const int alphas_index = data._d_ss.getAlphaIndex(target_index, offset_index_jmke);
80 if (data._sampling_multiplier>1 && m<data._sampling_multiplier) {
81 const int m_neighbor_offset = i+m*nn;
82 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, data.this_num_cols),
83 [&] (
int& l,
double& t_alpha_ij) {
84 t_alpha_ij += P_target_row(offset_index_jmke, l)*Q(l, m_neighbor_offset);
87 &&
"NaN in P_target_row matrix.");
89 &&
"NaN in Q coefficient matrix.");
92 }
else if (data._sampling_multiplier == 1) {
93 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, data.this_num_cols),
94 [&] (
int& l,
double& t_alpha_ij) {
95 t_alpha_ij += P_target_row(offset_index_jmke, l)*Q(l,i);
98 &&
"NaN in P_target_row matrix.");
100 &&
"NaN in Q coefficient matrix.");
106 alphas(alphas_index+i) = alpha_ij;
117 const int alphas_per_tile_per_target = data.number_of_neighbors_list(target_index) + data._d_ss._added_alpha_size;
118 const global_index_type base_offset_index_jmke = data._d_ss.getTargetOffsetIndex(0,0,0,0);
119 const global_index_type base_alphas_index = data._d_ss.getAlphaIndex(target_index, base_offset_index_jmke);
121 scratch_matrix_right_type this_alphas(data._d_ss._alphas.data() +
TO_GLOBAL(base_alphas_index), data._d_ss._total_alpha_values*data._d_ss._max_evaluation_sites_per_target, alphas_per_tile_per_target);
123 auto n_evaluation_sites_per_target = data.additional_number_of_neighbors_list(target_index) + 1;
124 const auto nn = data.number_of_neighbors_list(target_index);
125 for (
int e=0; e<n_evaluation_sites_per_target; ++e) {
127 for (
size_t j=0; j<data.operations_size; ++j) {
128 for (
int k=0; k<data._d_ss._lro_output_tile_size[j]; ++k) {
129 for (
int m=0; m<data._d_ss._lro_input_tile_size[j]; ++m) {
130 const int offset_index_jmke = data._d_ss.getTargetOffsetIndex(j,m,k,e);
131 for (
int i=0; i<nn + data._d_ss._added_alpha_size; ++i) {
133 const int Q_col = i+m*nn;
136 for (
int l=0; l<data.this_num_cols; ++l) {
137 if (data._sampling_multiplier>1 && m<data._sampling_multiplier) {
139 alpha_ij += P_target_row(offset_index_jmke, l)*Q(l, Q_col);
142 &&
"NaN in P_target_row matrix.");
144 &&
"NaN in Q coefficient matrix.");
146 }
else if (data._sampling_multiplier == 1) {
148 alpha_ij += P_target_row(offset_index_jmke, l)*Q(l, i);
151 &&
"NaN in P_target_row matrix.");
153 &&
"NaN in Q coefficient matrix.");
159 this_alphas(offset_index_jmke,i) = alpha_ij;
167 teamMember.team_barrier();
std::size_t global_index_type
KOKKOS_INLINE_FUNCTION void applyTargetsToCoefficients(const SolutionData &data, const member_type &teamMember, scratch_matrix_right_type Q, scratch_matrix_right_type P_target_row)
For applying the evaluations from a target functional to the polynomial coefficients.
team_policy::member_type member_type
#define compadre_kernel_assert_extreme_debug(condition)
#define TO_GLOBAL(variable)
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type