17 #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
18 #define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
29 #include <Kokkos_Pair.hpp>
30 #include <Kokkos_UnorderedMap.hpp>
31 #include <KokkosSparse_StaticCrsGraph.hpp>
33 #include <Kokkos_Timer.hpp>
54 Dim3(
const size_t x_,
const size_t y_ = 1,
const size_t z_ = 1) :
55 x(x_),
y(y_),
z(z_) {}
63 const size_t threads_per_block_x_ = 0,
64 const size_t threads_per_block_y_ = 0,
65 const size_t threads_per_block_z_ = 1) :
66 block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
72 template<
typename ValueType ,
class Space >
74 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
75 typedef KokkosSparse::StaticCrsGraph< unsigned , Space , void , unsigned >
StaticCrsGraphType ;
77 typedef KokkosSparse::StaticCrsGraph< unsigned , Space , void , void , unsigned >
StaticCrsGraphType ;
88 ,
values(
"crs_matrix_values" , arg_graph.entries.extent(0) )
94 template <
typename ViewType,
typename Enabled =
void>
104 KOKKOS_INLINE_FUNCTION
106 const unsigned local_rank)
110 #if defined( KOKKOS_ENABLE_CUDA )
112 template <
typename ViewType>
113 struct LocalViewTraits<
115 typename std::enable_if< std::is_same<typename ViewType::execution_space,
116 Kokkos::Cuda>::value &&
117 Kokkos::is_view_mp_vector<ViewType>::value
124 KOKKOS_INLINE_FUNCTION
126 const unsigned local_rank)
128 return Kokkos::partition<1>(v, local_rank);
135 template <
typename ScalarType>
145 template <
typename StorageType>
150 static const unsigned VectorSize = StorageType::static_size;
151 #if defined( KOKKOS_ENABLE_CUDA )
152 enum { is_cuda = std::is_same< execution_space, Kokkos::Cuda >::value };
154 enum { is_cuda =
false };
167 template<
class ElemNodeIdView ,
class CrsGraphType ,
unsigned ElemNode >
174 typedef Kokkos::UnorderedMap< key_type, void , execution_space >
SetType ;
175 typedef typename CrsGraphType::row_map_type::non_const_type
RowMapType ;
179 typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space >
214 const unsigned arg_node_count,
230 Kokkos::Timer wall_clock ;
236 size_t set_capacity = (((28ull *
node_count) / 2ull)*4ull)/3ull;
268 unsigned graph_entry_count = 0 ;
274 graph.entries =
typename CrsGraphType::entries_type(
"graph_entries" , graph_entry_count );
320 KOKKOS_INLINE_FUNCTION
324 for (
unsigned row_local_node = 0 ; row_local_node <
elem_node_id.extent(1) ; ++row_local_node ) {
326 const unsigned row_node =
elem_node_id( ielem , row_local_node );
328 for (
unsigned col_local_node = row_local_node ; col_local_node <
elem_node_id.extent(1) ; ++col_local_node ) {
330 const unsigned col_node =
elem_node_id( ielem , col_local_node );
336 const key_type key = (row_node < col_node) ? make_pair( row_node, col_node ) : make_pair( col_node, row_node ) ;
338 const typename SetType::insert_result result =
node_node_set.insert( key );
340 if ( result.success() ) {
349 KOKKOS_INLINE_FUNCTION
354 const unsigned row_node = key.first ;
355 const unsigned col_node = key.second ;
359 graph.entries( offset ) = col_node ;
362 if ( col_node <
row_count.extent(0) && col_node != row_node ) {
364 graph.entries( offset ) = row_node ;
369 KOKKOS_INLINE_FUNCTION
372 typedef typename CrsGraphType::size_type size_type;
373 const size_type row_beg =
graph.row_map( irow );
374 const size_type row_end =
graph.row_map( irow + 1 );
375 for ( size_type i = row_beg + 1 ; i < row_end ; ++i ) {
376 const typename CrsGraphType::data_type col =
graph.entries(i);
378 for ( ; row_beg < j && col <
graph.entries(j-1) ; --
j ) {
381 graph.entries(j) = col ;
385 KOKKOS_INLINE_FUNCTION
388 typedef typename CrsGraphType::data_type entry_type;
389 for (
unsigned row_local_node = 0 ; row_local_node <
elem_node_id.extent(1) ; ++row_local_node ) {
391 const unsigned row_node =
elem_node_id( ielem , row_local_node );
393 for (
unsigned col_local_node = 0 ; col_local_node <
elem_node_id.extent(1) ; ++col_local_node ) {
395 const unsigned col_node =
elem_node_id( ielem , col_local_node );
397 entry_type entry = 0 ;
399 if ( row_node + 1 <
graph.row_map.extent(0) ) {
401 const entry_type entry_end =
static_cast<entry_type
> (
graph.row_map( row_node + 1 ));
403 entry =
graph.row_map( row_node );
405 for ( ; entry < entry_end &&
graph.entries(entry) !=
static_cast<entry_type
> (col_node) ; ++entry );
407 if ( entry == entry_end ) entry = ~0u ;
410 elem_graph( ielem , row_local_node , col_local_node ) = entry ;
415 KOKKOS_INLINE_FUNCTION
437 KOKKOS_INLINE_FUNCTION
453 KOKKOS_INLINE_FUNCTION
456 KOKKOS_INLINE_FUNCTION
457 void join(
unsigned &
update ,
const unsigned & input )
const { update += input ; }
478 KOKKOS_INLINE_FUNCTION
479 double operator()(
double pt[],
unsigned ensemble_rank)
const
490 template <
typename Scalar,
typename MeshScalar,
typename Device >
495 template <
typename T1,
typename T2 = MeshScalar,
typename T3 = Device>
502 typedef typename RandomVariableView::size_type
size_type;
519 const MeshScalar mean ,
520 const MeshScalar variance ,
521 const MeshScalar correlation_length ,
530 solverParams.
set(
"Number of KL Terms",
int(num_rv));
531 solverParams.
set(
"Mean", mean);
532 solverParams.
set(
"Standard Deviation",
std::sqrt(variance));
535 correlation_lengths(ndim, correlation_length);
536 solverParams.
set(
"Domain Upper Bounds", domain_upper);
537 solverParams.
set(
"Domain Lower Bounds", domain_lower);
538 solverParams.
set(
"Correlation Lengths", correlation_lengths);
551 KOKKOS_INLINE_FUNCTION
554 KOKKOS_INLINE_FUNCTION
557 KOKKOS_INLINE_FUNCTION
571 class CoordinateMap ,
typename ScalarType >
586 typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space >
vector_type ;
600 typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space >
elem_matrices_type ;
651 KOKKOS_INLINE_FUNCTION
659 double dpsidz[] )
const
661 enum { j11 = 0 , j12 = 1 , j13 = 2 ,
662 j21 = 3 , j22 = 4 , j23 = 5 ,
663 j31 = 6 , j32 = 7 , j33 = 8 };
667 double J[
TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
670 const double x1 = x[i] ;
671 const double x2 = y[i] ;
672 const double x3 = z[i] ;
674 const double g1 = grad[0][i] ;
675 const double g2 = grad[1][i] ;
676 const double g3 = grad[2][i] ;
694 static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
695 static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
696 static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
698 static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
699 static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
700 static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
702 static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
703 static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
704 static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
706 const double detJ = J[j11] * invJ[j11] +
710 const double detJinv = 1.0 / detJ ;
712 for (
unsigned i = 0 ; i <
TensorDim ; ++i ) { invJ[i] *= detJinv ; }
717 const double g0 = grad[0][i];
718 const double g1 = grad[1][i];
719 const double g2 = grad[2][i];
721 dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
722 dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
723 dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
738 template<
class FiniteElementMeshType ,
739 class SparseMatrixType ,
741 class CoeffFunctionType = ElementComputationConstantCoefficient
746 class CoordinateMap ,
typename ScalarType ,
class CoeffFunctionType >
749 CrsMatrix< ScalarType , ExecutionSpace > ,
770 coeff_function( rhs.coeff_function ) ,
771 dev_config( rhs.dev_config ) {}
775 const CoeffFunctionType & arg_coeff_function ,
781 base_type(arg_mesh, arg_solution, arg_elem_graph, arg_jacobian,
783 coeff_function( arg_coeff_function ) ,
784 dev_config( arg_dev_config ) {}
791 parallel_for( nelem , *
this );
794 KOKKOS_INLINE_FUNCTION
797 unsigned node_index[],
798 double x[],
double y[],
double z[],
820 KOKKOS_INLINE_FUNCTION
822 const unsigned node_index[],
827 const unsigned row = node_index[i] ;
828 if ( row < this->
residual.extent(0) ) {
832 const unsigned entry = this->
elem_graph( ielem , i ,
j );
833 if ( entry != ~0u ) {
841 KOKKOS_INLINE_FUNCTION
851 double coeff_src = 1.234;
852 double advection[] = { 1.1, 1.2, 1.3 };
856 double pt[] = {0.0, 0.0, 0.0};
861 if ( ! coeff_function.is_constant ) {
868 coeff_k = coeff_function(pt, 0);
875 dpsidx , dpsidy , dpsidz );
876 const double detJ_weight = detJ * integ_weight;
877 const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
884 value_at_pt += dof_values[m] * bases_vals[m] ;
885 gradx_at_pt += dof_values[m] * dpsidx[m] ;
886 grady_at_pt += dof_values[m] * dpsidy[m] ;
887 gradz_at_pt += dof_values[m] * dpsidz[m] ;
891 coeff_src * value_at_pt * value_at_pt ;
893 2.0 * coeff_src * value_at_pt ;
900 advection_x*gradx_at_pt +
901 advection_y*grady_at_pt +
902 advection_z*gradz_at_pt ;
906 const double bases_val_m = bases_vals[m] * detJ_weight ;
907 const double dpsidx_m = dpsidx[m] ;
908 const double dpsidy_m = dpsidy[m] ;
909 const double dpsidz_m = dpsidz[m] ;
912 detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
913 dpsidy_m * grady_at_pt +
914 dpsidz_m * gradz_at_pt ) +
915 bases_val_m * ( advection_term + source_term ) ;
918 const double dpsidx_n = dpsidx[
n] ;
919 const double dpsidy_n = dpsidy[
n] ;
920 const double dpsidz_n = dpsidz[
n] ;
922 detJ_weight_coeff_k * ( dpsidx_m * dpsidx_n +
923 dpsidy_m * dpsidy_n +
924 dpsidz_m * dpsidz_n ) +
925 bases_val_m * ( advection_x * dpsidx_n +
926 advection_y * dpsidy_n +
927 advection_z * dpsidz_n +
928 source_deriv * bases_vals[
n] ) ;
934 KOKKOS_INLINE_FUNCTION
947 gatherSolution(ielem, val, node_index, x, y, z, elem_res, elem_mat);
950 computeElementResidualJacobian( val, x, y, z, elem_res , elem_mat );
953 scatterResidual( ielem, node_index, elem_res, elem_mat );
958 class CoordinateMap ,
typename ScalarType ,
class CoeffFunctionType >
961 CrsMatrix< ScalarType , ExecutionSpace > ,
985 coeff_function( rhs.coeff_function ) ,
986 dev_config( rhs.dev_config ) {}
990 const CoeffFunctionType & arg_coeff_function ,
996 base_type(arg_mesh, arg_solution, arg_elem_graph,
997 arg_jacobian, arg_residual),
998 coeff_function( arg_coeff_function ) ,
999 dev_config( arg_dev_config ) {}
1006 parallel_for( nelem , *
this );
1009 KOKKOS_INLINE_FUNCTION
1012 unsigned node_index[],
1013 double x[],
double y[],
double z[],
1019 node_index[i] = ni ;
1025 val[i].val() = this->
solution( ni );
1030 KOKKOS_INLINE_FUNCTION
1032 const unsigned node_index[],
1036 const unsigned row = node_index[i] ;
1037 if ( row < this->
residual.extent(0) ) {
1041 const unsigned entry = this->
elem_graph( ielem , i ,
j );
1042 if ( entry != ~0u ) {
1044 res[i].fastAccessDx(
j) );
1051 template <
typename local_scalar_type>
1052 KOKKOS_INLINE_FUNCTION
1057 local_scalar_type elem_res[] )
const
1060 double coeff_src = 1.234;
1061 double advection[] = { 1.1, 1.2, 1.3 };
1065 double pt[] = {0.0, 0.0, 0.0};
1070 if ( ! coeff_function.is_constant ) {
1077 coeff_k = coeff_function(pt, 0);
1084 dpsidx , dpsidy , dpsidz );
1085 const double detJ_weight = detJ * integ_weight;
1086 const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1088 local_scalar_type value_at_pt = 0 ;
1089 local_scalar_type gradx_at_pt = 0 ;
1090 local_scalar_type grady_at_pt = 0 ;
1091 local_scalar_type gradz_at_pt = 0 ;
1093 value_at_pt += dof_values[m] * bases_vals[m] ;
1094 gradx_at_pt += dof_values[m] * dpsidx[m] ;
1095 grady_at_pt += dof_values[m] * dpsidy[m] ;
1096 gradz_at_pt += dof_values[m] * dpsidz[m] ;
1099 const local_scalar_type source_term =
1100 coeff_src * value_at_pt * value_at_pt ;
1102 const local_scalar_type advection_term =
1103 advection[0]*gradx_at_pt +
1104 advection[1]*grady_at_pt +
1105 advection[2]*gradz_at_pt;
1108 const double bases_val_m = bases_vals[m] * detJ_weight ;
1109 const double dpsidx_m = dpsidx[m] ;
1110 const double dpsidy_m = dpsidy[m] ;
1111 const double dpsidz_m = dpsidz[m] ;
1114 detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1115 dpsidy_m * grady_at_pt +
1116 dpsidz_m * gradz_at_pt ) +
1117 bases_val_m * ( advection_term + source_term ) ;
1122 KOKKOS_INLINE_FUNCTION
1134 gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1137 computeElementResidual( val, x, y, z, elem_res );
1140 scatterResidual( ielem, node_index, elem_res );
1145 class CoordinateMap ,
typename ScalarType ,
class CoeffFunctionType >
1148 CrsMatrix< ScalarType , ExecutionSpace > ,
1150 public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1151 CrsMatrix< ScalarType , ExecutionSpace > ,
1152 FadElement, CoeffFunctionType > {
1173 const CoeffFunctionType & arg_coeff_function ,
1179 base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1180 arg_jacobian, arg_residual, arg_dev_config) {}
1187 parallel_for( nelem , *
this );
1190 KOKKOS_INLINE_FUNCTION
1193 unsigned node_index[],
1194 double x[],
double y[],
double z[],
1200 node_index[i] = ni ;
1210 template <
typename local_scalar_type>
1211 KOKKOS_INLINE_FUNCTION
1216 local_scalar_type elem_res[] )
const
1219 double coeff_src = 1.234;
1220 double advection[] = { 1.1, 1.2, 1.3 };
1224 double pt[] = {0.0, 0.0, 0.0};
1229 if ( ! this->coeff_function.is_constant ) {
1236 coeff_k = this->coeff_function(pt, 0);
1243 dpsidx , dpsidy , dpsidz );
1244 const double detJ_weight = detJ * integ_weight;
1245 const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1252 value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1253 value_at_pt.fastAccessDx(m) = bases_vals[m] ;
1255 gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1256 gradx_at_pt.fastAccessDx(m) = dpsidx[m] ;
1258 grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1259 grady_at_pt.fastAccessDx(m) = dpsidy[m] ;
1261 gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1262 gradz_at_pt.fastAccessDx(m) = dpsidz[m] ;
1265 const local_scalar_type source_term =
1266 coeff_src * value_at_pt * value_at_pt ;
1268 const local_scalar_type advection_term =
1269 advection[0]*gradx_at_pt +
1270 advection[1]*grady_at_pt +
1271 advection[2]*gradz_at_pt;
1274 const double bases_val_m = bases_vals[m] * detJ_weight ;
1275 const double dpsidx_m = dpsidx[m] ;
1276 const double dpsidy_m = dpsidy[m] ;
1277 const double dpsidz_m = dpsidz[m] ;
1280 detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1281 dpsidy_m * grady_at_pt +
1282 dpsidz_m * gradz_at_pt ) +
1283 bases_val_m * ( advection_term + source_term ) ;
1288 KOKKOS_INLINE_FUNCTION
1300 gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1303 computeElementResidual( val, x, y, z, elem_res );
1306 this->scatterResidual( ielem, node_index, elem_res );
1311 class CoordinateMap ,
typename ScalarType ,
class CoeffFunctionType >
1314 CrsMatrix< ScalarType , ExecutionSpace > ,
1316 public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1317 CrsMatrix< ScalarType , ExecutionSpace > ,
1318 Analytic , CoeffFunctionType > {
1339 const CoeffFunctionType & arg_coeff_function ,
1345 base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1346 arg_jacobian, arg_residual, arg_dev_config) {}
1353 parallel_for( nelem , *
this );
1356 KOKKOS_INLINE_FUNCTION
1366 double coeff_src = 1.234;
1367 double advection[] = { 1.1, 1.2, 1.3 };
1371 double pt[] = {0.0, 0.0, 0.0};
1381 if ( ! this->coeff_function.is_constant ) {
1388 coeff_k = this->coeff_function(pt, 0);
1395 dpsidx , dpsidy , dpsidz );
1396 const double detJ_weight = detJ * integ_weight;
1397 const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1399 value_at_pt.val() = 0.0 ;
1400 gradx_at_pt.val() = 0.0 ;
1401 grady_at_pt.val() = 0.0 ;
1402 gradz_at_pt.val() = 0.0 ;
1404 value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1405 gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1406 grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1407 gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1411 coeff_src * value_at_pt * value_at_pt ;
1414 advection[0]*gradx_at_pt +
1415 advection[1]*grady_at_pt +
1416 advection[2]*gradz_at_pt;
1419 const double bases_val_m = bases_vals[m] * detJ_weight ;
1421 detJ_weight_coeff_k * ( dpsidx[m] * gradx_at_pt +
1422 dpsidy[m] * grady_at_pt +
1423 dpsidz[m] * gradz_at_pt ) +
1424 bases_val_m * ( advection_term + source_term ) ;
1426 elem_res[m] += res.val();
1430 mat[
n] += res.fastAccessDx(0) * bases_vals[
n] +
1431 res.fastAccessDx(1) * dpsidx[
n] +
1432 res.fastAccessDx(2) * dpsidy[
n] +
1433 res.fastAccessDx(3) * dpsidz[
n];
1439 KOKKOS_INLINE_FUNCTION
1452 this->gatherSolution( ielem, val, node_index, x, y, z, elem_res, elem_mat );
1455 computeElementResidualJacobian( val, x, y, z, elem_res, elem_mat );
1458 this->scatterResidual( ielem, node_index, elem_res, elem_mat );
1463 template<
class FiniteElementMeshType ,
class SparseMatrixType
1464 ,
class CoeffFunctionType = ElementComputationConstantCoefficient
1466 class ElementComputation ;
1470 typename ScalarType ,
class CoeffFunctionType >
1471 class ElementComputation
1474 , CoeffFunctionType >
1489 typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space >
vector_type ;
1493 typedef LocalViewTraits< vector_type > local_vector_view_traits;
1494 typedef LocalViewTraits< matrix_values_type> local_matrix_view_traits;
1495 typedef typename local_vector_view_traits::local_view_type local_vector_type;
1496 typedef typename local_matrix_view_traits::local_view_type local_matrix_type;
1497 typedef typename local_vector_view_traits::local_value_type local_scalar_type;
1498 static const bool use_team = local_vector_view_traits::use_team;
1510 typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space >
elem_matrices_type ;
1511 typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space >
elem_vectors_type ;
1513 typedef LocalViewTraits< elem_matrices_type > local_elem_matrices_traits;
1514 typedef LocalViewTraits< elem_vectors_type > local_elem_vectors_traits;
1515 typedef typename local_elem_matrices_traits::local_view_type local_elem_matrices_type;
1516 typedef typename local_elem_vectors_traits::local_view_type local_elem_vectors_type;
1518 typedef typename NodeNodeGraph< elem_node_type , sparse_graph_type , ElemNodeCount >::ElemGraphType
elem_graph_type ;
1535 const CoeffFunctionType coeff_function ;
1538 ElementComputation(
const ElementComputation & rhs )
1548 , coeff_function( rhs.coeff_function )
1549 , dev_config( rhs.dev_config )
1554 ElementComputation(
const mesh_type & arg_mesh ,
1555 const CoeffFunctionType & arg_coeff_function ,
1570 , coeff_function( arg_coeff_function )
1571 , dev_config( arg_dev_config )
1580 const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1581 const size_t league_size =
1582 (nelem + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1583 Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1584 parallel_for( config , *
this );
1587 parallel_for( nelem , *
this );
1593 static const unsigned FLOPS_transform_gradients =
1598 KOKKOS_INLINE_FUNCTION
1606 double dpsidz[] )
const
1608 enum { j11 = 0 , j12 = 1 , j13 = 2 ,
1609 j21 = 3 , j22 = 4 , j23 = 5 ,
1610 j31 = 6 , j32 = 7 , j33 = 8 };
1614 double J[
TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1617 const double x1 = x[i] ;
1618 const double x2 = y[i] ;
1619 const double x3 = z[i] ;
1621 const double g1 = grad[0][i] ;
1622 const double g2 = grad[1][i] ;
1623 const double g3 = grad[2][i] ;
1641 static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
1642 static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
1643 static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
1645 static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
1646 static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
1647 static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
1649 static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
1650 static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
1651 static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
1653 const double detJ = J[j11] * invJ[j11] +
1654 J[j21] * invJ[j12] +
1655 J[j31] * invJ[j13] ;
1657 const double detJinv = 1.0 / detJ ;
1659 for (
unsigned i = 0 ; i <
TensorDim ; ++i ) { invJ[i] *= detJinv ; }
1664 const double g0 = grad[0][i];
1665 const double g1 = grad[1][i];
1666 const double g2 = grad[2][i];
1668 dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
1669 dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
1670 dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
1676 KOKKOS_INLINE_FUNCTION
1677 void contributeResidualJacobian(
1678 const local_scalar_type dof_values[] ,
1679 const double dpsidx[] ,
1680 const double dpsidy[] ,
1681 const double dpsidz[] ,
1683 const local_scalar_type coeff_k ,
1684 const double integ_weight ,
1685 const double bases_vals[] ,
1686 local_scalar_type elem_res[] ,
1687 local_scalar_type elem_mat[][ FunctionCount ] )
const
1689 local_scalar_type value_at_pt = 0 ;
1690 local_scalar_type gradx_at_pt = 0 ;
1691 local_scalar_type grady_at_pt = 0 ;
1692 local_scalar_type gradz_at_pt = 0 ;
1695 value_at_pt += dof_values[m] * bases_vals[m] ;
1696 gradx_at_pt += dof_values[m] * dpsidx[m] ;
1697 grady_at_pt += dof_values[m] * dpsidy[m] ;
1698 gradz_at_pt += dof_values[m] * dpsidz[m] ;
1701 const local_scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
1702 const local_scalar_type res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
1703 const local_scalar_type mat_val = 2.0 * value_at_pt * detJ * integ_weight ;
1709 local_scalar_type *
const mat = elem_mat[m] ;
1710 const double bases_val_m = bases_vals[m];
1711 const double dpsidx_m = dpsidx[m] ;
1712 const double dpsidy_m = dpsidy[m] ;
1713 const double dpsidz_m = dpsidz[m] ;
1715 elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
1716 dpsidy_m * grady_at_pt +
1717 dpsidz_m * gradz_at_pt ) +
1718 res_val * bases_val_m ;
1722 mat[
n] += k_detJ_weight * ( dpsidx_m * dpsidx[
n] +
1723 dpsidy_m * dpsidy[
n] +
1724 dpsidz_m * dpsidz[
n] ) +
1725 mat_val * bases_val_m * bases_vals[n];
1730 KOKKOS_INLINE_FUNCTION
1731 void operator()(
const typename TeamPolicy< execution_space >::member_type & dev )
const
1734 const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1735 const unsigned num_element_threads = dev_config.block_dim.y ;
1736 const unsigned element_rank = dev.team_rank() / num_ensemble_threads ;
1737 const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1739 const unsigned ielem =
1740 dev.league_rank() * num_element_threads + element_rank;
1745 (*this)( ielem, ensemble_rank );
1749 KOKKOS_INLINE_FUNCTION
1750 void operator()(
const unsigned ielem ,
1751 const unsigned ensemble_rank = 0 )
const
1753 local_vector_type local_solution =
1754 local_vector_view_traits::create_local_view(
solution,
1756 local_vector_type local_residual =
1757 local_vector_view_traits::create_local_view(
residual,
1759 local_matrix_type local_jacobian_values =
1774 node_index[i] = ni ;
1780 val[i] = local_solution( ni );
1790 elem_mat[i][
j] = 0 ;
1800 local_scalar_type coeff_k = 0 ;
1803 double pt[] = {0.0, 0.0, 0.0};
1807 if ( ! coeff_function.is_constant ) {
1816 coeff_k = coeff_function(pt, ensemble_rank);
1821 dpsidx , dpsidy , dpsidz );
1823 contributeResidualJacobian( val , dpsidx , dpsidy , dpsidz ,
1827 elem_vec , elem_mat );
1831 const unsigned row = node_index[i] ;
1833 atomic_add( & local_residual( row ) , elem_vec[i] );
1836 const unsigned entry =
elem_graph( ielem , i ,
j );
1837 if ( entry != ~0u ) {
1838 atomic_add( & local_jacobian_values( entry ) , elem_mat[i][
j] );
1849 template<
class FixtureType ,
class SparseMatrixType >
1853 typename ScalarType >
1878 static const bool use_team = local_vector_view_traits::use_team;
1903 const unsigned arg_bc_plane ,
1911 , bc_lower_value( arg_bc_lower_value )
1912 , bc_upper_value( arg_bc_upper_value )
1915 , bc_plane( arg_bc_plane )
1916 , node_count( arg_mesh.node_count_owned() )
1918 , dev_config( arg_dev_config )
1920 parallel_for( node_count , *
this );
1927 const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1928 const size_t league_size =
1929 (node_count + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1930 Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1931 parallel_for( config , *
this );
1934 parallel_for( node_count , *
this );
1939 KOKKOS_INLINE_FUNCTION
1940 void operator()(
const typename TeamPolicy< execution_space >::member_type & dev )
const
1943 const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1944 const unsigned num_node_threads = dev_config.block_dim.y ;
1945 const unsigned node_rank = dev.team_rank() / num_ensemble_threads ;
1946 const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1948 const unsigned inode = dev.league_rank() * num_node_threads + node_rank;
1950 if (inode >= node_count)
1953 (*this)( inode, ensemble_rank );
1957 KOKKOS_INLINE_FUNCTION
1959 const unsigned ensemble_rank = 0)
const
1962 local_vector_view_traits::create_local_view(
residual,
1977 const bool bc_lower = c <= bc_lower_limit ;
1978 const bool bc_upper = bc_upper_limit <= c ;
1981 solution(inode) = bc_lower ? bc_lower_value : (
1982 bc_upper ? bc_upper_value : 0 );
1985 if ( bc_lower || bc_upper ) {
1987 local_residual(inode) = 0 ;
1992 for(
unsigned i = iBeg ; i < iEnd ; ++i ) {
1993 local_jacobian_values(i) = int(inode) == int(
jacobian.
graph.entries(i)) ? 1 : 0 ;
2001 for(
unsigned i = iBeg ; i < iEnd ; ++i ) {
2005 if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) {
2006 local_jacobian_values(i) = 0 ;
2021 #if defined( __CUDACC__ )
double weights[integration_count]
static KOKKOS_INLINE_FUNCTION local_view_type create_local_view(const view_type &v, const unsigned local_rank)
KOKKOS_INLINE_FUNCTION PCE< Storage > sqrt(const PCE< Storage > &a)
LocalViewTraits< vector_type > local_vector_view_traits
ExponentialKLCoefficient(const ExponentialKLCoefficient &rhs)
const scalar_coord_type bc_upper_limit
static const unsigned SpatialDim
Kokkos::View< scalar_type *, execution_space > vector_type
const Kokkos::Example::FENL::DeviceConfig dev_config
KOKKOS_INLINE_FUNCTION void setRandomVariables(const RandomVariableView &rv)
LocalViewTraits< matrix_values_type > local_matrix_view_traits
Sacado::Fad::SLFad< scalar_type, FunctionCount > fad_scalar_type
const unsigned node_count
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
ElementComputation(const ElementComputation &rhs)
LocalViewTraits< RandomVariableView > local_rv_view_traits
KOKKOS_INLINE_FUNCTION Teuchos::PromotionTraits< typename rv_type::value_type, value_type >::promote evaluate(const point_type &point, const rv_type &random_variables) const
Evaluate random field at a point.
const elem_vectors_type elem_residuals
KOKKOS_INLINE_FUNCTION void fill_graph_entries(const unsigned iset) const
const vector_type solution
static const bool use_team
Kokkos::View< scalar_type *[FunctionCount], execution_space > elem_vectors_type
static const unsigned element_node_count
CrsMatrix< ScalarType, ExecutionSpace > sparse_matrix_type
const elem_matrices_type elem_jacobians
view_type::value_type local_value_type
static const unsigned function_count
KOKKOS_INLINE_FUNCTION void join(unsigned &update, const unsigned &input) const
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
base_type::execution_space execution_space
ElementComputationBase< ExecutionSpace, Order, CoordinateMap, ScalarType > base_type
Kokkos::View< const double *[SpaceDim], Device > node_coord_type
Kokkos::DefaultExecutionSpace execution_space
double gradients[integration_count][spatial_dimension][function_count]
Kokkos::View< const unsigned *[ElemNode], Device > elem_node_type
static const unsigned ElemNodeCount
Sacado::Fad::SLFad< scalar_type, 4 > fad_scalar_type
KOKKOS_INLINE_FUNCTION RandomVariableView getRandomVariables() const
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
const Kokkos::Example::FENL::DeviceConfig dev_config
const CoeffFunctionType coeff_function
ExecutionSpace execution_space
ElementComputation(const ElementComputation &rhs)
KOKKOS_INLINE_FUNCTION void scatterResidual(const unsigned ielem, const unsigned node_index[], fad_scalar_type res[]) const
Kokkos::View< scalar_type *, Kokkos::LayoutLeft, execution_space > vector_type
KOKKOS_INLINE_FUNCTION void atomic_add(volatile Sacado::UQ::PCE< Storage > *const dest, const Sacado::UQ::PCE< Storage > &src)
const vector_type residual
const bc_scalar_type bc_lower_value
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Kokkos::View< Scalar *, Kokkos::LayoutLeft, Device > RandomVariableView
const elem_node_type elem_node_ids
const vector_type residual
const unsigned node_count
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
const bc_scalar_type bc_upper_value
base_type::scalar_type scalar_type
base_type::execution_space execution_space
KOKKOS_INLINE_FUNCTION local_scalar_type operator()(const MeshScalar point[], const size_type ensemble_rank) const
KOKKOS_INLINE_FUNCTION void operator()(const unsigned ielem) const
NodeNodeGraph< elem_node_type, sparse_graph_type, ElemNodeCount >::ElemGraphType elem_graph_type
const vector_type solution
const MeshScalar m_variance
local_rv_view_traits::local_view_type local_rv_view_type
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, Analytic, CoeffFunctionType > base_type
mesh_type::node_coord_type node_coord_type
ElemNodeIdView::execution_space execution_space
node_coord_type::value_type scalar_coord_type
static const unsigned TensorDim
KOKKOS_INLINE_FUNCTION void operator()(const unsigned irow, unsigned &update, const bool final) const
ElementComputationConstantCoefficient(const ElementComputationConstantCoefficient &rhs)
RandomVariableView::size_type size_type
KOKKOS_INLINE_FUNCTION void init(unsigned &update) const
ExecutionSpace execution_space
double sort_graph_entries
size_t num_threads_per_block
double fill_graph_entries
KOKKOS_INLINE_FUNCTION void operator()(const unsigned ielem) const
ElementComputationBase(const ElementComputationBase &rhs)
KOKKOS_INLINE_FUNCTION void fill_elem_graph_map(const unsigned ielem) const
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
KokkosSparse::StaticCrsGraph< unsigned, Space, void, void, unsigned > StaticCrsGraphType
Kokkos::Example::HexElement_Data< mesh_type::ElemNode > element_data_type
CrsGraphType::row_map_type::non_const_type RowMapType
base_type::scalar_type scalar_type
const element_data_type elem_data
const MeshScalar m_corr_len
ElementComputationBase(const mesh_type &arg_mesh, const vector_type &arg_solution, const elem_graph_type &arg_elem_graph, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual)
KOKKOS_INLINE_FUNCTION void operator()(const typename TeamPolicy< execution_space >::member_type &dev) const
ElementComputation(const ElementComputation &rhs)
base_type::execution_space execution_space
CrsMatrix(const StaticCrsGraphType &arg_graph)
const sparse_matrix_type jacobian
KOKKOS_INLINE_FUNCTION double transform_gradients(const double grad[][FunctionCount], const double x[], const double y[], const double z[], double dpsidx[], double dpsidy[], double dpsidz[]) const
double values[integration_count][function_count]
mesh_type::elem_node_type elem_node_type
Kokkos::Example::FENL::CrsMatrix< ScalarType, ExecutionSpace > sparse_matrix_type
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
KOKKOS_INLINE_FUNCTION void operator()(const unsigned inode, const unsigned ensemble_rank=0) const
KOKKOS_INLINE_FUNCTION void operator()(const unsigned iwork) const
base_type::execution_space execution_space
KOKKOS_INLINE_FUNCTION double operator()(double pt[], unsigned ensemble_rank) const
base_type::scalar_type scalar_type
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
const CoeffFunctionType coeff_function
const scalar_coord_type bc_lower_limit
const ElemNodeIdView elem_node_id
static const unsigned integration_count
base_type::scalar_type scalar_type
Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap > mesh_type
mesh_type::node_coord_type node_coord_type
const elem_graph_type elem_graph
sparse_matrix_type::StaticCrsGraphType sparse_graph_type
const Kokkos::Example::FENL::DeviceConfig dev_config
Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap > mesh_type
const view_type & local_view_type
NodeNodeGraph(const ElemNodeIdView &arg_elem_node_id, const unsigned arg_node_count, Times &results)
const sparse_matrix_type jacobian
const node_coord_type node_coords
static const unsigned FunctionCount
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], scalar_type res[], scalar_type mat[][FunctionCount]) const
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, FadElement, CoeffFunctionType > base_type
sparse_matrix_type::StaticCrsGraphType sparse_graph_type
Kokkos::View< unsigned, execution_space > UnsignedValue
const node_coord_type node_coords
DeviceConfig(const size_t num_blocks_=0, const size_t threads_per_block_x_=0, const size_t threads_per_block_y_=0, const size_t threads_per_block_z_=1)
ExponentialKLCoefficient< T1, T2, T3 > type
KOKKOS_INLINE_FUNCTION void operator()(const unsigned ielem) const
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, fad_scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
local_rv_view_traits::local_value_type local_scalar_type
DirichletComputation(const mesh_type &arg_mesh, const vector_type &arg_solution, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual, const unsigned arg_bc_plane, const bc_scalar_type arg_bc_lower_value, const bc_scalar_type arg_bc_upper_value, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
ElementComputationBase< ExecutionSpace, Order, CoordinateMap, ScalarType > base_type
KOKKOS_INLINE_FUNCTION PCE< Storage > sin(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION void scatterResidual(const unsigned ielem, const unsigned node_index[], const scalar_type res[], const scalar_type mat[][FunctionCount]) const
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
View< ValueType *, Space > values_type
StorageType::execution_space execution_space
local_matrix_view_traits::local_view_type local_matrix_type
KOKKOS_INLINE_FUNCTION void sort_graph_entries(const unsigned irow) const
double fill_element_graph
Kokkos::View< scalar_type *[FunctionCount][FunctionCount], execution_space > elem_matrices_type
const unsigned VectorSize
KOKKOS_INLINE_FUNCTION void fill_set(const unsigned ielem) const
ElementComputationConstantCoefficient(const double val)
Kokkos::View< unsigned *[ElemNode][ElemNode], execution_space > ElemGraphType
Sacado::Fad::SLFad< scalar_type, FunctionCount > fad_scalar_type
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...
Stokhos::KL::ExponentialRandomField< MeshScalar, Device > rf_type
sparse_matrix_type::values_type matrix_values_type
ExponentialKLCoefficient(const MeshScalar mean, const MeshScalar variance, const MeshScalar correlation_length, const size_type num_rv)
KOKKOS_INLINE_FUNCTION void operator()(const unsigned ielem) const
local_vector_view_traits::local_view_type local_vector_type
KOKKOS_INLINE_FUNCTION void computeElementResidual(const local_scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const
ElementComputation(const ElementComputation &rhs)
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
Kokkos::UnorderedMap< key_type, void, execution_space > SetType
pair< unsigned, unsigned > key_type
KOKKOS_INLINE_FUNCTION void computeElementResidual(const scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const
static const unsigned IntegrationCount
static const unsigned spatial_dimension