Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FadMPAssembly/fenl_functors.hpp
Go to the documentation of this file.
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos: Manycore Performance-Portable Multidimensional Arrays
6 // Copyright (2012) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
44 #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
45 #define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
46 
47 #include <stdio.h>
48 
49 #include <iostream>
50 #include <fstream>
51 #include <iomanip>
52 #include <cstdlib>
53 #include <cmath>
54 #include <limits>
55 
56 #include <Kokkos_Pair.hpp>
57 #include <Kokkos_UnorderedMap.hpp>
58 
59 #include <impl/Kokkos_Timer.hpp>
60 
61 #include <BoxElemFixture.hpp>
62 #include <HexElement.hpp>
63 
65 #include "Sacado.hpp"
66 //#include "Fad/Sacado_Fad_SFad_MP_Vector.hpp"
68 //#include "Fad/Sacado_Fad_DFad_MP_Vector.hpp"
69 
70 //----------------------------------------------------------------------------
71 //----------------------------------------------------------------------------
72 
73 namespace Kokkos {
74 namespace Example {
75 namespace FENL {
76 
77 struct DeviceConfig {
78  struct Dim3 {
79  size_t x, y, z;
80  Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) :
81  x(x_), y(y_), z(z_) {}
82  };
83 
85  size_t num_blocks;
87 
88  DeviceConfig(const size_t num_blocks_ = 0,
89  const size_t threads_per_block_x_ = 0,
90  const size_t threads_per_block_y_ = 0,
91  const size_t threads_per_block_z_ = 1) :
92  block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
93  num_blocks(num_blocks_),
95  {}
96 };
97 
98 template< typename ValueType , class Space >
99 struct CrsMatrix {
100 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
101  typedef Kokkos::StaticCrsGraph< unsigned , Space , void , unsigned > StaticCrsGraphType ;
102 #else
103  typedef Kokkos::StaticCrsGraph< unsigned , Space , void , void , unsigned > StaticCrsGraphType ;
104 #endif
105  typedef View< ValueType * , Space > values_type ;
106 
109 
110  CrsMatrix() : graph(), values() {}
111 
112  CrsMatrix( const StaticCrsGraphType & arg_graph )
113  : graph( arg_graph )
114  , values( "crs_matrix_values" , arg_graph.entries.extent(0) )
115  {}
116 };
117 
118 // Traits class for creating strided local views for embedded ensemble-based,
119 // specialized for ensemble UQ scalar type
120 template <typename ViewType, typename Enabled = void>
122  typedef ViewType view_type;
123  // typedef Kokkos::View<typename view_type::data_type,
124  // typename view_type::array_layout,
125  // typename view_type::execution_space,
126  // Kokkos::MemoryUnmanaged> local_view_type;
127  typedef const view_type& local_view_type;
129  static const bool use_team = false;
130  KOKKOS_INLINE_FUNCTION
132  const unsigned local_rank)
133  { return v; }
134 };
135 
136 #if defined( KOKKOS_ENABLE_CUDA )
137 
138 template <typename ViewType>
139 struct LocalViewTraits<
140  ViewType,
141  typename std::enable_if< std::is_same<typename ViewType::execution_space,
142  Kokkos::Cuda>::value &&
143  Kokkos::is_view_mp_vector<ViewType>::value
144  >::type > {
145  typedef ViewType view_type;
148  static const bool use_team = true;
149 
150  KOKKOS_INLINE_FUNCTION
152  const unsigned local_rank)
153  {
154  return Kokkos::partition<1>(v, local_rank);
155  }
156 };
157 
158 #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
159 
160 // Compute DeviceConfig struct's based on scalar type
161 template <typename ScalarType>
163  static void eval( Kokkos::Example::FENL::DeviceConfig& dev_config_elem,
164  Kokkos::Example::FENL::DeviceConfig& dev_config_bc ) {
165  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
166  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
167  }
168 };
169 
170 // Compute DeviceConfig struct's based on scalar type
171 template <typename StorageType>
172 struct CreateDeviceConfigs< Sacado::MP::Vector<StorageType> > {
174  static void eval( Kokkos::Example::FENL::DeviceConfig& dev_config_elem,
175  Kokkos::Example::FENL::DeviceConfig& dev_config_bc ) {
176  static const unsigned VectorSize = StorageType::static_size;
177 #if defined( KOKKOS_ENABLE_CUDA )
178  enum { is_cuda = std::is_same< execution_space, Kokkos::Cuda >::value };
179 #else
180  enum { is_cuda = false };
181 #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
182  if ( is_cuda ) {
183  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , VectorSize , 64/VectorSize );
184  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , VectorSize , 256/VectorSize );
185  }
186  else {
187  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
188  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
189  }
190  }
191 };
192 
193 template< class ElemNodeIdView , class CrsGraphType , unsigned ElemNode >
195 public:
196 
198  typedef pair<unsigned,unsigned> key_type ;
199 
200  typedef Kokkos::UnorderedMap< key_type, void , execution_space > SetType ;
201  typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ;
202  typedef Kokkos::View< unsigned , execution_space > UnsignedValue ;
203 
204  // Static dimensions of 0 generate compiler warnings or errors.
205  typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space >
207 
208 private:
209 
215 
216  const unsigned node_count ;
217  const ElemNodeIdView elem_node_id ;
223 
224 public:
225 
226  CrsGraphType graph ;
228 
229  struct Times
230  {
231  double ratio;
237  };
238 
239  NodeNodeGraph( const ElemNodeIdView & arg_elem_node_id ,
240  const unsigned arg_node_count,
241  Times & results
242  )
243  : node_count(arg_node_count)
244  , elem_node_id( arg_elem_node_id )
245  , row_total( "row_total" )
246  , row_count(Kokkos::ViewAllocateWithoutInitializing("row_count") , node_count ) // will deep_copy to 0 inside loop
247  , row_map( "graph_row_map" , node_count + 1 )
248  , node_node_set()
249  , phase( FILL_NODE_SET )
250  , graph()
251  , elem_graph()
252  {
253  //--------------------------------
254  // Guess at capacity required for the map:
255 
256  Kokkos::Impl::Timer wall_clock ;
257 
258  wall_clock.reset();
259  phase = FILL_NODE_SET ;
260 
261  // upper bound on the capacity
262  size_t set_capacity = (((28ull * node_count) / 2ull)*4ull)/3ull;
263 
264 
265  // Increase capacity until the (node,node) map is successfully filled.
266  {
267  // Zero the row count to restart the fill
269 
270  node_node_set = SetType( set_capacity );
271 
272  // May be larger that requested:
273  set_capacity = node_node_set.capacity();
274 
275  Kokkos::parallel_for( elem_node_id.extent(0) , *this );
276  }
277 
278  execution_space().fence();
279  results.ratio = (double)node_node_set.size() / (double)node_node_set.capacity();
280  results.fill_node_set = wall_clock.seconds();
281  //--------------------------------
282 
283  wall_clock.reset();
285 
286  // Exclusive scan of row_count into row_map
287  // including the final total in the 'node_count + 1' position.
288  // Zero the 'row_count' values.
289  Kokkos::parallel_scan( node_count , *this );
290 
291  // Zero the row count for the fill:
293 
294  unsigned graph_entry_count = 0 ;
295 
296  Kokkos::deep_copy( graph_entry_count , row_total );
297 
298  // Assign graph's row_map and allocate graph's entries
299  graph.row_map = row_map ;
300  graph.entries = typename CrsGraphType::entries_type( "graph_entries" , graph_entry_count );
301 
302  //--------------------------------
303  // Fill graph's entries from the (node,node) set.
304 
305  execution_space().fence();
306  results.scan_node_count = wall_clock.seconds();
307 
308  wall_clock.reset();
310  Kokkos::parallel_for( node_node_set.capacity() , *this );
311 
312  execution_space().fence();
313  results.fill_graph_entries = wall_clock.seconds();
314 
315  //--------------------------------
316  // Done with the temporary sets and arrays
317  wall_clock.reset();
319 
321  row_count = RowMapType();
322  row_map = RowMapType();
323  node_node_set.clear();
324 
325  //--------------------------------
326 
327  Kokkos::parallel_for( node_count , *this );
328 
329  execution_space().fence();
330  results.sort_graph_entries = wall_clock.seconds();
331 
332  //--------------------------------
333  // Element-to-graph mapping:
334  wall_clock.reset();
336  elem_graph = ElemGraphType("elem_graph", elem_node_id.extent(0) );
337  Kokkos::parallel_for( elem_node_id.extent(0) , *this );
338 
339  execution_space().fence();
340  results.fill_element_graph = wall_clock.seconds();
341  }
342 
343  //------------------------------------
344  // parallel_for: create map and count row length
345 
346  KOKKOS_INLINE_FUNCTION
347  void fill_set( const unsigned ielem ) const
348  {
349  // Loop over element's (row_local_node,col_local_node) pairs:
350  for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
351 
352  const unsigned row_node = elem_node_id( ielem , row_local_node );
353 
354  for ( unsigned col_local_node = row_local_node ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) {
355 
356  const unsigned col_node = elem_node_id( ielem , col_local_node );
357 
358  // If either node is locally owned then insert the pair into the unordered map:
359 
360  if ( row_node < row_count.extent(0) || col_node < row_count.extent(0) ) {
361 
362  const key_type key = (row_node < col_node) ? make_pair( row_node, col_node ) : make_pair( col_node, row_node ) ;
363 
364  const typename SetType::insert_result result = node_node_set.insert( key );
365 
366  if ( result.success() ) {
367  if ( row_node < row_count.extent(0) ) { atomic_fetch_add( & row_count( row_node ) , (typename RowMapType::value_type)1 ); }
368  if ( col_node < row_count.extent(0) && col_node != row_node ) { atomic_fetch_add( & row_count( col_node ) , (typename RowMapType::value_type)1 ); }
369  }
370  }
371  }
372  }
373  }
374 
375  KOKKOS_INLINE_FUNCTION
376  void fill_graph_entries( const unsigned iset ) const
377  {
378  if ( node_node_set.valid_at(iset) ) {
379  const key_type key = node_node_set.key_at(iset) ;
380  const unsigned row_node = key.first ;
381  const unsigned col_node = key.second ;
382 
383  if ( row_node < row_count.extent(0) ) {
384  const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , (typename RowMapType::value_type)1 );
385  graph.entries( offset ) = col_node ;
386  }
387 
388  if ( col_node < row_count.extent(0) && col_node != row_node ) {
389  const unsigned offset = graph.row_map( col_node ) + atomic_fetch_add( & row_count( col_node ) , (typename RowMapType::value_type)1 );
390  graph.entries( offset ) = row_node ;
391  }
392  }
393  }
394 
395  KOKKOS_INLINE_FUNCTION
396  void sort_graph_entries( const unsigned irow ) const
397  {
398  typedef typename CrsGraphType::size_type size_type;
399  const size_type row_beg = graph.row_map( irow );
400  const size_type row_end = graph.row_map( irow + 1 );
401  for ( size_type i = row_beg + 1 ; i < row_end ; ++i ) {
402  const typename CrsGraphType::data_type col = graph.entries(i);
403  size_type j = i ;
404  for ( ; row_beg < j && col < graph.entries(j-1) ; --j ) {
405  graph.entries(j) = graph.entries(j-1);
406  }
407  graph.entries(j) = col ;
408  }
409  }
410 
411  KOKKOS_INLINE_FUNCTION
412  void fill_elem_graph_map( const unsigned ielem ) const
413  {
414  typedef typename CrsGraphType::data_type entry_type;
415  for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
416 
417  const unsigned row_node = elem_node_id( ielem , row_local_node );
418 
419  for ( unsigned col_local_node = 0 ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) {
420 
421  const unsigned col_node = elem_node_id( ielem , col_local_node );
422 
423  entry_type entry = 0 ;
424 
425  if ( row_node + 1 < graph.row_map.extent(0) ) {
426 
427  const entry_type entry_end = static_cast<entry_type> (graph.row_map( row_node + 1 ));
428 
429  entry = graph.row_map( row_node );
430 
431  for ( ; entry < entry_end && graph.entries(entry) != static_cast<entry_type> (col_node) ; ++entry );
432 
433  if ( entry == entry_end ) entry = ~0u ;
434  }
435 
436  elem_graph( ielem , row_local_node , col_local_node ) = entry ;
437  }
438  }
439  }
440 
441  KOKKOS_INLINE_FUNCTION
442  void operator()( const unsigned iwork ) const
443  {
444  if ( phase == FILL_NODE_SET ) {
445  fill_set( iwork );
446  }
447  else if ( phase == FILL_GRAPH_ENTRIES ) {
448  fill_graph_entries( iwork );
449  }
450  else if ( phase == SORT_GRAPH_ENTRIES ) {
451  sort_graph_entries( iwork );
452  }
453  else if ( phase == FILL_ELEMENT_GRAPH ) {
454  fill_elem_graph_map( iwork );
455  }
456  }
457 
458  //------------------------------------
459  // parallel_scan: row offsets
460 
461  typedef unsigned value_type ;
462 
463  KOKKOS_INLINE_FUNCTION
464  void operator()( const unsigned irow , unsigned & update , const bool final ) const
465  {
466  // exclusive scan
467  if ( final ) { row_map( irow ) = update ; }
468 
469  update += row_count( irow );
470 
471  if ( final ) {
472  if ( irow + 1 == row_count.extent(0) ) {
473  row_map( irow + 1 ) = update ;
474  row_total() = update ;
475  }
476  }
477  }
478 
479  KOKKOS_INLINE_FUNCTION
480  void init( unsigned & update ) const { update = 0 ; }
481 
482  KOKKOS_INLINE_FUNCTION
483  void join( volatile unsigned & update , const volatile unsigned & input ) const { update += input ; }
484 
485  //------------------------------------
486 };
487 
488 } /* namespace FENL */
489 } /* namespace Example */
490 } /* namespace Kokkos */
491 
492 //----------------------------------------------------------------------------
493 //----------------------------------------------------------------------------
494 
495 namespace Kokkos {
496 namespace Example {
497 namespace FENL {
498 
500  enum { is_constant = false };
501 
502  const double coeff_k ;
503 
504  KOKKOS_INLINE_FUNCTION
505  double operator()( double pt[], unsigned ensemble_rank) const
506  { return coeff_k * std::sin(pt[0]) * std::sin(pt[1]) * std::sin(pt[2]); }
507 
509  : coeff_k( val ) {}
510 
512  : coeff_k( rhs.coeff_k ) {}
513 };
514 
515 // Exponential KL from Stokhos
516 template < typename Scalar, typename MeshScalar, typename Device >
518 public:
519 
520  // Turn into a meta-function class usable with Sacado::mpl::apply
521  template <typename T1, typename T2 = MeshScalar, typename T3 = Device>
522  struct apply {
524  };
525 
526  enum { is_constant = false };
527  typedef Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device> RandomVariableView;
528  typedef typename RandomVariableView::size_type size_type;
529 
534 
535  rf_type m_rf; // Exponential random field
536  const MeshScalar m_mean; // Mean of random field
537  const MeshScalar m_variance; // Variance of random field
538  const MeshScalar m_corr_len; // Correlation length of random field
539  const size_type m_num_rv; // Number of random variables
540  RandomVariableView m_rv; // KL random variables
541 
542 public:
543 
545  const MeshScalar mean ,
546  const MeshScalar variance ,
547  const MeshScalar correlation_length ,
548  const size_type num_rv ) :
549  m_mean( mean ),
550  m_variance( variance ),
551  m_corr_len( correlation_length ),
552  m_num_rv( num_rv ),
553  m_rv( "KL Random Variables", m_num_rv )
554  {
555  Teuchos::ParameterList solverParams;
556  solverParams.set("Number of KL Terms", int(num_rv));
557  solverParams.set("Mean", mean);
558  solverParams.set("Standard Deviation", std::sqrt(variance));
559  int ndim = 3;
560  Teuchos::Array<double> domain_upper(ndim, 1.0), domain_lower(ndim, 0.0),
561  correlation_lengths(ndim, correlation_length);
562  solverParams.set("Domain Upper Bounds", domain_upper);
563  solverParams.set("Domain Lower Bounds", domain_lower);
564  solverParams.set("Correlation Lengths", correlation_lengths);
565 
566  m_rf = rf_type(solverParams);
567  }
568 
570  m_rf( rhs.m_rf ) ,
571  m_mean( rhs.m_mean ) ,
572  m_variance( rhs.m_variance ) ,
573  m_corr_len( rhs.m_corr_len ) ,
574  m_num_rv( rhs.m_num_rv ) ,
575  m_rv( rhs.m_rv ) {}
576 
577  KOKKOS_INLINE_FUNCTION
578  void setRandomVariables( const RandomVariableView& rv) { m_rv = rv; }
579 
580  KOKKOS_INLINE_FUNCTION
582 
583  KOKKOS_INLINE_FUNCTION
584  local_scalar_type operator() ( const MeshScalar point[],
585  const size_type ensemble_rank ) const
586  {
587  local_rv_view_type local_rv =
589 
590  local_scalar_type val = m_rf.evaluate(point, local_rv);
591 
592  return val;
593  }
594 };
595 
596 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
597  class CoordinateMap , typename ScalarType >
599 {
600 public:
601 
604 
605  //------------------------------------
606 
607  typedef ExecutionSpace execution_space ;
608  typedef ScalarType scalar_type ;
609 
612  typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space > vector_type ;
613 
614  //------------------------------------
615 
617  static const unsigned TensorDim = SpatialDim * SpatialDim ;
621 
622  //------------------------------------
623 
626  typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
627  typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
628 
630 
631  //------------------------------------
632 
633 
634  //------------------------------------
635  // Computational data:
636 
646 
648  : elem_data()
650  , node_coords( rhs.node_coords )
651  , elem_graph( rhs.elem_graph )
654  , solution( rhs.solution )
655  , residual( rhs.residual )
656  , jacobian( rhs.jacobian )
657  {}
658 
659  ElementComputationBase( const mesh_type & arg_mesh ,
660  const vector_type & arg_solution ,
661  const elem_graph_type & arg_elem_graph ,
662  const sparse_matrix_type & arg_jacobian ,
663  const vector_type & arg_residual )
664  : elem_data()
665  , elem_node_ids( arg_mesh.elem_node() )
666  , node_coords( arg_mesh.node_coord() )
667  , elem_graph( arg_elem_graph )
668  , elem_jacobians()
669  , elem_residuals()
670  , solution( arg_solution )
671  , residual( arg_residual )
672  , jacobian( arg_jacobian )
673  {}
674 
675  //------------------------------------
676 
677  KOKKOS_INLINE_FUNCTION
679  const double grad[][ FunctionCount ] , // Gradient of bases master element
680  const double x[] ,
681  const double y[] ,
682  const double z[] ,
683  double dpsidx[] ,
684  double dpsidy[] ,
685  double dpsidz[] ) const
686  {
687  enum { j11 = 0 , j12 = 1 , j13 = 2 ,
688  j21 = 3 , j22 = 4 , j23 = 5 ,
689  j31 = 6 , j32 = 7 , j33 = 8 };
690 
691  // Jacobian accumulation:
692 
693  double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
694 
695  for( unsigned i = 0; i < FunctionCount ; ++i ) {
696  const double x1 = x[i] ;
697  const double x2 = y[i] ;
698  const double x3 = z[i] ;
699 
700  const double g1 = grad[0][i] ;
701  const double g2 = grad[1][i] ;
702  const double g3 = grad[2][i] ;
703 
704  J[j11] += g1 * x1 ;
705  J[j12] += g1 * x2 ;
706  J[j13] += g1 * x3 ;
707 
708  J[j21] += g2 * x1 ;
709  J[j22] += g2 * x2 ;
710  J[j23] += g2 * x3 ;
711 
712  J[j31] += g3 * x1 ;
713  J[j32] += g3 * x2 ;
714  J[j33] += g3 * x3 ;
715  }
716 
717  // Inverse jacobian:
718 
719  double invJ[ TensorDim ] = {
720  static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
721  static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
722  static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
723 
724  static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
725  static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
726  static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
727 
728  static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
729  static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
730  static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
731 
732  const double detJ = J[j11] * invJ[j11] +
733  J[j21] * invJ[j12] +
734  J[j31] * invJ[j13] ;
735 
736  const double detJinv = 1.0 / detJ ;
737 
738  for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
739 
740  // Transform gradients:
741 
742  for( unsigned i = 0; i < FunctionCount ; ++i ) {
743  const double g0 = grad[0][i];
744  const double g1 = grad[1][i];
745  const double g2 = grad[2][i];
746 
747  dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
748  dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
749  dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
750  }
751 
752  return detJ ;
753  }
754 
755 };
756 
762 };
763 
764 template< class FiniteElementMeshType ,
765  class SparseMatrixType ,
766  AssemblyMethod Method ,
767  class CoeffFunctionType = ElementComputationConstantCoefficient
768  >
770 
771 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
772  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
774  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
775  CrsMatrix< ScalarType , ExecutionSpace > ,
776  Analytic , CoeffFunctionType > :
777  public ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
778  ScalarType> {
779 public:
780 
781  typedef ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
782  ScalarType> base_type;
783 
786 
787  static const unsigned FunctionCount = base_type::FunctionCount;
788  static const unsigned IntegrationCount = base_type::IntegrationCount;
789  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
790 
791  const CoeffFunctionType coeff_function ;
793 
795  base_type(rhs) ,
796  coeff_function( rhs.coeff_function ) ,
797  dev_config( rhs.dev_config ) {}
798 
800  const typename base_type::mesh_type & arg_mesh ,
801  const CoeffFunctionType & arg_coeff_function ,
802  const typename base_type::vector_type & arg_solution ,
803  const typename base_type::elem_graph_type & arg_elem_graph ,
804  const typename base_type::sparse_matrix_type & arg_jacobian ,
805  const typename base_type::vector_type & arg_residual ,
806  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
807  base_type(arg_mesh, arg_solution, arg_elem_graph, arg_jacobian,
808  arg_residual),
809  coeff_function( arg_coeff_function ) ,
810  dev_config( arg_dev_config ) {}
811 
812  //------------------------------------
813 
814  void apply() const
815  {
816  const size_t nelem = this->elem_node_ids.extent(0);
817  parallel_for( nelem , *this );
818  }
819 
820  KOKKOS_INLINE_FUNCTION
821  void gatherSolution(const unsigned ielem,
822  scalar_type val[],
823  unsigned node_index[],
824  double x[], double y[], double z[],
825  scalar_type res[],
826  scalar_type mat[][FunctionCount]) const
827  {
828  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
829  const unsigned ni = this->elem_node_ids( ielem , i );
830 
831  node_index[i] = ni ;
832 
833  x[i] = this->node_coords( ni , 0 );
834  y[i] = this->node_coords( ni , 1 );
835  z[i] = this->node_coords( ni , 2 );
836 
837  val[i] = this->solution( ni ) ;
838  res[i] = 0 ;
839 
840  for( unsigned j = 0; j < FunctionCount ; j++){
841  mat[i][j] = 0 ;
842  }
843  }
844  }
845 
846  KOKKOS_INLINE_FUNCTION
847  void scatterResidual(const unsigned ielem,
848  const unsigned node_index[],
849  const scalar_type res[],
850  const scalar_type mat[][FunctionCount]) const
851  {
852  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
853  const unsigned row = node_index[i] ;
854  if ( row < this->residual.extent(0) ) {
855  atomic_add( & this->residual( row ) , res[i] );
856 
857  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
858  const unsigned entry = this->elem_graph( ielem , i , j );
859  if ( entry != ~0u ) {
860  atomic_add( & this->jacobian.values( entry ) , mat[i][j] );
861  }
862  }
863  }
864  }
865  }
866 
867  KOKKOS_INLINE_FUNCTION
869  const scalar_type dof_values[] ,
870  const double x[],
871  const double y[],
872  const double z[],
873  scalar_type elem_res[] ,
874  scalar_type elem_mat[][FunctionCount] ) const
875  {
876  scalar_type coeff_k;
877  double coeff_src = 1.234;
878  double advection[] = { 1.1, 1.2, 1.3 };
879  double dpsidx[ FunctionCount ] ;
880  double dpsidy[ FunctionCount ] ;
881  double dpsidz[ FunctionCount ] ;
882  double pt[] = {0.0, 0.0, 0.0};
883  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
884 
885  // If function is not constant
886  // then compute physical coordinates of integration point
887  if ( ! coeff_function.is_constant ) {
888  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
889  pt[0] += x[j] * this->elem_data.values[i][j] ;
890  pt[1] += y[j] * this->elem_data.values[i][j] ;
891  pt[2] += z[j] * this->elem_data.values[i][j] ;
892  }
893  }
894  coeff_k = coeff_function(pt, 0);
895 
896  const double integ_weight = this->elem_data.weights[i];
897  const double* bases_vals = this->elem_data.values[i];
898  const double detJ =
899  this->transform_gradients( this->elem_data.gradients[i] ,
900  x , y , z ,
901  dpsidx , dpsidy , dpsidz );
902  const double detJ_weight = detJ * integ_weight;
903  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
904 
905  scalar_type value_at_pt = 0 ;
906  scalar_type gradx_at_pt = 0 ;
907  scalar_type grady_at_pt = 0 ;
908  scalar_type gradz_at_pt = 0 ;
909  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
910  value_at_pt += dof_values[m] * bases_vals[m] ;
911  gradx_at_pt += dof_values[m] * dpsidx[m] ;
912  grady_at_pt += dof_values[m] * dpsidy[m] ;
913  gradz_at_pt += dof_values[m] * dpsidz[m] ;
914  }
915 
916  const scalar_type source_term =
917  coeff_src * value_at_pt * value_at_pt ;
918  const scalar_type source_deriv =
919  2.0 * coeff_src * value_at_pt ;
920 
921  const scalar_type advection_x = advection[0];
922  const scalar_type advection_y = advection[1];
923  const scalar_type advection_z = advection[2];
924 
925  const scalar_type advection_term =
926  advection_x*gradx_at_pt +
927  advection_y*grady_at_pt +
928  advection_z*gradz_at_pt ;
929 
930  for ( unsigned m = 0; m < FunctionCount; ++m) {
931  scalar_type * const mat = elem_mat[m] ;
932  const double bases_val_m = bases_vals[m] * detJ_weight ;
933  const double dpsidx_m = dpsidx[m] ;
934  const double dpsidy_m = dpsidy[m] ;
935  const double dpsidz_m = dpsidz[m] ;
936 
937  elem_res[m] +=
938  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
939  dpsidy_m * grady_at_pt +
940  dpsidz_m * gradz_at_pt ) +
941  bases_val_m * ( advection_term + source_term ) ;
942 
943  for( unsigned n = 0; n < FunctionCount; n++) {
944  const double dpsidx_n = dpsidx[n] ;
945  const double dpsidy_n = dpsidy[n] ;
946  const double dpsidz_n = dpsidz[n] ;
947  mat[n] +=
948  detJ_weight_coeff_k * ( dpsidx_m * dpsidx_n +
949  dpsidy_m * dpsidy_n +
950  dpsidz_m * dpsidz_n ) +
951  bases_val_m * ( advection_x * dpsidx_n +
952  advection_y * dpsidy_n +
953  advection_z * dpsidz_n +
954  source_deriv * bases_vals[n] ) ;
955  }
956  }
957  }
958  }
959 
960  KOKKOS_INLINE_FUNCTION
961  void operator()( const unsigned ielem ) const
962  {
963  double x[ FunctionCount ] ;
964  double y[ FunctionCount ] ;
965  double z[ FunctionCount ] ;
966  unsigned node_index[ ElemNodeCount ];
967 
969  scalar_type elem_res[ FunctionCount ] ;
970  scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
971 
972  // Gather nodal coordinates and solution vector:
973  gatherSolution(ielem, val, node_index, x, y, z, elem_res, elem_mat);
974 
975  // Compute nodal element residual vector and Jacobian matrix
976  computeElementResidualJacobian( val, x, y, z, elem_res , elem_mat );
977 
978  // Scatter nodal element residual and Jacobian in global vector and matrix:
979  scatterResidual( ielem, node_index, elem_res, elem_mat );
980  }
981 }; /* ElementComputation */
982 
983 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
984  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
986  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
987  CrsMatrix< ScalarType , ExecutionSpace > ,
988  FadElement , CoeffFunctionType > :
989  public ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
990  ScalarType> {
991 public:
992 
993  typedef ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
994  ScalarType> base_type;
995 
998 
999  static const unsigned FunctionCount = base_type::FunctionCount;
1000  static const unsigned IntegrationCount = base_type::IntegrationCount;
1001  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1002 
1003  typedef Sacado::Fad::SLFad<scalar_type,FunctionCount> fad_scalar_type;
1004  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1005 
1006  const CoeffFunctionType coeff_function ;
1008 
1010  base_type(rhs) ,
1011  coeff_function( rhs.coeff_function ) ,
1012  dev_config( rhs.dev_config ) {}
1013 
1015  const typename base_type::mesh_type & arg_mesh ,
1016  const CoeffFunctionType & arg_coeff_function ,
1017  const typename base_type::vector_type & arg_solution ,
1018  const typename base_type::elem_graph_type & arg_elem_graph ,
1019  const typename base_type::sparse_matrix_type & arg_jacobian ,
1020  const typename base_type::vector_type & arg_residual ,
1021  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1022  base_type(arg_mesh, arg_solution, arg_elem_graph,
1023  arg_jacobian, arg_residual),
1024  coeff_function( arg_coeff_function ) ,
1025  dev_config( arg_dev_config ) {}
1026 
1027  //------------------------------------
1028 
1029  void apply() const
1030  {
1031  const size_t nelem = this->elem_node_ids.extent(0);
1032  parallel_for( nelem , *this );
1033  }
1034 
1035  KOKKOS_INLINE_FUNCTION
1036  void gatherSolution(const unsigned ielem,
1037  fad_scalar_type val[],
1038  unsigned node_index[],
1039  double x[], double y[], double z[],
1040  fad_scalar_type res[]) const
1041  {
1042  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1043  const unsigned ni = this->elem_node_ids( ielem , i );
1044 
1045  node_index[i] = ni ;
1046 
1047  x[i] = this->node_coords( ni , 0 );
1048  y[i] = this->node_coords( ni , 1 );
1049  z[i] = this->node_coords( ni , 2 );
1050 
1051  val[i].val() = this->solution( ni );
1052  val[i].diff( i, FunctionCount );
1053  }
1054  }
1055 
1056  KOKKOS_INLINE_FUNCTION
1057  void scatterResidual(const unsigned ielem,
1058  const unsigned node_index[],
1059  fad_scalar_type res[]) const
1060  {
1061  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
1062  const unsigned row = node_index[i] ;
1063  if ( row < this->residual.extent(0) ) {
1064  atomic_add( & this->residual( row ) , res[i].val() );
1065 
1066  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
1067  const unsigned entry = this->elem_graph( ielem , i , j );
1068  if ( entry != ~0u ) {
1069  atomic_add( & this->jacobian.values( entry ) ,
1070  res[i].fastAccessDx(j) );
1071  }
1072  }
1073  }
1074  }
1075  }
1076 
1077  template <typename local_scalar_type>
1078  KOKKOS_INLINE_FUNCTION
1079  void computeElementResidual(const local_scalar_type dof_values[] ,
1080  const double x[],
1081  const double y[],
1082  const double z[],
1083  local_scalar_type elem_res[] ) const
1084  {
1085  scalar_type coeff_k;
1086  double coeff_src = 1.234;
1087  double advection[] = { 1.1, 1.2, 1.3 };
1088  double dpsidx[ FunctionCount ] ;
1089  double dpsidy[ FunctionCount ] ;
1090  double dpsidz[ FunctionCount ] ;
1091  double pt[] = {0.0, 0.0, 0.0};
1092  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1093 
1094  // If function is not constant
1095  // then compute physical coordinates of integration point
1096  if ( ! coeff_function.is_constant ) {
1097  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1098  pt[0] += x[j] * this->elem_data.values[i][j] ;
1099  pt[1] += y[j] * this->elem_data.values[i][j] ;
1100  pt[2] += z[j] * this->elem_data.values[i][j] ;
1101  }
1102  }
1103  coeff_k = coeff_function(pt, 0);
1104 
1105  const double integ_weight = this->elem_data.weights[i];
1106  const double* bases_vals = this->elem_data.values[i];
1107  const double detJ =
1108  this->transform_gradients( this->elem_data.gradients[i] ,
1109  x , y , z ,
1110  dpsidx , dpsidy , dpsidz );
1111  const double detJ_weight = detJ * integ_weight;
1112  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1113 
1114  local_scalar_type value_at_pt = 0 ;
1115  local_scalar_type gradx_at_pt = 0 ;
1116  local_scalar_type grady_at_pt = 0 ;
1117  local_scalar_type gradz_at_pt = 0 ;
1118  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1119  value_at_pt += dof_values[m] * bases_vals[m] ;
1120  gradx_at_pt += dof_values[m] * dpsidx[m] ;
1121  grady_at_pt += dof_values[m] * dpsidy[m] ;
1122  gradz_at_pt += dof_values[m] * dpsidz[m] ;
1123  }
1124 
1125  const local_scalar_type source_term =
1126  coeff_src * value_at_pt * value_at_pt ;
1127 
1128  const local_scalar_type advection_term =
1129  advection[0]*gradx_at_pt +
1130  advection[1]*grady_at_pt +
1131  advection[2]*gradz_at_pt;
1132 
1133  for ( unsigned m = 0; m < FunctionCount; ++m) {
1134  const double bases_val_m = bases_vals[m] * detJ_weight ;
1135  const double dpsidx_m = dpsidx[m] ;
1136  const double dpsidy_m = dpsidy[m] ;
1137  const double dpsidz_m = dpsidz[m] ;
1138 
1139  elem_res[m] +=
1140  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1141  dpsidy_m * grady_at_pt +
1142  dpsidz_m * gradz_at_pt ) +
1143  bases_val_m * ( advection_term + source_term ) ;
1144  }
1145  }
1146  }
1147 
1148  KOKKOS_INLINE_FUNCTION
1149  void operator()( const unsigned ielem ) const
1150  {
1151  double x[ FunctionCount ] ;
1152  double y[ FunctionCount ] ;
1153  double z[ FunctionCount ] ;
1154  unsigned node_index[ ElemNodeCount ];
1155 
1157  fad_scalar_type elem_res[ FunctionCount ] ; // this zeros elem_res
1158 
1159  // Gather nodal coordinates and solution vector:
1160  gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1161 
1162  // Compute nodal element residual vector:
1163  computeElementResidual( val, x, y, z, elem_res );
1164 
1165  // Scatter nodal element residual in global vector:
1166  scatterResidual( ielem, node_index, elem_res );
1167  }
1168 }; /* ElementComputation */
1169 
1170 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
1171  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
1173  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1174  CrsMatrix< ScalarType , ExecutionSpace > ,
1175  FadElementOptimized , CoeffFunctionType > :
1176  public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1177  CrsMatrix< ScalarType , ExecutionSpace > ,
1178  FadElement, CoeffFunctionType > {
1179 public:
1180 
1183  FadElement , CoeffFunctionType > base_type;
1184 
1187 
1188  static const unsigned FunctionCount = base_type::FunctionCount;
1189  static const unsigned IntegrationCount = base_type::IntegrationCount;
1190  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1191 
1192  typedef Sacado::Fad::SLFad<scalar_type,FunctionCount> fad_scalar_type;
1193  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1194 
1196 
1198  const typename base_type::mesh_type & arg_mesh ,
1199  const CoeffFunctionType & arg_coeff_function ,
1200  const typename base_type::vector_type & arg_solution ,
1201  const typename base_type::elem_graph_type & arg_elem_graph ,
1202  const typename base_type::sparse_matrix_type & arg_jacobian ,
1203  const typename base_type::vector_type & arg_residual ,
1204  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1205  base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1206  arg_jacobian, arg_residual, arg_dev_config) {}
1207 
1208  //------------------------------------
1209 
1210  void apply() const
1211  {
1212  const size_t nelem = this->elem_node_ids.extent(0);
1213  parallel_for( nelem , *this );
1214  }
1215 
1216  KOKKOS_INLINE_FUNCTION
1217  void gatherSolution(const unsigned ielem,
1218  scalar_type val[],
1219  unsigned node_index[],
1220  double x[], double y[], double z[],
1221  fad_scalar_type res[]) const
1222  {
1223  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1224  const unsigned ni = this->elem_node_ids( ielem , i );
1225 
1226  node_index[i] = ni ;
1227 
1228  x[i] = this->node_coords( ni , 0 );
1229  y[i] = this->node_coords( ni , 1 );
1230  z[i] = this->node_coords( ni , 2 );
1231 
1232  val[i] = this->solution( ni );
1233  }
1234  }
1235 
1236  template <typename local_scalar_type>
1237  KOKKOS_INLINE_FUNCTION
1238  void computeElementResidual(const scalar_type dof_values[] ,
1239  const double x[],
1240  const double y[],
1241  const double z[],
1242  local_scalar_type elem_res[] ) const
1243  {
1244  scalar_type coeff_k;
1245  double coeff_src = 1.234;
1246  double advection[] = { 1.1, 1.2, 1.3 };
1247  double dpsidx[ FunctionCount ] ;
1248  double dpsidy[ FunctionCount ] ;
1249  double dpsidz[ FunctionCount ] ;
1250  double pt[] = {0.0, 0.0, 0.0};
1251  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1252 
1253  // If function is not constant
1254  // then compute physical coordinates of integration point
1255  if ( ! this->coeff_function.is_constant ) {
1256  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1257  pt[0] += x[j] * this->elem_data.values[i][j] ;
1258  pt[1] += y[j] * this->elem_data.values[i][j] ;
1259  pt[2] += z[j] * this->elem_data.values[i][j] ;
1260  }
1261  }
1262  coeff_k = this->coeff_function(pt, 0);
1263 
1264  const double integ_weight = this->elem_data.weights[i];
1265  const double* bases_vals = this->elem_data.values[i];
1266  const double detJ =
1267  this->transform_gradients( this->elem_data.gradients[i] ,
1268  x , y , z ,
1269  dpsidx , dpsidy , dpsidz );
1270  const double detJ_weight = detJ * integ_weight;
1271  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1272 
1273  local_scalar_type value_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1274  local_scalar_type gradx_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1275  local_scalar_type grady_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1276  local_scalar_type gradz_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1277  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1278  value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1279  value_at_pt.fastAccessDx(m) = bases_vals[m] ;
1280 
1281  gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1282  gradx_at_pt.fastAccessDx(m) = dpsidx[m] ;
1283 
1284  grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1285  grady_at_pt.fastAccessDx(m) = dpsidy[m] ;
1286 
1287  gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1288  gradz_at_pt.fastAccessDx(m) = dpsidz[m] ;
1289  }
1290 
1291  const local_scalar_type source_term =
1292  coeff_src * value_at_pt * value_at_pt ;
1293 
1294  const local_scalar_type advection_term =
1295  advection[0]*gradx_at_pt +
1296  advection[1]*grady_at_pt +
1297  advection[2]*gradz_at_pt;
1298 
1299  for ( unsigned m = 0; m < FunctionCount; ++m) {
1300  const double bases_val_m = bases_vals[m] * detJ_weight ;
1301  const double dpsidx_m = dpsidx[m] ;
1302  const double dpsidy_m = dpsidy[m] ;
1303  const double dpsidz_m = dpsidz[m] ;
1304 
1305  elem_res[m] +=
1306  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1307  dpsidy_m * grady_at_pt +
1308  dpsidz_m * gradz_at_pt ) +
1309  bases_val_m * ( advection_term + source_term ) ;
1310  }
1311  }
1312  }
1313 
1314  KOKKOS_INLINE_FUNCTION
1315  void operator()( const unsigned ielem ) const
1316  {
1317  double x[ FunctionCount ] ;
1318  double y[ FunctionCount ] ;
1319  double z[ FunctionCount ] ;
1320  unsigned node_index[ ElemNodeCount ];
1321 
1323  fad_scalar_type elem_res[ FunctionCount ] ;
1324 
1325  // Gather nodal coordinates and solution vector:
1326  gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1327 
1328  // Compute nodal element residual vector:
1329  computeElementResidual( val, x, y, z, elem_res );
1330 
1331  // Scatter nodal element residual in global vector:
1332  this->scatterResidual( ielem, node_index, elem_res );
1333  }
1334 }; /* ElementComputation */
1335 
1336 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
1337  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
1339  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1340  CrsMatrix< ScalarType , ExecutionSpace > ,
1341  FadQuadPoint , CoeffFunctionType > :
1342  public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1343  CrsMatrix< ScalarType , ExecutionSpace > ,
1344  Analytic , CoeffFunctionType > {
1345 public:
1346 
1349  Analytic , CoeffFunctionType > base_type;
1350 
1353 
1354  static const unsigned FunctionCount = base_type::FunctionCount;
1355  static const unsigned IntegrationCount = base_type::IntegrationCount;
1356  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1357 
1358  typedef Sacado::Fad::SLFad<scalar_type,4> fad_scalar_type;
1359  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1360 
1362 
1364  const typename base_type::mesh_type & arg_mesh ,
1365  const CoeffFunctionType & arg_coeff_function ,
1366  const typename base_type::vector_type & arg_solution ,
1367  const typename base_type::elem_graph_type & arg_elem_graph ,
1368  const typename base_type::sparse_matrix_type & arg_jacobian ,
1369  const typename base_type::vector_type & arg_residual ,
1370  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1371  base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1372  arg_jacobian, arg_residual, arg_dev_config) {}
1373 
1374  //------------------------------------
1375 
1376  void apply() const
1377  {
1378  const size_t nelem = this->elem_node_ids.extent(0);
1379  parallel_for( nelem , *this );
1380  }
1381 
1382  KOKKOS_INLINE_FUNCTION
1384  const scalar_type dof_values[] ,
1385  const double x[],
1386  const double y[],
1387  const double z[],
1388  scalar_type elem_res[] ,
1389  scalar_type elem_mat[][FunctionCount] ) const
1390  {
1391  scalar_type coeff_k;
1392  double coeff_src = 1.234;
1393  double advection[] = { 1.1, 1.2, 1.3 };
1394  double dpsidx[ FunctionCount ] ;
1395  double dpsidy[ FunctionCount ] ;
1396  double dpsidz[ FunctionCount ] ;
1397  double pt[] = {0.0, 0.0, 0.0};
1398 
1399  fad_scalar_type value_at_pt(4, 0, 0.0) ;
1400  fad_scalar_type gradx_at_pt(4, 1, 0.0) ;
1401  fad_scalar_type grady_at_pt(4, 2, 0.0) ;
1402  fad_scalar_type gradz_at_pt(4, 3, 0.0) ;
1403  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1404 
1405  // If function is not constant
1406  // then compute physical coordinates of integration point
1407  if ( ! this->coeff_function.is_constant ) {
1408  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1409  pt[0] += x[j] * this->elem_data.values[i][j] ;
1410  pt[1] += y[j] * this->elem_data.values[i][j] ;
1411  pt[2] += z[j] * this->elem_data.values[i][j] ;
1412  }
1413  }
1414  coeff_k = this->coeff_function(pt, 0);
1415 
1416  const double integ_weight = this->elem_data.weights[i];
1417  const double* bases_vals = this->elem_data.values[i];
1418  const double detJ =
1419  this->transform_gradients( this->elem_data.gradients[i] ,
1420  x , y , z ,
1421  dpsidx , dpsidy , dpsidz );
1422  const double detJ_weight = detJ * integ_weight;
1423  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1424 
1425  value_at_pt.val() = 0.0 ;
1426  gradx_at_pt.val() = 0.0 ;
1427  grady_at_pt.val() = 0.0 ;
1428  gradz_at_pt.val() = 0.0 ;
1429  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1430  value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1431  gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1432  grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1433  gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1434  }
1435 
1436  const fad_scalar_type source_term =
1437  coeff_src * value_at_pt * value_at_pt ;
1438 
1439  const fad_scalar_type advection_term =
1440  advection[0]*gradx_at_pt +
1441  advection[1]*grady_at_pt +
1442  advection[2]*gradz_at_pt;
1443 
1444  for ( unsigned m = 0; m < FunctionCount; ++m) {
1445  const double bases_val_m = bases_vals[m] * detJ_weight ;
1446  fad_scalar_type res =
1447  detJ_weight_coeff_k * ( dpsidx[m] * gradx_at_pt +
1448  dpsidy[m] * grady_at_pt +
1449  dpsidz[m] * gradz_at_pt ) +
1450  bases_val_m * ( advection_term + source_term ) ;
1451 
1452  elem_res[m] += res.val();
1453 
1454  scalar_type * const mat = elem_mat[m] ;
1455  for( unsigned n = 0; n < FunctionCount; n++) {
1456  mat[n] += res.fastAccessDx(0) * bases_vals[n] +
1457  res.fastAccessDx(1) * dpsidx[n] +
1458  res.fastAccessDx(2) * dpsidy[n] +
1459  res.fastAccessDx(3) * dpsidz[n];
1460  }
1461  }
1462  }
1463  }
1464 
1465  KOKKOS_INLINE_FUNCTION
1466  void operator()( const unsigned ielem ) const
1467  {
1468  double x[ FunctionCount ] ;
1469  double y[ FunctionCount ] ;
1470  double z[ FunctionCount ] ;
1471  unsigned node_index[ ElemNodeCount ];
1472 
1474  scalar_type elem_res[ FunctionCount ] ;
1475  scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
1476 
1477  // Gather nodal coordinates and solution vector:
1478  this->gatherSolution( ielem, val, node_index, x, y, z, elem_res, elem_mat );
1479 
1480  // Compute nodal element residual vector and Jacobian matrix:
1481  computeElementResidualJacobian( val, x, y, z, elem_res, elem_mat );
1482 
1483  // Scatter nodal element residual and Jacobian in global vector and matrix:
1484  this->scatterResidual( ielem, node_index, elem_res, elem_mat );
1485  }
1486 }; /* ElementComputation */
1487 
1488 #if 0
1489 template< class FiniteElementMeshType , class SparseMatrixType
1490  , class CoeffFunctionType = ElementComputationConstantCoefficient
1491  >
1492 class ElementComputation ;
1493 
1494 
1495 template< class ExecutionSpace , BoxElemPart::ElemOrder Order , class CoordinateMap ,
1496  typename ScalarType , class CoeffFunctionType >
1497 class ElementComputation
1498  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap >
1499  , Kokkos::Example::FENL::CrsMatrix< ScalarType , ExecutionSpace >
1500  , CoeffFunctionType >
1501 {
1502 public:
1503 
1506 
1507  //------------------------------------
1508 
1509  typedef ExecutionSpace execution_space ;
1510  typedef ScalarType scalar_type ;
1511 
1514  typedef typename sparse_matrix_type::values_type matrix_values_type ;
1515  typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space > vector_type ;
1516 
1517  //------------------------------------
1518 
1519  typedef LocalViewTraits< vector_type > local_vector_view_traits;
1520  typedef LocalViewTraits< matrix_values_type> local_matrix_view_traits;
1521  typedef typename local_vector_view_traits::local_view_type local_vector_type;
1522  typedef typename local_matrix_view_traits::local_view_type local_matrix_type;
1523  typedef typename local_vector_view_traits::local_value_type local_scalar_type;
1524  static const bool use_team = local_vector_view_traits::use_team;
1525 
1526  static const unsigned SpatialDim = element_data_type::spatial_dimension ;
1527  static const unsigned TensorDim = SpatialDim * SpatialDim ;
1528  static const unsigned ElemNodeCount = element_data_type::element_node_count ;
1529  static const unsigned FunctionCount = element_data_type::function_count ;
1530  static const unsigned IntegrationCount = element_data_type::integration_count ;
1531 
1532  //------------------------------------
1533 
1534  typedef typename mesh_type::node_coord_type node_coord_type ;
1535  typedef typename mesh_type::elem_node_type elem_node_type ;
1536  typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
1537  typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
1538 
1539  typedef LocalViewTraits< elem_matrices_type > local_elem_matrices_traits;
1540  typedef LocalViewTraits< elem_vectors_type > local_elem_vectors_traits;
1541  typedef typename local_elem_matrices_traits::local_view_type local_elem_matrices_type;
1542  typedef typename local_elem_vectors_traits::local_view_type local_elem_vectors_type;
1543 
1544  typedef typename NodeNodeGraph< elem_node_type , sparse_graph_type , ElemNodeCount >::ElemGraphType elem_graph_type ;
1545 
1546  //------------------------------------
1547 
1548 
1549  //------------------------------------
1550  // Computational data:
1551 
1555  const elem_graph_type elem_graph ;
1558  const vector_type solution ;
1559  const vector_type residual ;
1561  const CoeffFunctionType coeff_function ;
1562  const Kokkos::Example::FENL::DeviceConfig dev_config ;
1563 
1564  ElementComputation( const ElementComputation & rhs )
1565  : elem_data()
1566  , elem_node_ids( rhs.elem_node_ids )
1567  , node_coords( rhs.node_coords )
1568  , elem_graph( rhs.elem_graph )
1571  , solution( rhs.solution )
1572  , residual( rhs.residual )
1573  , jacobian( rhs.jacobian )
1574  , coeff_function( rhs.coeff_function )
1575  , dev_config( rhs.dev_config )
1576  {}
1577 
1578  // If the element->sparse_matrix graph is provided then perform atomic updates
1579  // Otherwise fill per-element contributions for subequent gather-add into a residual and jacobian.
1580  ElementComputation( const mesh_type & arg_mesh ,
1581  const CoeffFunctionType & arg_coeff_function ,
1582  const vector_type & arg_solution ,
1583  const elem_graph_type & arg_elem_graph ,
1584  const sparse_matrix_type & arg_jacobian ,
1585  const vector_type & arg_residual ,
1586  const Kokkos::Example::FENL::DeviceConfig arg_dev_config )
1587  : elem_data()
1588  , elem_node_ids( arg_mesh.elem_node() )
1589  , node_coords( arg_mesh.node_coord() )
1590  , elem_graph( arg_elem_graph )
1591  , elem_jacobians()
1592  , elem_residuals()
1593  , solution( arg_solution )
1594  , residual( arg_residual )
1595  , jacobian( arg_jacobian )
1596  , coeff_function( arg_coeff_function )
1597  , dev_config( arg_dev_config )
1598  {}
1599 
1600  //------------------------------------
1601 
1602  void apply() const
1603  {
1604  const size_t nelem = elem_node_ids.extent(0);
1605  if ( use_team ) {
1606  const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1607  const size_t league_size =
1608  (nelem + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1609  Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1610  parallel_for( config , *this );
1611  }
1612  else {
1613  parallel_for( nelem , *this );
1614  }
1615  }
1616 
1617  //------------------------------------
1618 
1619  static const unsigned FLOPS_transform_gradients =
1620  /* Jacobian */ FunctionCount * TensorDim * 2 +
1621  /* Inverse jacobian */ TensorDim * 6 + 6 +
1622  /* Gradient transform */ FunctionCount * 15 ;
1623 
1624  KOKKOS_INLINE_FUNCTION
1625  double transform_gradients(
1626  const double grad[][ FunctionCount ] , // Gradient of bases master element
1627  const double x[] ,
1628  const double y[] ,
1629  const double z[] ,
1630  double dpsidx[] ,
1631  double dpsidy[] ,
1632  double dpsidz[] ) const
1633  {
1634  enum { j11 = 0 , j12 = 1 , j13 = 2 ,
1635  j21 = 3 , j22 = 4 , j23 = 5 ,
1636  j31 = 6 , j32 = 7 , j33 = 8 };
1637 
1638  // Jacobian accumulation:
1639 
1640  double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1641 
1642  for( unsigned i = 0; i < FunctionCount ; ++i ) {
1643  const double x1 = x[i] ;
1644  const double x2 = y[i] ;
1645  const double x3 = z[i] ;
1646 
1647  const double g1 = grad[0][i] ;
1648  const double g2 = grad[1][i] ;
1649  const double g3 = grad[2][i] ;
1650 
1651  J[j11] += g1 * x1 ;
1652  J[j12] += g1 * x2 ;
1653  J[j13] += g1 * x3 ;
1654 
1655  J[j21] += g2 * x1 ;
1656  J[j22] += g2 * x2 ;
1657  J[j23] += g2 * x3 ;
1658 
1659  J[j31] += g3 * x1 ;
1660  J[j32] += g3 * x2 ;
1661  J[j33] += g3 * x3 ;
1662  }
1663 
1664  // Inverse jacobian:
1665 
1666  double invJ[ TensorDim ] = {
1667  static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
1668  static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
1669  static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
1670 
1671  static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
1672  static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
1673  static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
1674 
1675  static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
1676  static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
1677  static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
1678 
1679  const double detJ = J[j11] * invJ[j11] +
1680  J[j21] * invJ[j12] +
1681  J[j31] * invJ[j13] ;
1682 
1683  const double detJinv = 1.0 / detJ ;
1684 
1685  for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
1686 
1687  // Transform gradients:
1688 
1689  for( unsigned i = 0; i < FunctionCount ; ++i ) {
1690  const double g0 = grad[0][i];
1691  const double g1 = grad[1][i];
1692  const double g2 = grad[2][i];
1693 
1694  dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
1695  dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
1696  dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
1697  }
1698 
1699  return detJ ;
1700  }
1701 
1702  KOKKOS_INLINE_FUNCTION
1703  void contributeResidualJacobian(
1704  const local_scalar_type dof_values[] ,
1705  const double dpsidx[] ,
1706  const double dpsidy[] ,
1707  const double dpsidz[] ,
1708  const double detJ ,
1709  const local_scalar_type coeff_k ,
1710  const double integ_weight ,
1711  const double bases_vals[] ,
1712  local_scalar_type elem_res[] ,
1713  local_scalar_type elem_mat[][ FunctionCount ] ) const
1714  {
1715  local_scalar_type value_at_pt = 0 ;
1716  local_scalar_type gradx_at_pt = 0 ;
1717  local_scalar_type grady_at_pt = 0 ;
1718  local_scalar_type gradz_at_pt = 0 ;
1719 
1720  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1721  value_at_pt += dof_values[m] * bases_vals[m] ;
1722  gradx_at_pt += dof_values[m] * dpsidx[m] ;
1723  grady_at_pt += dof_values[m] * dpsidy[m] ;
1724  gradz_at_pt += dof_values[m] * dpsidz[m] ;
1725  }
1726 
1727  const local_scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
1728  const local_scalar_type res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
1729  const local_scalar_type mat_val = 2.0 * value_at_pt * detJ * integ_weight ;
1730 
1731  // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
1732  // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
1733 
1734  for ( unsigned m = 0; m < FunctionCount; ++m) {
1735  local_scalar_type * const mat = elem_mat[m] ;
1736  const double bases_val_m = bases_vals[m];
1737  const double dpsidx_m = dpsidx[m] ;
1738  const double dpsidy_m = dpsidy[m] ;
1739  const double dpsidz_m = dpsidz[m] ;
1740 
1741  elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
1742  dpsidy_m * grady_at_pt +
1743  dpsidz_m * gradz_at_pt ) +
1744  res_val * bases_val_m ;
1745 
1746  for( unsigned n = 0; n < FunctionCount; n++) {
1747 
1748  mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] +
1749  dpsidy_m * dpsidy[n] +
1750  dpsidz_m * dpsidz[n] ) +
1751  mat_val * bases_val_m * bases_vals[n];
1752  }
1753  }
1754  }
1755 
1756  KOKKOS_INLINE_FUNCTION
1757  void operator()( const typename TeamPolicy< execution_space >::member_type & dev ) const
1758  {
1759 
1760  const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1761  const unsigned num_element_threads = dev_config.block_dim.y ;
1762  const unsigned element_rank = dev.team_rank() / num_ensemble_threads ;
1763  const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1764 
1765  const unsigned ielem =
1766  dev.league_rank() * num_element_threads + element_rank;
1767 
1768  if (ielem >= elem_node_ids.extent(0))
1769  return;
1770 
1771  (*this)( ielem, ensemble_rank );
1772 
1773  }
1774 
1775  KOKKOS_INLINE_FUNCTION
1776  void operator()( const unsigned ielem ,
1777  const unsigned ensemble_rank = 0 ) const
1778  {
1779  local_vector_type local_solution =
1780  local_vector_view_traits::create_local_view(solution,
1781  ensemble_rank);
1782  local_vector_type local_residual =
1783  local_vector_view_traits::create_local_view(residual,
1784  ensemble_rank);
1785  local_matrix_type local_jacobian_values =
1786  local_matrix_view_traits::create_local_view(jacobian.values,
1787  ensemble_rank);
1788 
1789  // Gather nodal coordinates and solution vector:
1790 
1791  double x[ FunctionCount ] ;
1792  double y[ FunctionCount ] ;
1793  double z[ FunctionCount ] ;
1794  local_scalar_type val[ FunctionCount ] ;
1795  unsigned node_index[ ElemNodeCount ];
1796 
1797  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1798  const unsigned ni = elem_node_ids( ielem , i );
1799 
1800  node_index[i] = ni ;
1801 
1802  x[i] = node_coords( ni , 0 );
1803  y[i] = node_coords( ni , 1 );
1804  z[i] = node_coords( ni , 2 );
1805 
1806  val[i] = local_solution( ni );
1807  }
1808 
1809 
1810  local_scalar_type elem_vec[ FunctionCount ] ;
1811  local_scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
1812 
1813  for( unsigned i = 0; i < FunctionCount ; i++ ) {
1814  elem_vec[i] = 0 ;
1815  for( unsigned j = 0; j < FunctionCount ; j++){
1816  elem_mat[i][j] = 0 ;
1817  }
1818  }
1819 
1820 
1821  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1822  double dpsidx[ FunctionCount ] ;
1823  double dpsidy[ FunctionCount ] ;
1824  double dpsidz[ FunctionCount ] ;
1825 
1826  local_scalar_type coeff_k = 0 ;
1827 
1828  {
1829  double pt[] = {0.0, 0.0, 0.0};
1830 
1831  // If function is not constant
1832  // then compute physical coordinates of integration point
1833  if ( ! coeff_function.is_constant ) {
1834  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1835  pt[0] += x[j] * elem_data.values[i][j] ;
1836  pt[1] += y[j] * elem_data.values[i][j] ;
1837  pt[2] += z[j] * elem_data.values[i][j] ;
1838  }
1839  }
1840 
1841  // Need to fix this for local_scalar_type!!!!!!
1842  coeff_k = coeff_function(pt, ensemble_rank);
1843  }
1844 
1845  const double detJ =
1846  transform_gradients( elem_data.gradients[i] , x , y , z ,
1847  dpsidx , dpsidy , dpsidz );
1848 
1849  contributeResidualJacobian( val , dpsidx , dpsidy , dpsidz ,
1850  detJ , coeff_k ,
1851  elem_data.weights[i] ,
1852  elem_data.values[i] ,
1853  elem_vec , elem_mat );
1854  }
1855 
1856  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
1857  const unsigned row = node_index[i] ;
1858  if ( row < residual.extent(0) ) {
1859  atomic_add( & local_residual( row ) , elem_vec[i] );
1860 
1861  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
1862  const unsigned entry = elem_graph( ielem , i , j );
1863  if ( entry != ~0u ) {
1864  atomic_add( & local_jacobian_values( entry ) , elem_mat[i][j] );
1865  }
1866  }
1867  }
1868  }
1869  }
1870 }; /* ElementComputation */
1871 #endif
1872 
1873 //----------------------------------------------------------------------------
1874 
1875 template< class FixtureType , class SparseMatrixType >
1877 
1878 template< class ExecutionSpace , BoxElemPart::ElemOrder Order , class CoordinateMap ,
1879  typename ScalarType >
1881  Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1882  Kokkos::Example::FENL::CrsMatrix< ScalarType , ExecutionSpace > >
1883 {
1884 public:
1885 
1889 
1890  typedef ExecutionSpace execution_space ;
1891  typedef ScalarType scalar_type ;
1892 
1896  typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
1897 
1898  //------------------------------------
1899 
1904  static const bool use_team = local_vector_view_traits::use_team;
1905 
1906  typedef double bc_scalar_type ;
1907 
1908  //------------------------------------
1909  // Computational data:
1910 
1919  const unsigned bc_plane ;
1920  const unsigned node_count ;
1921  bool init ;
1923 
1924 
1925  DirichletComputation( const mesh_type & arg_mesh ,
1926  const vector_type & arg_solution ,
1927  const sparse_matrix_type & arg_jacobian ,
1928  const vector_type & arg_residual ,
1929  const unsigned arg_bc_plane ,
1930  const bc_scalar_type arg_bc_lower_value ,
1931  const bc_scalar_type arg_bc_upper_value ,
1932  const Kokkos::Example::FENL::DeviceConfig arg_dev_config )
1933  : node_coords( arg_mesh.node_coord() )
1934  , solution( arg_solution )
1935  , jacobian( arg_jacobian )
1936  , residual( arg_residual )
1937  , bc_lower_value( arg_bc_lower_value )
1938  , bc_upper_value( arg_bc_upper_value )
1939  , bc_lower_limit( std::numeric_limits<scalar_coord_type>::epsilon() )
1940  , bc_upper_limit( scalar_coord_type(1) - std::numeric_limits<scalar_coord_type>::epsilon() )
1941  , bc_plane( arg_bc_plane )
1942  , node_count( arg_mesh.node_count_owned() )
1943  , init( false )
1944  , dev_config( arg_dev_config )
1945  {
1946  parallel_for( node_count , *this );
1947  init = true ;
1948  }
1949 
1950  void apply() const
1951  {
1952  if ( use_team ) {
1953  const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1954  const size_t league_size =
1955  (node_count + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1956  Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1957  parallel_for( config , *this );
1958  }
1959  else
1960  parallel_for( node_count , *this );
1961  }
1962 
1963  //------------------------------------
1964 
1965  KOKKOS_INLINE_FUNCTION
1966  void operator()( const typename TeamPolicy< execution_space >::member_type & dev ) const
1967  {
1968 
1969  const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1970  const unsigned num_node_threads = dev_config.block_dim.y ;
1971  const unsigned node_rank = dev.team_rank() / num_ensemble_threads ;
1972  const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1973 
1974  const unsigned inode = dev.league_rank() * num_node_threads + node_rank;
1975 
1976  if (inode >= node_count)
1977  return;
1978 
1979  (*this)( inode, ensemble_rank );
1980 
1981  }
1982 
1983  KOKKOS_INLINE_FUNCTION
1984  void operator()( const unsigned inode ,
1985  const unsigned ensemble_rank = 0) const
1986  {
1987  local_vector_type local_residual =
1988  local_vector_view_traits::create_local_view(residual,
1989  ensemble_rank);
1990  local_matrix_type local_jacobian_values =
1991  local_matrix_view_traits::create_local_view(jacobian.values,
1992  ensemble_rank);
1993 
1994  // Apply dirichlet boundary condition on the Solution and Residual vectors.
1995  // To maintain the symmetry of the original global stiffness matrix,
1996  // zero out the columns that correspond to boundary conditions, and
1997  // update the residual vector accordingly
1998 
1999  const unsigned iBeg = jacobian.graph.row_map[inode];
2000  const unsigned iEnd = jacobian.graph.row_map[inode+1];
2001 
2002  const scalar_coord_type c = node_coords(inode,bc_plane);
2003  const bool bc_lower = c <= bc_lower_limit ;
2004  const bool bc_upper = bc_upper_limit <= c ;
2005 
2006  if ( ! init ) {
2007  solution(inode) = bc_lower ? bc_lower_value : (
2008  bc_upper ? bc_upper_value : 0 );
2009  }
2010  else {
2011  if ( bc_lower || bc_upper ) {
2012 
2013  local_residual(inode) = 0 ;
2014 
2015  // zero each value on the row, and leave a one
2016  // on the diagonal
2017 
2018  for( unsigned i = iBeg ; i < iEnd ; ++i ) {
2019  local_jacobian_values(i) = int(inode) == int(jacobian.graph.entries(i)) ? 1 : 0 ;
2020  }
2021  }
2022  else {
2023 
2024  // Find any columns that are boundary conditions.
2025  // Clear them and adjust the residual vector
2026 
2027  for( unsigned i = iBeg ; i < iEnd ; ++i ) {
2028  const unsigned cnode = jacobian.graph.entries(i) ;
2029  const scalar_coord_type cc = node_coords(cnode,bc_plane);
2030 
2031  if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) {
2032  local_jacobian_values(i) = 0 ;
2033  }
2034  }
2035  }
2036  }
2037  }
2038 };
2039 
2040 } /* namespace FENL */
2041 } /* namespace Example */
2042 } /* namespace Kokkos */
2043 
2044 //----------------------------------------------------------------------------
2045 
2046 /* A Cuda-specific specialization for the element computation functor. */
2047 #if defined( __CUDACC__ )
2048 // #include <NonlinearElement_Cuda.hpp>
2049 #endif
2050 
2051 //----------------------------------------------------------------------------
2052 
2053 #endif /* #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP */
static KOKKOS_INLINE_FUNCTION local_view_type create_local_view(const view_type &v, const unsigned local_rank)
KOKKOS_INLINE_FUNCTION PCE< Storage > sqrt(const PCE< Storage > &a)
ExponentialKLCoefficient(const ExponentialKLCoefficient &rhs)
KOKKOS_INLINE_FUNCTION void setRandomVariables(const RandomVariableView &rv)
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
LocalViewTraits< RandomVariableView > local_rv_view_traits
KOKKOS_INLINE_FUNCTION Teuchos::PromotionTraits< typename rv_type::value_type, value_type >::promote evaluate(const point_type &point, const rv_type &random_variables) const
Evaluate random field at a point.
KOKKOS_INLINE_FUNCTION void fill_graph_entries(const unsigned iset) const
Kokkos::View< scalar_type *[FunctionCount], execution_space > elem_vectors_type
CrsMatrix< ScalarType, ExecutionSpace > sparse_matrix_type
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
Kokkos::View< const double *[SpaceDim], Device > node_coord_type
Kokkos::DefaultExecutionSpace execution_space
double gradients[integration_count][spatial_dimension][function_count]
Kokkos::View< const unsigned *[ElemNode], Device > elem_node_type
KOKKOS_INLINE_FUNCTION RandomVariableView getRandomVariables() const
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
Kokkos::View< scalar_type *, Kokkos::LayoutLeft, execution_space > vector_type
KOKKOS_INLINE_FUNCTION void atomic_add(volatile Sacado::UQ::PCE< Storage > *const dest, const Sacado::UQ::PCE< Storage > &src)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
Kokkos::View< Scalar *, Kokkos::LayoutLeft, Device > RandomVariableView
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
KOKKOS_INLINE_FUNCTION local_scalar_type operator()(const MeshScalar point[], const size_type ensemble_rank) const
NodeNodeGraph< elem_node_type, sparse_graph_type, ElemNodeCount >::ElemGraphType elem_graph_type
local_rv_view_traits::local_view_type local_rv_view_type
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, Analytic, CoeffFunctionType > base_type
ElemNodeIdView::execution_space execution_space
KOKKOS_INLINE_FUNCTION void operator()(const unsigned irow, unsigned &update, const bool final) const
ElementComputationConstantCoefficient(const ElementComputationConstantCoefficient &rhs)
KOKKOS_INLINE_FUNCTION void init(unsigned &update) const
KOKKOS_INLINE_FUNCTION void fill_elem_graph_map(const unsigned ielem) const
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
Kokkos::Example::HexElement_Data< mesh_type::ElemNode > element_data_type
CrsGraphType::row_map_type::non_const_type RowMapType
KOKKOS_INLINE_FUNCTION void join(volatile unsigned &update, const volatile unsigned &input) const
ElementComputationBase(const mesh_type &arg_mesh, const vector_type &arg_solution, const elem_graph_type &arg_elem_graph, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual)
CrsMatrix(const StaticCrsGraphType &arg_graph)
Kokkos::StaticCrsGraph< unsigned, Space, void, void, unsigned > StaticCrsGraphType
KOKKOS_INLINE_FUNCTION double transform_gradients(const double grad[][FunctionCount], const double x[], const double y[], const double z[], double dpsidx[], double dpsidy[], double dpsidz[]) const
double values[integration_count][function_count]
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
KOKKOS_INLINE_FUNCTION void operator()(const unsigned iwork) const
KOKKOS_INLINE_FUNCTION double operator()(double pt[], unsigned ensemble_rank) const
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap > mesh_type
NodeNodeGraph(const ElemNodeIdView &arg_elem_node_id, const unsigned arg_node_count, Times &results)
expr val()
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], scalar_type res[], scalar_type mat[][FunctionCount]) const
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, FadElement, CoeffFunctionType > base_type
sparse_matrix_type::StaticCrsGraphType sparse_graph_type
Kokkos::View< unsigned, execution_space > UnsignedValue
DeviceConfig(const size_t num_blocks_=0, const size_t threads_per_block_x_=0, const size_t threads_per_block_y_=0, const size_t threads_per_block_z_=1)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, fad_scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
local_rv_view_traits::local_value_type local_scalar_type
DirichletComputation(const mesh_type &arg_mesh, const vector_type &arg_solution, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual, const unsigned arg_bc_plane, const bc_scalar_type arg_bc_lower_value, const bc_scalar_type arg_bc_upper_value, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION PCE< Storage > sin(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION void scatterResidual(const unsigned ielem, const unsigned node_index[], const scalar_type res[], const scalar_type mat[][FunctionCount]) const
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION void sort_graph_entries(const unsigned irow) const
Kokkos::View< scalar_type *[FunctionCount][FunctionCount], execution_space > elem_matrices_type
KOKKOS_INLINE_FUNCTION void fill_set(const unsigned ielem) const
Kokkos::View< unsigned *[ElemNode][ElemNode], execution_space > ElemGraphType
int n
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...
Stokhos::KL::ExponentialRandomField< MeshScalar, Device > rf_type
ExponentialKLCoefficient(const MeshScalar mean, const MeshScalar variance, const MeshScalar correlation_length, const size_type num_rv)
KOKKOS_INLINE_FUNCTION void computeElementResidual(const local_scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
Kokkos::UnorderedMap< key_type, void, execution_space > SetType
KOKKOS_INLINE_FUNCTION void computeElementResidual(const scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const