Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FadMPAssembly/fenl_functors.hpp
Go to the documentation of this file.
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
18 #define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
19 
20 #include <stdio.h>
21 
22 #include <iostream>
23 #include <fstream>
24 #include <iomanip>
25 #include <cstdlib>
26 #include <cmath>
27 #include <limits>
28 
29 #include <Kokkos_Pair.hpp>
30 #include <Kokkos_UnorderedMap.hpp>
31 #include <KokkosSparse_StaticCrsGraph.hpp>
32 
33 #include <Kokkos_Timer.hpp>
34 
35 #include <BoxElemFixture.hpp>
36 #include <HexElement.hpp>
37 
39 #include "Sacado.hpp"
40 //#include "Fad/Sacado_Fad_SFad_MP_Vector.hpp"
42 //#include "Fad/Sacado_Fad_DFad_MP_Vector.hpp"
43 
44 //----------------------------------------------------------------------------
45 //----------------------------------------------------------------------------
46 
47 namespace Kokkos {
48 namespace Example {
49 namespace FENL {
50 
51 struct DeviceConfig {
52  struct Dim3 {
53  size_t x, y, z;
54  Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) :
55  x(x_), y(y_), z(z_) {}
56  };
57 
59  size_t num_blocks;
61 
62  DeviceConfig(const size_t num_blocks_ = 0,
63  const size_t threads_per_block_x_ = 0,
64  const size_t threads_per_block_y_ = 0,
65  const size_t threads_per_block_z_ = 1) :
66  block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
67  num_blocks(num_blocks_),
69  {}
70 };
71 
72 template< typename ValueType , class Space >
73 struct CrsMatrix {
74 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
75  typedef KokkosSparse::StaticCrsGraph< unsigned , Space , void , unsigned > StaticCrsGraphType ;
76 #else
77  typedef KokkosSparse::StaticCrsGraph< unsigned , Space , void , void , unsigned > StaticCrsGraphType ;
78 #endif
79  typedef View< ValueType * , Space > values_type ;
80 
83 
84  CrsMatrix() : graph(), values() {}
85 
86  CrsMatrix( const StaticCrsGraphType & arg_graph )
87  : graph( arg_graph )
88  , values( "crs_matrix_values" , arg_graph.entries.extent(0) )
89  {}
90 };
91 
92 // Traits class for creating strided local views for embedded ensemble-based,
93 // specialized for ensemble UQ scalar type
94 template <typename ViewType, typename Enabled = void>
96  typedef ViewType view_type;
97  // typedef Kokkos::View<typename view_type::data_type,
98  // typename view_type::array_layout,
99  // typename view_type::execution_space,
100  // Kokkos::MemoryUnmanaged> local_view_type;
101  typedef const view_type& local_view_type;
103  static const bool use_team = false;
104  KOKKOS_INLINE_FUNCTION
106  const unsigned local_rank)
107  { return v; }
108 };
109 
110 #if defined( KOKKOS_ENABLE_CUDA )
111 
112 template <typename ViewType>
113 struct LocalViewTraits<
114  ViewType,
115  typename std::enable_if< std::is_same<typename ViewType::execution_space,
116  Kokkos::Cuda>::value &&
117  Kokkos::is_view_mp_vector<ViewType>::value
118  >::type > {
119  typedef ViewType view_type;
122  static const bool use_team = true;
123 
124  KOKKOS_INLINE_FUNCTION
126  const unsigned local_rank)
127  {
128  return Kokkos::partition<1>(v, local_rank);
129  }
130 };
131 
132 #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
133 
134 // Compute DeviceConfig struct's based on scalar type
135 template <typename ScalarType>
137  static void eval( Kokkos::Example::FENL::DeviceConfig& dev_config_elem,
138  Kokkos::Example::FENL::DeviceConfig& dev_config_bc ) {
139  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
140  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
141  }
142 };
143 
144 // Compute DeviceConfig struct's based on scalar type
145 template <typename StorageType>
146 struct CreateDeviceConfigs< Sacado::MP::Vector<StorageType> > {
148  static void eval( Kokkos::Example::FENL::DeviceConfig& dev_config_elem,
149  Kokkos::Example::FENL::DeviceConfig& dev_config_bc ) {
150  static const unsigned VectorSize = StorageType::static_size;
151 #if defined( KOKKOS_ENABLE_CUDA )
152  enum { is_cuda = std::is_same< execution_space, Kokkos::Cuda >::value };
153 #else
154  enum { is_cuda = false };
155 #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
156  if ( is_cuda ) {
157  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , VectorSize , 64/VectorSize );
158  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , VectorSize , 256/VectorSize );
159  }
160  else {
161  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
162  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
163  }
164  }
165 };
166 
167 template< class ElemNodeIdView , class CrsGraphType , unsigned ElemNode >
169 public:
170 
172  typedef pair<unsigned,unsigned> key_type ;
173 
174  typedef Kokkos::UnorderedMap< key_type, void , execution_space > SetType ;
175  typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ;
176  typedef Kokkos::View< unsigned , execution_space > UnsignedValue ;
177 
178  // Static dimensions of 0 generate compiler warnings or errors.
179  typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space >
181 
182 private:
183 
189 
190  const unsigned node_count ;
191  const ElemNodeIdView elem_node_id ;
197 
198 public:
199 
200  CrsGraphType graph ;
202 
203  struct Times
204  {
205  double ratio;
211  };
212 
213  NodeNodeGraph( const ElemNodeIdView & arg_elem_node_id ,
214  const unsigned arg_node_count,
215  Times & results
216  )
217  : node_count(arg_node_count)
218  , elem_node_id( arg_elem_node_id )
219  , row_total( "row_total" )
220  , row_count(Kokkos::ViewAllocateWithoutInitializing("row_count") , node_count ) // will deep_copy to 0 inside loop
221  , row_map( "graph_row_map" , node_count + 1 )
222  , node_node_set()
223  , phase( FILL_NODE_SET )
224  , graph()
225  , elem_graph()
226  {
227  //--------------------------------
228  // Guess at capacity required for the map:
229 
230  Kokkos::Timer wall_clock ;
231 
232  wall_clock.reset();
233  phase = FILL_NODE_SET ;
234 
235  // upper bound on the capacity
236  size_t set_capacity = (((28ull * node_count) / 2ull)*4ull)/3ull;
237 
238 
239  // Increase capacity until the (node,node) map is successfully filled.
240  {
241  // Zero the row count to restart the fill
243 
244  node_node_set = SetType( set_capacity );
245 
246  // May be larger that requested:
247  set_capacity = node_node_set.capacity();
248 
249  Kokkos::parallel_for( elem_node_id.extent(0) , *this );
250  }
251 
252  execution_space().fence();
253  results.ratio = (double)node_node_set.size() / (double)node_node_set.capacity();
254  results.fill_node_set = wall_clock.seconds();
255  //--------------------------------
256 
257  wall_clock.reset();
259 
260  // Exclusive scan of row_count into row_map
261  // including the final total in the 'node_count + 1' position.
262  // Zero the 'row_count' values.
263  Kokkos::parallel_scan( node_count , *this );
264 
265  // Zero the row count for the fill:
267 
268  unsigned graph_entry_count = 0 ;
269 
270  Kokkos::deep_copy( graph_entry_count , row_total );
271 
272  // Assign graph's row_map and allocate graph's entries
273  graph.row_map = row_map ;
274  graph.entries = typename CrsGraphType::entries_type( "graph_entries" , graph_entry_count );
275 
276  //--------------------------------
277  // Fill graph's entries from the (node,node) set.
278 
279  execution_space().fence();
280  results.scan_node_count = wall_clock.seconds();
281 
282  wall_clock.reset();
284  Kokkos::parallel_for( node_node_set.capacity() , *this );
285 
286  execution_space().fence();
287  results.fill_graph_entries = wall_clock.seconds();
288 
289  //--------------------------------
290  // Done with the temporary sets and arrays
291  wall_clock.reset();
293 
295  row_count = RowMapType();
296  row_map = RowMapType();
297  node_node_set.clear();
298 
299  //--------------------------------
300 
301  Kokkos::parallel_for( node_count , *this );
302 
303  execution_space().fence();
304  results.sort_graph_entries = wall_clock.seconds();
305 
306  //--------------------------------
307  // Element-to-graph mapping:
308  wall_clock.reset();
310  elem_graph = ElemGraphType("elem_graph", elem_node_id.extent(0) );
311  Kokkos::parallel_for( elem_node_id.extent(0) , *this );
312 
313  execution_space().fence();
314  results.fill_element_graph = wall_clock.seconds();
315  }
316 
317  //------------------------------------
318  // parallel_for: create map and count row length
319 
320  KOKKOS_INLINE_FUNCTION
321  void fill_set( const unsigned ielem ) const
322  {
323  // Loop over element's (row_local_node,col_local_node) pairs:
324  for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
325 
326  const unsigned row_node = elem_node_id( ielem , row_local_node );
327 
328  for ( unsigned col_local_node = row_local_node ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) {
329 
330  const unsigned col_node = elem_node_id( ielem , col_local_node );
331 
332  // If either node is locally owned then insert the pair into the unordered map:
333 
334  if ( row_node < row_count.extent(0) || col_node < row_count.extent(0) ) {
335 
336  const key_type key = (row_node < col_node) ? make_pair( row_node, col_node ) : make_pair( col_node, row_node ) ;
337 
338  const typename SetType::insert_result result = node_node_set.insert( key );
339 
340  if ( result.success() ) {
341  if ( row_node < row_count.extent(0) ) { atomic_fetch_add( & row_count( row_node ) , (typename RowMapType::value_type)1 ); }
342  if ( col_node < row_count.extent(0) && col_node != row_node ) { atomic_fetch_add( & row_count( col_node ) , (typename RowMapType::value_type)1 ); }
343  }
344  }
345  }
346  }
347  }
348 
349  KOKKOS_INLINE_FUNCTION
350  void fill_graph_entries( const unsigned iset ) const
351  {
352  if ( node_node_set.valid_at(iset) ) {
353  const key_type key = node_node_set.key_at(iset) ;
354  const unsigned row_node = key.first ;
355  const unsigned col_node = key.second ;
356 
357  if ( row_node < row_count.extent(0) ) {
358  const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , (typename RowMapType::value_type)1 );
359  graph.entries( offset ) = col_node ;
360  }
361 
362  if ( col_node < row_count.extent(0) && col_node != row_node ) {
363  const unsigned offset = graph.row_map( col_node ) + atomic_fetch_add( & row_count( col_node ) , (typename RowMapType::value_type)1 );
364  graph.entries( offset ) = row_node ;
365  }
366  }
367  }
368 
369  KOKKOS_INLINE_FUNCTION
370  void sort_graph_entries( const unsigned irow ) const
371  {
372  typedef typename CrsGraphType::size_type size_type;
373  const size_type row_beg = graph.row_map( irow );
374  const size_type row_end = graph.row_map( irow + 1 );
375  for ( size_type i = row_beg + 1 ; i < row_end ; ++i ) {
376  const typename CrsGraphType::data_type col = graph.entries(i);
377  size_type j = i ;
378  for ( ; row_beg < j && col < graph.entries(j-1) ; --j ) {
379  graph.entries(j) = graph.entries(j-1);
380  }
381  graph.entries(j) = col ;
382  }
383  }
384 
385  KOKKOS_INLINE_FUNCTION
386  void fill_elem_graph_map( const unsigned ielem ) const
387  {
388  typedef typename CrsGraphType::data_type entry_type;
389  for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
390 
391  const unsigned row_node = elem_node_id( ielem , row_local_node );
392 
393  for ( unsigned col_local_node = 0 ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) {
394 
395  const unsigned col_node = elem_node_id( ielem , col_local_node );
396 
397  entry_type entry = 0 ;
398 
399  if ( row_node + 1 < graph.row_map.extent(0) ) {
400 
401  const entry_type entry_end = static_cast<entry_type> (graph.row_map( row_node + 1 ));
402 
403  entry = graph.row_map( row_node );
404 
405  for ( ; entry < entry_end && graph.entries(entry) != static_cast<entry_type> (col_node) ; ++entry );
406 
407  if ( entry == entry_end ) entry = ~0u ;
408  }
409 
410  elem_graph( ielem , row_local_node , col_local_node ) = entry ;
411  }
412  }
413  }
414 
415  KOKKOS_INLINE_FUNCTION
416  void operator()( const unsigned iwork ) const
417  {
418  if ( phase == FILL_NODE_SET ) {
419  fill_set( iwork );
420  }
421  else if ( phase == FILL_GRAPH_ENTRIES ) {
422  fill_graph_entries( iwork );
423  }
424  else if ( phase == SORT_GRAPH_ENTRIES ) {
425  sort_graph_entries( iwork );
426  }
427  else if ( phase == FILL_ELEMENT_GRAPH ) {
428  fill_elem_graph_map( iwork );
429  }
430  }
431 
432  //------------------------------------
433  // parallel_scan: row offsets
434 
435  typedef unsigned value_type ;
436 
437  KOKKOS_INLINE_FUNCTION
438  void operator()( const unsigned irow , unsigned & update , const bool final ) const
439  {
440  // exclusive scan
441  if ( final ) { row_map( irow ) = update ; }
442 
443  update += row_count( irow );
444 
445  if ( final ) {
446  if ( irow + 1 == row_count.extent(0) ) {
447  row_map( irow + 1 ) = update ;
448  row_total() = update ;
449  }
450  }
451  }
452 
453  KOKKOS_INLINE_FUNCTION
454  void init( unsigned & update ) const { update = 0 ; }
455 
456  KOKKOS_INLINE_FUNCTION
457  void join( unsigned & update , const unsigned & input ) const { update += input ; }
458 
459  //------------------------------------
460 };
461 
462 } /* namespace FENL */
463 } /* namespace Example */
464 } /* namespace Kokkos */
465 
466 //----------------------------------------------------------------------------
467 //----------------------------------------------------------------------------
468 
469 namespace Kokkos {
470 namespace Example {
471 namespace FENL {
472 
474  enum { is_constant = false };
475 
476  const double coeff_k ;
477 
478  KOKKOS_INLINE_FUNCTION
479  double operator()( double pt[], unsigned ensemble_rank) const
480  { return coeff_k * std::sin(pt[0]) * std::sin(pt[1]) * std::sin(pt[2]); }
481 
483  : coeff_k( val ) {}
484 
486  : coeff_k( rhs.coeff_k ) {}
487 };
488 
489 // Exponential KL from Stokhos
490 template < typename Scalar, typename MeshScalar, typename Device >
492 public:
493 
494  // Turn into a meta-function class usable with Sacado::mpl::apply
495  template <typename T1, typename T2 = MeshScalar, typename T3 = Device>
496  struct apply {
498  };
499 
500  enum { is_constant = false };
501  typedef Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device> RandomVariableView;
502  typedef typename RandomVariableView::size_type size_type;
503 
508 
509  rf_type m_rf; // Exponential random field
510  const MeshScalar m_mean; // Mean of random field
511  const MeshScalar m_variance; // Variance of random field
512  const MeshScalar m_corr_len; // Correlation length of random field
513  const size_type m_num_rv; // Number of random variables
514  RandomVariableView m_rv; // KL random variables
515 
516 public:
517 
519  const MeshScalar mean ,
520  const MeshScalar variance ,
521  const MeshScalar correlation_length ,
522  const size_type num_rv ) :
523  m_mean( mean ),
524  m_variance( variance ),
525  m_corr_len( correlation_length ),
526  m_num_rv( num_rv ),
527  m_rv( "KL Random Variables", m_num_rv )
528  {
529  Teuchos::ParameterList solverParams;
530  solverParams.set("Number of KL Terms", int(num_rv));
531  solverParams.set("Mean", mean);
532  solverParams.set("Standard Deviation", std::sqrt(variance));
533  int ndim = 3;
534  Teuchos::Array<double> domain_upper(ndim, 1.0), domain_lower(ndim, 0.0),
535  correlation_lengths(ndim, correlation_length);
536  solverParams.set("Domain Upper Bounds", domain_upper);
537  solverParams.set("Domain Lower Bounds", domain_lower);
538  solverParams.set("Correlation Lengths", correlation_lengths);
539 
540  m_rf = rf_type(solverParams);
541  }
542 
544  m_rf( rhs.m_rf ) ,
545  m_mean( rhs.m_mean ) ,
546  m_variance( rhs.m_variance ) ,
547  m_corr_len( rhs.m_corr_len ) ,
548  m_num_rv( rhs.m_num_rv ) ,
549  m_rv( rhs.m_rv ) {}
550 
551  KOKKOS_INLINE_FUNCTION
552  void setRandomVariables( const RandomVariableView& rv) { m_rv = rv; }
553 
554  KOKKOS_INLINE_FUNCTION
556 
557  KOKKOS_INLINE_FUNCTION
558  local_scalar_type operator() ( const MeshScalar point[],
559  const size_type ensemble_rank ) const
560  {
561  local_rv_view_type local_rv =
563 
564  local_scalar_type val = m_rf.evaluate(point, local_rv);
565 
566  return val;
567  }
568 };
569 
570 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
571  class CoordinateMap , typename ScalarType >
573 {
574 public:
575 
578 
579  //------------------------------------
580 
581  typedef ExecutionSpace execution_space ;
582  typedef ScalarType scalar_type ;
583 
586  typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space > vector_type ;
587 
588  //------------------------------------
589 
591  static const unsigned TensorDim = SpatialDim * SpatialDim ;
595 
596  //------------------------------------
597 
600  typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
601  typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
602 
604 
605  //------------------------------------
606 
607 
608  //------------------------------------
609  // Computational data:
610 
620 
622  : elem_data()
624  , node_coords( rhs.node_coords )
625  , elem_graph( rhs.elem_graph )
628  , solution( rhs.solution )
629  , residual( rhs.residual )
630  , jacobian( rhs.jacobian )
631  {}
632 
633  ElementComputationBase( const mesh_type & arg_mesh ,
634  const vector_type & arg_solution ,
635  const elem_graph_type & arg_elem_graph ,
636  const sparse_matrix_type & arg_jacobian ,
637  const vector_type & arg_residual )
638  : elem_data()
639  , elem_node_ids( arg_mesh.elem_node() )
640  , node_coords( arg_mesh.node_coord() )
641  , elem_graph( arg_elem_graph )
642  , elem_jacobians()
643  , elem_residuals()
644  , solution( arg_solution )
645  , residual( arg_residual )
646  , jacobian( arg_jacobian )
647  {}
648 
649  //------------------------------------
650 
651  KOKKOS_INLINE_FUNCTION
653  const double grad[][ FunctionCount ] , // Gradient of bases master element
654  const double x[] ,
655  const double y[] ,
656  const double z[] ,
657  double dpsidx[] ,
658  double dpsidy[] ,
659  double dpsidz[] ) const
660  {
661  enum { j11 = 0 , j12 = 1 , j13 = 2 ,
662  j21 = 3 , j22 = 4 , j23 = 5 ,
663  j31 = 6 , j32 = 7 , j33 = 8 };
664 
665  // Jacobian accumulation:
666 
667  double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
668 
669  for( unsigned i = 0; i < FunctionCount ; ++i ) {
670  const double x1 = x[i] ;
671  const double x2 = y[i] ;
672  const double x3 = z[i] ;
673 
674  const double g1 = grad[0][i] ;
675  const double g2 = grad[1][i] ;
676  const double g3 = grad[2][i] ;
677 
678  J[j11] += g1 * x1 ;
679  J[j12] += g1 * x2 ;
680  J[j13] += g1 * x3 ;
681 
682  J[j21] += g2 * x1 ;
683  J[j22] += g2 * x2 ;
684  J[j23] += g2 * x3 ;
685 
686  J[j31] += g3 * x1 ;
687  J[j32] += g3 * x2 ;
688  J[j33] += g3 * x3 ;
689  }
690 
691  // Inverse jacobian:
692 
693  double invJ[ TensorDim ] = {
694  static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
695  static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
696  static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
697 
698  static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
699  static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
700  static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
701 
702  static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
703  static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
704  static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
705 
706  const double detJ = J[j11] * invJ[j11] +
707  J[j21] * invJ[j12] +
708  J[j31] * invJ[j13] ;
709 
710  const double detJinv = 1.0 / detJ ;
711 
712  for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
713 
714  // Transform gradients:
715 
716  for( unsigned i = 0; i < FunctionCount ; ++i ) {
717  const double g0 = grad[0][i];
718  const double g1 = grad[1][i];
719  const double g2 = grad[2][i];
720 
721  dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
722  dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
723  dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
724  }
725 
726  return detJ ;
727  }
728 
729 };
730 
736 };
737 
738 template< class FiniteElementMeshType ,
739  class SparseMatrixType ,
740  AssemblyMethod Method ,
741  class CoeffFunctionType = ElementComputationConstantCoefficient
742  >
744 
745 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
746  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
748  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
749  CrsMatrix< ScalarType , ExecutionSpace > ,
750  Analytic , CoeffFunctionType > :
751  public ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
752  ScalarType> {
753 public:
754 
755  typedef ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
756  ScalarType> base_type;
757 
760 
761  static const unsigned FunctionCount = base_type::FunctionCount;
762  static const unsigned IntegrationCount = base_type::IntegrationCount;
763  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
764 
765  const CoeffFunctionType coeff_function ;
767 
769  base_type(rhs) ,
770  coeff_function( rhs.coeff_function ) ,
771  dev_config( rhs.dev_config ) {}
772 
774  const typename base_type::mesh_type & arg_mesh ,
775  const CoeffFunctionType & arg_coeff_function ,
776  const typename base_type::vector_type & arg_solution ,
777  const typename base_type::elem_graph_type & arg_elem_graph ,
778  const typename base_type::sparse_matrix_type & arg_jacobian ,
779  const typename base_type::vector_type & arg_residual ,
780  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
781  base_type(arg_mesh, arg_solution, arg_elem_graph, arg_jacobian,
782  arg_residual),
783  coeff_function( arg_coeff_function ) ,
784  dev_config( arg_dev_config ) {}
785 
786  //------------------------------------
787 
788  void apply() const
789  {
790  const size_t nelem = this->elem_node_ids.extent(0);
791  parallel_for( nelem , *this );
792  }
793 
794  KOKKOS_INLINE_FUNCTION
795  void gatherSolution(const unsigned ielem,
796  scalar_type val[],
797  unsigned node_index[],
798  double x[], double y[], double z[],
799  scalar_type res[],
800  scalar_type mat[][FunctionCount]) const
801  {
802  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
803  const unsigned ni = this->elem_node_ids( ielem , i );
804 
805  node_index[i] = ni ;
806 
807  x[i] = this->node_coords( ni , 0 );
808  y[i] = this->node_coords( ni , 1 );
809  z[i] = this->node_coords( ni , 2 );
810 
811  val[i] = this->solution( ni ) ;
812  res[i] = 0 ;
813 
814  for( unsigned j = 0; j < FunctionCount ; j++){
815  mat[i][j] = 0 ;
816  }
817  }
818  }
819 
820  KOKKOS_INLINE_FUNCTION
821  void scatterResidual(const unsigned ielem,
822  const unsigned node_index[],
823  const scalar_type res[],
824  const scalar_type mat[][FunctionCount]) const
825  {
826  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
827  const unsigned row = node_index[i] ;
828  if ( row < this->residual.extent(0) ) {
829  atomic_add( & this->residual( row ) , res[i] );
830 
831  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
832  const unsigned entry = this->elem_graph( ielem , i , j );
833  if ( entry != ~0u ) {
834  atomic_add( & this->jacobian.values( entry ) , mat[i][j] );
835  }
836  }
837  }
838  }
839  }
840 
841  KOKKOS_INLINE_FUNCTION
843  const scalar_type dof_values[] ,
844  const double x[],
845  const double y[],
846  const double z[],
847  scalar_type elem_res[] ,
848  scalar_type elem_mat[][FunctionCount] ) const
849  {
850  scalar_type coeff_k;
851  double coeff_src = 1.234;
852  double advection[] = { 1.1, 1.2, 1.3 };
853  double dpsidx[ FunctionCount ] ;
854  double dpsidy[ FunctionCount ] ;
855  double dpsidz[ FunctionCount ] ;
856  double pt[] = {0.0, 0.0, 0.0};
857  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
858 
859  // If function is not constant
860  // then compute physical coordinates of integration point
861  if ( ! coeff_function.is_constant ) {
862  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
863  pt[0] += x[j] * this->elem_data.values[i][j] ;
864  pt[1] += y[j] * this->elem_data.values[i][j] ;
865  pt[2] += z[j] * this->elem_data.values[i][j] ;
866  }
867  }
868  coeff_k = coeff_function(pt, 0);
869 
870  const double integ_weight = this->elem_data.weights[i];
871  const double* bases_vals = this->elem_data.values[i];
872  const double detJ =
873  this->transform_gradients( this->elem_data.gradients[i] ,
874  x , y , z ,
875  dpsidx , dpsidy , dpsidz );
876  const double detJ_weight = detJ * integ_weight;
877  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
878 
879  scalar_type value_at_pt = 0 ;
880  scalar_type gradx_at_pt = 0 ;
881  scalar_type grady_at_pt = 0 ;
882  scalar_type gradz_at_pt = 0 ;
883  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
884  value_at_pt += dof_values[m] * bases_vals[m] ;
885  gradx_at_pt += dof_values[m] * dpsidx[m] ;
886  grady_at_pt += dof_values[m] * dpsidy[m] ;
887  gradz_at_pt += dof_values[m] * dpsidz[m] ;
888  }
889 
890  const scalar_type source_term =
891  coeff_src * value_at_pt * value_at_pt ;
892  const scalar_type source_deriv =
893  2.0 * coeff_src * value_at_pt ;
894 
895  const scalar_type advection_x = advection[0];
896  const scalar_type advection_y = advection[1];
897  const scalar_type advection_z = advection[2];
898 
899  const scalar_type advection_term =
900  advection_x*gradx_at_pt +
901  advection_y*grady_at_pt +
902  advection_z*gradz_at_pt ;
903 
904  for ( unsigned m = 0; m < FunctionCount; ++m) {
905  scalar_type * const mat = elem_mat[m] ;
906  const double bases_val_m = bases_vals[m] * detJ_weight ;
907  const double dpsidx_m = dpsidx[m] ;
908  const double dpsidy_m = dpsidy[m] ;
909  const double dpsidz_m = dpsidz[m] ;
910 
911  elem_res[m] +=
912  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
913  dpsidy_m * grady_at_pt +
914  dpsidz_m * gradz_at_pt ) +
915  bases_val_m * ( advection_term + source_term ) ;
916 
917  for( unsigned n = 0; n < FunctionCount; n++) {
918  const double dpsidx_n = dpsidx[n] ;
919  const double dpsidy_n = dpsidy[n] ;
920  const double dpsidz_n = dpsidz[n] ;
921  mat[n] +=
922  detJ_weight_coeff_k * ( dpsidx_m * dpsidx_n +
923  dpsidy_m * dpsidy_n +
924  dpsidz_m * dpsidz_n ) +
925  bases_val_m * ( advection_x * dpsidx_n +
926  advection_y * dpsidy_n +
927  advection_z * dpsidz_n +
928  source_deriv * bases_vals[n] ) ;
929  }
930  }
931  }
932  }
933 
934  KOKKOS_INLINE_FUNCTION
935  void operator()( const unsigned ielem ) const
936  {
937  double x[ FunctionCount ] ;
938  double y[ FunctionCount ] ;
939  double z[ FunctionCount ] ;
940  unsigned node_index[ ElemNodeCount ];
941 
943  scalar_type elem_res[ FunctionCount ] ;
944  scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
945 
946  // Gather nodal coordinates and solution vector:
947  gatherSolution(ielem, val, node_index, x, y, z, elem_res, elem_mat);
948 
949  // Compute nodal element residual vector and Jacobian matrix
950  computeElementResidualJacobian( val, x, y, z, elem_res , elem_mat );
951 
952  // Scatter nodal element residual and Jacobian in global vector and matrix:
953  scatterResidual( ielem, node_index, elem_res, elem_mat );
954  }
955 }; /* ElementComputation */
956 
957 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
958  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
960  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
961  CrsMatrix< ScalarType , ExecutionSpace > ,
962  FadElement , CoeffFunctionType > :
963  public ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
964  ScalarType> {
965 public:
966 
967  typedef ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
968  ScalarType> base_type;
969 
972 
973  static const unsigned FunctionCount = base_type::FunctionCount;
974  static const unsigned IntegrationCount = base_type::IntegrationCount;
975  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
976 
977  typedef Sacado::Fad::SLFad<scalar_type,FunctionCount> fad_scalar_type;
978  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
979 
980  const CoeffFunctionType coeff_function ;
982 
984  base_type(rhs) ,
985  coeff_function( rhs.coeff_function ) ,
986  dev_config( rhs.dev_config ) {}
987 
989  const typename base_type::mesh_type & arg_mesh ,
990  const CoeffFunctionType & arg_coeff_function ,
991  const typename base_type::vector_type & arg_solution ,
992  const typename base_type::elem_graph_type & arg_elem_graph ,
993  const typename base_type::sparse_matrix_type & arg_jacobian ,
994  const typename base_type::vector_type & arg_residual ,
995  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
996  base_type(arg_mesh, arg_solution, arg_elem_graph,
997  arg_jacobian, arg_residual),
998  coeff_function( arg_coeff_function ) ,
999  dev_config( arg_dev_config ) {}
1000 
1001  //------------------------------------
1002 
1003  void apply() const
1004  {
1005  const size_t nelem = this->elem_node_ids.extent(0);
1006  parallel_for( nelem , *this );
1007  }
1008 
1009  KOKKOS_INLINE_FUNCTION
1010  void gatherSolution(const unsigned ielem,
1011  fad_scalar_type val[],
1012  unsigned node_index[],
1013  double x[], double y[], double z[],
1014  fad_scalar_type res[]) const
1015  {
1016  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1017  const unsigned ni = this->elem_node_ids( ielem , i );
1018 
1019  node_index[i] = ni ;
1020 
1021  x[i] = this->node_coords( ni , 0 );
1022  y[i] = this->node_coords( ni , 1 );
1023  z[i] = this->node_coords( ni , 2 );
1024 
1025  val[i].val() = this->solution( ni );
1026  val[i].diff( i, FunctionCount );
1027  }
1028  }
1029 
1030  KOKKOS_INLINE_FUNCTION
1031  void scatterResidual(const unsigned ielem,
1032  const unsigned node_index[],
1033  fad_scalar_type res[]) const
1034  {
1035  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
1036  const unsigned row = node_index[i] ;
1037  if ( row < this->residual.extent(0) ) {
1038  atomic_add( & this->residual( row ) , res[i].val() );
1039 
1040  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
1041  const unsigned entry = this->elem_graph( ielem , i , j );
1042  if ( entry != ~0u ) {
1043  atomic_add( & this->jacobian.values( entry ) ,
1044  res[i].fastAccessDx(j) );
1045  }
1046  }
1047  }
1048  }
1049  }
1050 
1051  template <typename local_scalar_type>
1052  KOKKOS_INLINE_FUNCTION
1053  void computeElementResidual(const local_scalar_type dof_values[] ,
1054  const double x[],
1055  const double y[],
1056  const double z[],
1057  local_scalar_type elem_res[] ) const
1058  {
1059  scalar_type coeff_k;
1060  double coeff_src = 1.234;
1061  double advection[] = { 1.1, 1.2, 1.3 };
1062  double dpsidx[ FunctionCount ] ;
1063  double dpsidy[ FunctionCount ] ;
1064  double dpsidz[ FunctionCount ] ;
1065  double pt[] = {0.0, 0.0, 0.0};
1066  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1067 
1068  // If function is not constant
1069  // then compute physical coordinates of integration point
1070  if ( ! coeff_function.is_constant ) {
1071  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1072  pt[0] += x[j] * this->elem_data.values[i][j] ;
1073  pt[1] += y[j] * this->elem_data.values[i][j] ;
1074  pt[2] += z[j] * this->elem_data.values[i][j] ;
1075  }
1076  }
1077  coeff_k = coeff_function(pt, 0);
1078 
1079  const double integ_weight = this->elem_data.weights[i];
1080  const double* bases_vals = this->elem_data.values[i];
1081  const double detJ =
1082  this->transform_gradients( this->elem_data.gradients[i] ,
1083  x , y , z ,
1084  dpsidx , dpsidy , dpsidz );
1085  const double detJ_weight = detJ * integ_weight;
1086  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1087 
1088  local_scalar_type value_at_pt = 0 ;
1089  local_scalar_type gradx_at_pt = 0 ;
1090  local_scalar_type grady_at_pt = 0 ;
1091  local_scalar_type gradz_at_pt = 0 ;
1092  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1093  value_at_pt += dof_values[m] * bases_vals[m] ;
1094  gradx_at_pt += dof_values[m] * dpsidx[m] ;
1095  grady_at_pt += dof_values[m] * dpsidy[m] ;
1096  gradz_at_pt += dof_values[m] * dpsidz[m] ;
1097  }
1098 
1099  const local_scalar_type source_term =
1100  coeff_src * value_at_pt * value_at_pt ;
1101 
1102  const local_scalar_type advection_term =
1103  advection[0]*gradx_at_pt +
1104  advection[1]*grady_at_pt +
1105  advection[2]*gradz_at_pt;
1106 
1107  for ( unsigned m = 0; m < FunctionCount; ++m) {
1108  const double bases_val_m = bases_vals[m] * detJ_weight ;
1109  const double dpsidx_m = dpsidx[m] ;
1110  const double dpsidy_m = dpsidy[m] ;
1111  const double dpsidz_m = dpsidz[m] ;
1112 
1113  elem_res[m] +=
1114  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1115  dpsidy_m * grady_at_pt +
1116  dpsidz_m * gradz_at_pt ) +
1117  bases_val_m * ( advection_term + source_term ) ;
1118  }
1119  }
1120  }
1121 
1122  KOKKOS_INLINE_FUNCTION
1123  void operator()( const unsigned ielem ) const
1124  {
1125  double x[ FunctionCount ] ;
1126  double y[ FunctionCount ] ;
1127  double z[ FunctionCount ] ;
1128  unsigned node_index[ ElemNodeCount ];
1129 
1131  fad_scalar_type elem_res[ FunctionCount ] ; // this zeros elem_res
1132 
1133  // Gather nodal coordinates and solution vector:
1134  gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1135 
1136  // Compute nodal element residual vector:
1137  computeElementResidual( val, x, y, z, elem_res );
1138 
1139  // Scatter nodal element residual in global vector:
1140  scatterResidual( ielem, node_index, elem_res );
1141  }
1142 }; /* ElementComputation */
1143 
1144 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
1145  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
1147  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1148  CrsMatrix< ScalarType , ExecutionSpace > ,
1149  FadElementOptimized , CoeffFunctionType > :
1150  public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1151  CrsMatrix< ScalarType , ExecutionSpace > ,
1152  FadElement, CoeffFunctionType > {
1153 public:
1154 
1157  FadElement , CoeffFunctionType > base_type;
1158 
1161 
1162  static const unsigned FunctionCount = base_type::FunctionCount;
1163  static const unsigned IntegrationCount = base_type::IntegrationCount;
1164  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1165 
1166  typedef Sacado::Fad::SLFad<scalar_type,FunctionCount> fad_scalar_type;
1167  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1168 
1170 
1172  const typename base_type::mesh_type & arg_mesh ,
1173  const CoeffFunctionType & arg_coeff_function ,
1174  const typename base_type::vector_type & arg_solution ,
1175  const typename base_type::elem_graph_type & arg_elem_graph ,
1176  const typename base_type::sparse_matrix_type & arg_jacobian ,
1177  const typename base_type::vector_type & arg_residual ,
1178  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1179  base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1180  arg_jacobian, arg_residual, arg_dev_config) {}
1181 
1182  //------------------------------------
1183 
1184  void apply() const
1185  {
1186  const size_t nelem = this->elem_node_ids.extent(0);
1187  parallel_for( nelem , *this );
1188  }
1189 
1190  KOKKOS_INLINE_FUNCTION
1191  void gatherSolution(const unsigned ielem,
1192  scalar_type val[],
1193  unsigned node_index[],
1194  double x[], double y[], double z[],
1195  fad_scalar_type res[]) const
1196  {
1197  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1198  const unsigned ni = this->elem_node_ids( ielem , i );
1199 
1200  node_index[i] = ni ;
1201 
1202  x[i] = this->node_coords( ni , 0 );
1203  y[i] = this->node_coords( ni , 1 );
1204  z[i] = this->node_coords( ni , 2 );
1205 
1206  val[i] = this->solution( ni );
1207  }
1208  }
1209 
1210  template <typename local_scalar_type>
1211  KOKKOS_INLINE_FUNCTION
1212  void computeElementResidual(const scalar_type dof_values[] ,
1213  const double x[],
1214  const double y[],
1215  const double z[],
1216  local_scalar_type elem_res[] ) const
1217  {
1218  scalar_type coeff_k;
1219  double coeff_src = 1.234;
1220  double advection[] = { 1.1, 1.2, 1.3 };
1221  double dpsidx[ FunctionCount ] ;
1222  double dpsidy[ FunctionCount ] ;
1223  double dpsidz[ FunctionCount ] ;
1224  double pt[] = {0.0, 0.0, 0.0};
1225  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1226 
1227  // If function is not constant
1228  // then compute physical coordinates of integration point
1229  if ( ! this->coeff_function.is_constant ) {
1230  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1231  pt[0] += x[j] * this->elem_data.values[i][j] ;
1232  pt[1] += y[j] * this->elem_data.values[i][j] ;
1233  pt[2] += z[j] * this->elem_data.values[i][j] ;
1234  }
1235  }
1236  coeff_k = this->coeff_function(pt, 0);
1237 
1238  const double integ_weight = this->elem_data.weights[i];
1239  const double* bases_vals = this->elem_data.values[i];
1240  const double detJ =
1241  this->transform_gradients( this->elem_data.gradients[i] ,
1242  x , y , z ,
1243  dpsidx , dpsidy , dpsidz );
1244  const double detJ_weight = detJ * integ_weight;
1245  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1246 
1247  local_scalar_type value_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1248  local_scalar_type gradx_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1249  local_scalar_type grady_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1250  local_scalar_type gradz_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1251  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1252  value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1253  value_at_pt.fastAccessDx(m) = bases_vals[m] ;
1254 
1255  gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1256  gradx_at_pt.fastAccessDx(m) = dpsidx[m] ;
1257 
1258  grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1259  grady_at_pt.fastAccessDx(m) = dpsidy[m] ;
1260 
1261  gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1262  gradz_at_pt.fastAccessDx(m) = dpsidz[m] ;
1263  }
1264 
1265  const local_scalar_type source_term =
1266  coeff_src * value_at_pt * value_at_pt ;
1267 
1268  const local_scalar_type advection_term =
1269  advection[0]*gradx_at_pt +
1270  advection[1]*grady_at_pt +
1271  advection[2]*gradz_at_pt;
1272 
1273  for ( unsigned m = 0; m < FunctionCount; ++m) {
1274  const double bases_val_m = bases_vals[m] * detJ_weight ;
1275  const double dpsidx_m = dpsidx[m] ;
1276  const double dpsidy_m = dpsidy[m] ;
1277  const double dpsidz_m = dpsidz[m] ;
1278 
1279  elem_res[m] +=
1280  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1281  dpsidy_m * grady_at_pt +
1282  dpsidz_m * gradz_at_pt ) +
1283  bases_val_m * ( advection_term + source_term ) ;
1284  }
1285  }
1286  }
1287 
1288  KOKKOS_INLINE_FUNCTION
1289  void operator()( const unsigned ielem ) const
1290  {
1291  double x[ FunctionCount ] ;
1292  double y[ FunctionCount ] ;
1293  double z[ FunctionCount ] ;
1294  unsigned node_index[ ElemNodeCount ];
1295 
1297  fad_scalar_type elem_res[ FunctionCount ] ;
1298 
1299  // Gather nodal coordinates and solution vector:
1300  gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1301 
1302  // Compute nodal element residual vector:
1303  computeElementResidual( val, x, y, z, elem_res );
1304 
1305  // Scatter nodal element residual in global vector:
1306  this->scatterResidual( ielem, node_index, elem_res );
1307  }
1308 }; /* ElementComputation */
1309 
1310 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
1311  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
1313  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1314  CrsMatrix< ScalarType , ExecutionSpace > ,
1315  FadQuadPoint , CoeffFunctionType > :
1316  public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1317  CrsMatrix< ScalarType , ExecutionSpace > ,
1318  Analytic , CoeffFunctionType > {
1319 public:
1320 
1323  Analytic , CoeffFunctionType > base_type;
1324 
1327 
1328  static const unsigned FunctionCount = base_type::FunctionCount;
1329  static const unsigned IntegrationCount = base_type::IntegrationCount;
1330  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1331 
1332  typedef Sacado::Fad::SLFad<scalar_type,4> fad_scalar_type;
1333  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1334 
1336 
1338  const typename base_type::mesh_type & arg_mesh ,
1339  const CoeffFunctionType & arg_coeff_function ,
1340  const typename base_type::vector_type & arg_solution ,
1341  const typename base_type::elem_graph_type & arg_elem_graph ,
1342  const typename base_type::sparse_matrix_type & arg_jacobian ,
1343  const typename base_type::vector_type & arg_residual ,
1344  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1345  base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1346  arg_jacobian, arg_residual, arg_dev_config) {}
1347 
1348  //------------------------------------
1349 
1350  void apply() const
1351  {
1352  const size_t nelem = this->elem_node_ids.extent(0);
1353  parallel_for( nelem , *this );
1354  }
1355 
1356  KOKKOS_INLINE_FUNCTION
1358  const scalar_type dof_values[] ,
1359  const double x[],
1360  const double y[],
1361  const double z[],
1362  scalar_type elem_res[] ,
1363  scalar_type elem_mat[][FunctionCount] ) const
1364  {
1365  scalar_type coeff_k;
1366  double coeff_src = 1.234;
1367  double advection[] = { 1.1, 1.2, 1.3 };
1368  double dpsidx[ FunctionCount ] ;
1369  double dpsidy[ FunctionCount ] ;
1370  double dpsidz[ FunctionCount ] ;
1371  double pt[] = {0.0, 0.0, 0.0};
1372 
1373  fad_scalar_type value_at_pt(4, 0, 0.0) ;
1374  fad_scalar_type gradx_at_pt(4, 1, 0.0) ;
1375  fad_scalar_type grady_at_pt(4, 2, 0.0) ;
1376  fad_scalar_type gradz_at_pt(4, 3, 0.0) ;
1377  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1378 
1379  // If function is not constant
1380  // then compute physical coordinates of integration point
1381  if ( ! this->coeff_function.is_constant ) {
1382  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1383  pt[0] += x[j] * this->elem_data.values[i][j] ;
1384  pt[1] += y[j] * this->elem_data.values[i][j] ;
1385  pt[2] += z[j] * this->elem_data.values[i][j] ;
1386  }
1387  }
1388  coeff_k = this->coeff_function(pt, 0);
1389 
1390  const double integ_weight = this->elem_data.weights[i];
1391  const double* bases_vals = this->elem_data.values[i];
1392  const double detJ =
1393  this->transform_gradients( this->elem_data.gradients[i] ,
1394  x , y , z ,
1395  dpsidx , dpsidy , dpsidz );
1396  const double detJ_weight = detJ * integ_weight;
1397  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1398 
1399  value_at_pt.val() = 0.0 ;
1400  gradx_at_pt.val() = 0.0 ;
1401  grady_at_pt.val() = 0.0 ;
1402  gradz_at_pt.val() = 0.0 ;
1403  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1404  value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1405  gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1406  grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1407  gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1408  }
1409 
1410  const fad_scalar_type source_term =
1411  coeff_src * value_at_pt * value_at_pt ;
1412 
1413  const fad_scalar_type advection_term =
1414  advection[0]*gradx_at_pt +
1415  advection[1]*grady_at_pt +
1416  advection[2]*gradz_at_pt;
1417 
1418  for ( unsigned m = 0; m < FunctionCount; ++m) {
1419  const double bases_val_m = bases_vals[m] * detJ_weight ;
1420  fad_scalar_type res =
1421  detJ_weight_coeff_k * ( dpsidx[m] * gradx_at_pt +
1422  dpsidy[m] * grady_at_pt +
1423  dpsidz[m] * gradz_at_pt ) +
1424  bases_val_m * ( advection_term + source_term ) ;
1425 
1426  elem_res[m] += res.val();
1427 
1428  scalar_type * const mat = elem_mat[m] ;
1429  for( unsigned n = 0; n < FunctionCount; n++) {
1430  mat[n] += res.fastAccessDx(0) * bases_vals[n] +
1431  res.fastAccessDx(1) * dpsidx[n] +
1432  res.fastAccessDx(2) * dpsidy[n] +
1433  res.fastAccessDx(3) * dpsidz[n];
1434  }
1435  }
1436  }
1437  }
1438 
1439  KOKKOS_INLINE_FUNCTION
1440  void operator()( const unsigned ielem ) const
1441  {
1442  double x[ FunctionCount ] ;
1443  double y[ FunctionCount ] ;
1444  double z[ FunctionCount ] ;
1445  unsigned node_index[ ElemNodeCount ];
1446 
1448  scalar_type elem_res[ FunctionCount ] ;
1449  scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
1450 
1451  // Gather nodal coordinates and solution vector:
1452  this->gatherSolution( ielem, val, node_index, x, y, z, elem_res, elem_mat );
1453 
1454  // Compute nodal element residual vector and Jacobian matrix:
1455  computeElementResidualJacobian( val, x, y, z, elem_res, elem_mat );
1456 
1457  // Scatter nodal element residual and Jacobian in global vector and matrix:
1458  this->scatterResidual( ielem, node_index, elem_res, elem_mat );
1459  }
1460 }; /* ElementComputation */
1461 
1462 #if 0
1463 template< class FiniteElementMeshType , class SparseMatrixType
1464  , class CoeffFunctionType = ElementComputationConstantCoefficient
1465  >
1466 class ElementComputation ;
1467 
1468 
1469 template< class ExecutionSpace , BoxElemPart::ElemOrder Order , class CoordinateMap ,
1470  typename ScalarType , class CoeffFunctionType >
1471 class ElementComputation
1472  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap >
1473  , Kokkos::Example::FENL::CrsMatrix< ScalarType , ExecutionSpace >
1474  , CoeffFunctionType >
1475 {
1476 public:
1477 
1480 
1481  //------------------------------------
1482 
1483  typedef ExecutionSpace execution_space ;
1484  typedef ScalarType scalar_type ;
1485 
1488  typedef typename sparse_matrix_type::values_type matrix_values_type ;
1489  typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space > vector_type ;
1490 
1491  //------------------------------------
1492 
1493  typedef LocalViewTraits< vector_type > local_vector_view_traits;
1494  typedef LocalViewTraits< matrix_values_type> local_matrix_view_traits;
1495  typedef typename local_vector_view_traits::local_view_type local_vector_type;
1496  typedef typename local_matrix_view_traits::local_view_type local_matrix_type;
1497  typedef typename local_vector_view_traits::local_value_type local_scalar_type;
1498  static const bool use_team = local_vector_view_traits::use_team;
1499 
1500  static const unsigned SpatialDim = element_data_type::spatial_dimension ;
1501  static const unsigned TensorDim = SpatialDim * SpatialDim ;
1502  static const unsigned ElemNodeCount = element_data_type::element_node_count ;
1503  static const unsigned FunctionCount = element_data_type::function_count ;
1504  static const unsigned IntegrationCount = element_data_type::integration_count ;
1505 
1506  //------------------------------------
1507 
1508  typedef typename mesh_type::node_coord_type node_coord_type ;
1509  typedef typename mesh_type::elem_node_type elem_node_type ;
1510  typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
1511  typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
1512 
1513  typedef LocalViewTraits< elem_matrices_type > local_elem_matrices_traits;
1514  typedef LocalViewTraits< elem_vectors_type > local_elem_vectors_traits;
1515  typedef typename local_elem_matrices_traits::local_view_type local_elem_matrices_type;
1516  typedef typename local_elem_vectors_traits::local_view_type local_elem_vectors_type;
1517 
1518  typedef typename NodeNodeGraph< elem_node_type , sparse_graph_type , ElemNodeCount >::ElemGraphType elem_graph_type ;
1519 
1520  //------------------------------------
1521 
1522 
1523  //------------------------------------
1524  // Computational data:
1525 
1529  const elem_graph_type elem_graph ;
1532  const vector_type solution ;
1533  const vector_type residual ;
1535  const CoeffFunctionType coeff_function ;
1536  const Kokkos::Example::FENL::DeviceConfig dev_config ;
1537 
1538  ElementComputation( const ElementComputation & rhs )
1539  : elem_data()
1540  , elem_node_ids( rhs.elem_node_ids )
1541  , node_coords( rhs.node_coords )
1542  , elem_graph( rhs.elem_graph )
1545  , solution( rhs.solution )
1546  , residual( rhs.residual )
1547  , jacobian( rhs.jacobian )
1548  , coeff_function( rhs.coeff_function )
1549  , dev_config( rhs.dev_config )
1550  {}
1551 
1552  // If the element->sparse_matrix graph is provided then perform atomic updates
1553  // Otherwise fill per-element contributions for subequent gather-add into a residual and jacobian.
1554  ElementComputation( const mesh_type & arg_mesh ,
1555  const CoeffFunctionType & arg_coeff_function ,
1556  const vector_type & arg_solution ,
1557  const elem_graph_type & arg_elem_graph ,
1558  const sparse_matrix_type & arg_jacobian ,
1559  const vector_type & arg_residual ,
1560  const Kokkos::Example::FENL::DeviceConfig arg_dev_config )
1561  : elem_data()
1562  , elem_node_ids( arg_mesh.elem_node() )
1563  , node_coords( arg_mesh.node_coord() )
1564  , elem_graph( arg_elem_graph )
1565  , elem_jacobians()
1566  , elem_residuals()
1567  , solution( arg_solution )
1568  , residual( arg_residual )
1569  , jacobian( arg_jacobian )
1570  , coeff_function( arg_coeff_function )
1571  , dev_config( arg_dev_config )
1572  {}
1573 
1574  //------------------------------------
1575 
1576  void apply() const
1577  {
1578  const size_t nelem = elem_node_ids.extent(0);
1579  if ( use_team ) {
1580  const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1581  const size_t league_size =
1582  (nelem + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1583  Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1584  parallel_for( config , *this );
1585  }
1586  else {
1587  parallel_for( nelem , *this );
1588  }
1589  }
1590 
1591  //------------------------------------
1592 
1593  static const unsigned FLOPS_transform_gradients =
1594  /* Jacobian */ FunctionCount * TensorDim * 2 +
1595  /* Inverse jacobian */ TensorDim * 6 + 6 +
1596  /* Gradient transform */ FunctionCount * 15 ;
1597 
1598  KOKKOS_INLINE_FUNCTION
1599  double transform_gradients(
1600  const double grad[][ FunctionCount ] , // Gradient of bases master element
1601  const double x[] ,
1602  const double y[] ,
1603  const double z[] ,
1604  double dpsidx[] ,
1605  double dpsidy[] ,
1606  double dpsidz[] ) const
1607  {
1608  enum { j11 = 0 , j12 = 1 , j13 = 2 ,
1609  j21 = 3 , j22 = 4 , j23 = 5 ,
1610  j31 = 6 , j32 = 7 , j33 = 8 };
1611 
1612  // Jacobian accumulation:
1613 
1614  double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1615 
1616  for( unsigned i = 0; i < FunctionCount ; ++i ) {
1617  const double x1 = x[i] ;
1618  const double x2 = y[i] ;
1619  const double x3 = z[i] ;
1620 
1621  const double g1 = grad[0][i] ;
1622  const double g2 = grad[1][i] ;
1623  const double g3 = grad[2][i] ;
1624 
1625  J[j11] += g1 * x1 ;
1626  J[j12] += g1 * x2 ;
1627  J[j13] += g1 * x3 ;
1628 
1629  J[j21] += g2 * x1 ;
1630  J[j22] += g2 * x2 ;
1631  J[j23] += g2 * x3 ;
1632 
1633  J[j31] += g3 * x1 ;
1634  J[j32] += g3 * x2 ;
1635  J[j33] += g3 * x3 ;
1636  }
1637 
1638  // Inverse jacobian:
1639 
1640  double invJ[ TensorDim ] = {
1641  static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
1642  static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
1643  static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
1644 
1645  static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
1646  static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
1647  static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
1648 
1649  static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
1650  static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
1651  static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
1652 
1653  const double detJ = J[j11] * invJ[j11] +
1654  J[j21] * invJ[j12] +
1655  J[j31] * invJ[j13] ;
1656 
1657  const double detJinv = 1.0 / detJ ;
1658 
1659  for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
1660 
1661  // Transform gradients:
1662 
1663  for( unsigned i = 0; i < FunctionCount ; ++i ) {
1664  const double g0 = grad[0][i];
1665  const double g1 = grad[1][i];
1666  const double g2 = grad[2][i];
1667 
1668  dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
1669  dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
1670  dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
1671  }
1672 
1673  return detJ ;
1674  }
1675 
1676  KOKKOS_INLINE_FUNCTION
1677  void contributeResidualJacobian(
1678  const local_scalar_type dof_values[] ,
1679  const double dpsidx[] ,
1680  const double dpsidy[] ,
1681  const double dpsidz[] ,
1682  const double detJ ,
1683  const local_scalar_type coeff_k ,
1684  const double integ_weight ,
1685  const double bases_vals[] ,
1686  local_scalar_type elem_res[] ,
1687  local_scalar_type elem_mat[][ FunctionCount ] ) const
1688  {
1689  local_scalar_type value_at_pt = 0 ;
1690  local_scalar_type gradx_at_pt = 0 ;
1691  local_scalar_type grady_at_pt = 0 ;
1692  local_scalar_type gradz_at_pt = 0 ;
1693 
1694  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1695  value_at_pt += dof_values[m] * bases_vals[m] ;
1696  gradx_at_pt += dof_values[m] * dpsidx[m] ;
1697  grady_at_pt += dof_values[m] * dpsidy[m] ;
1698  gradz_at_pt += dof_values[m] * dpsidz[m] ;
1699  }
1700 
1701  const local_scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
1702  const local_scalar_type res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
1703  const local_scalar_type mat_val = 2.0 * value_at_pt * detJ * integ_weight ;
1704 
1705  // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
1706  // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
1707 
1708  for ( unsigned m = 0; m < FunctionCount; ++m) {
1709  local_scalar_type * const mat = elem_mat[m] ;
1710  const double bases_val_m = bases_vals[m];
1711  const double dpsidx_m = dpsidx[m] ;
1712  const double dpsidy_m = dpsidy[m] ;
1713  const double dpsidz_m = dpsidz[m] ;
1714 
1715  elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
1716  dpsidy_m * grady_at_pt +
1717  dpsidz_m * gradz_at_pt ) +
1718  res_val * bases_val_m ;
1719 
1720  for( unsigned n = 0; n < FunctionCount; n++) {
1721 
1722  mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] +
1723  dpsidy_m * dpsidy[n] +
1724  dpsidz_m * dpsidz[n] ) +
1725  mat_val * bases_val_m * bases_vals[n];
1726  }
1727  }
1728  }
1729 
1730  KOKKOS_INLINE_FUNCTION
1731  void operator()( const typename TeamPolicy< execution_space >::member_type & dev ) const
1732  {
1733 
1734  const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1735  const unsigned num_element_threads = dev_config.block_dim.y ;
1736  const unsigned element_rank = dev.team_rank() / num_ensemble_threads ;
1737  const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1738 
1739  const unsigned ielem =
1740  dev.league_rank() * num_element_threads + element_rank;
1741 
1742  if (ielem >= elem_node_ids.extent(0))
1743  return;
1744 
1745  (*this)( ielem, ensemble_rank );
1746 
1747  }
1748 
1749  KOKKOS_INLINE_FUNCTION
1750  void operator()( const unsigned ielem ,
1751  const unsigned ensemble_rank = 0 ) const
1752  {
1753  local_vector_type local_solution =
1754  local_vector_view_traits::create_local_view(solution,
1755  ensemble_rank);
1756  local_vector_type local_residual =
1757  local_vector_view_traits::create_local_view(residual,
1758  ensemble_rank);
1759  local_matrix_type local_jacobian_values =
1760  local_matrix_view_traits::create_local_view(jacobian.values,
1761  ensemble_rank);
1762 
1763  // Gather nodal coordinates and solution vector:
1764 
1765  double x[ FunctionCount ] ;
1766  double y[ FunctionCount ] ;
1767  double z[ FunctionCount ] ;
1768  local_scalar_type val[ FunctionCount ] ;
1769  unsigned node_index[ ElemNodeCount ];
1770 
1771  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1772  const unsigned ni = elem_node_ids( ielem , i );
1773 
1774  node_index[i] = ni ;
1775 
1776  x[i] = node_coords( ni , 0 );
1777  y[i] = node_coords( ni , 1 );
1778  z[i] = node_coords( ni , 2 );
1779 
1780  val[i] = local_solution( ni );
1781  }
1782 
1783 
1784  local_scalar_type elem_vec[ FunctionCount ] ;
1785  local_scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
1786 
1787  for( unsigned i = 0; i < FunctionCount ; i++ ) {
1788  elem_vec[i] = 0 ;
1789  for( unsigned j = 0; j < FunctionCount ; j++){
1790  elem_mat[i][j] = 0 ;
1791  }
1792  }
1793 
1794 
1795  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1796  double dpsidx[ FunctionCount ] ;
1797  double dpsidy[ FunctionCount ] ;
1798  double dpsidz[ FunctionCount ] ;
1799 
1800  local_scalar_type coeff_k = 0 ;
1801 
1802  {
1803  double pt[] = {0.0, 0.0, 0.0};
1804 
1805  // If function is not constant
1806  // then compute physical coordinates of integration point
1807  if ( ! coeff_function.is_constant ) {
1808  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1809  pt[0] += x[j] * elem_data.values[i][j] ;
1810  pt[1] += y[j] * elem_data.values[i][j] ;
1811  pt[2] += z[j] * elem_data.values[i][j] ;
1812  }
1813  }
1814 
1815  // Need to fix this for local_scalar_type!!!!!!
1816  coeff_k = coeff_function(pt, ensemble_rank);
1817  }
1818 
1819  const double detJ =
1820  transform_gradients( elem_data.gradients[i] , x , y , z ,
1821  dpsidx , dpsidy , dpsidz );
1822 
1823  contributeResidualJacobian( val , dpsidx , dpsidy , dpsidz ,
1824  detJ , coeff_k ,
1825  elem_data.weights[i] ,
1826  elem_data.values[i] ,
1827  elem_vec , elem_mat );
1828  }
1829 
1830  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
1831  const unsigned row = node_index[i] ;
1832  if ( row < residual.extent(0) ) {
1833  atomic_add( & local_residual( row ) , elem_vec[i] );
1834 
1835  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
1836  const unsigned entry = elem_graph( ielem , i , j );
1837  if ( entry != ~0u ) {
1838  atomic_add( & local_jacobian_values( entry ) , elem_mat[i][j] );
1839  }
1840  }
1841  }
1842  }
1843  }
1844 }; /* ElementComputation */
1845 #endif
1846 
1847 //----------------------------------------------------------------------------
1848 
1849 template< class FixtureType , class SparseMatrixType >
1851 
1852 template< class ExecutionSpace , BoxElemPart::ElemOrder Order , class CoordinateMap ,
1853  typename ScalarType >
1855  Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1856  Kokkos::Example::FENL::CrsMatrix< ScalarType , ExecutionSpace > >
1857 {
1858 public:
1859 
1863 
1864  typedef ExecutionSpace execution_space ;
1865  typedef ScalarType scalar_type ;
1866 
1870  typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
1871 
1872  //------------------------------------
1873 
1878  static const bool use_team = local_vector_view_traits::use_team;
1879 
1880  typedef double bc_scalar_type ;
1881 
1882  //------------------------------------
1883  // Computational data:
1884 
1893  const unsigned bc_plane ;
1894  const unsigned node_count ;
1895  bool init ;
1897 
1898 
1899  DirichletComputation( const mesh_type & arg_mesh ,
1900  const vector_type & arg_solution ,
1901  const sparse_matrix_type & arg_jacobian ,
1902  const vector_type & arg_residual ,
1903  const unsigned arg_bc_plane ,
1904  const bc_scalar_type arg_bc_lower_value ,
1905  const bc_scalar_type arg_bc_upper_value ,
1906  const Kokkos::Example::FENL::DeviceConfig arg_dev_config )
1907  : node_coords( arg_mesh.node_coord() )
1908  , solution( arg_solution )
1909  , jacobian( arg_jacobian )
1910  , residual( arg_residual )
1911  , bc_lower_value( arg_bc_lower_value )
1912  , bc_upper_value( arg_bc_upper_value )
1913  , bc_lower_limit( std::numeric_limits<scalar_coord_type>::epsilon() )
1914  , bc_upper_limit( scalar_coord_type(1) - std::numeric_limits<scalar_coord_type>::epsilon() )
1915  , bc_plane( arg_bc_plane )
1916  , node_count( arg_mesh.node_count_owned() )
1917  , init( false )
1918  , dev_config( arg_dev_config )
1919  {
1920  parallel_for( node_count , *this );
1921  init = true ;
1922  }
1923 
1924  void apply() const
1925  {
1926  if ( use_team ) {
1927  const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1928  const size_t league_size =
1929  (node_count + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1930  Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1931  parallel_for( config , *this );
1932  }
1933  else
1934  parallel_for( node_count , *this );
1935  }
1936 
1937  //------------------------------------
1938 
1939  KOKKOS_INLINE_FUNCTION
1940  void operator()( const typename TeamPolicy< execution_space >::member_type & dev ) const
1941  {
1942 
1943  const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1944  const unsigned num_node_threads = dev_config.block_dim.y ;
1945  const unsigned node_rank = dev.team_rank() / num_ensemble_threads ;
1946  const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1947 
1948  const unsigned inode = dev.league_rank() * num_node_threads + node_rank;
1949 
1950  if (inode >= node_count)
1951  return;
1952 
1953  (*this)( inode, ensemble_rank );
1954 
1955  }
1956 
1957  KOKKOS_INLINE_FUNCTION
1958  void operator()( const unsigned inode ,
1959  const unsigned ensemble_rank = 0) const
1960  {
1961  local_vector_type local_residual =
1962  local_vector_view_traits::create_local_view(residual,
1963  ensemble_rank);
1964  local_matrix_type local_jacobian_values =
1965  local_matrix_view_traits::create_local_view(jacobian.values,
1966  ensemble_rank);
1967 
1968  // Apply dirichlet boundary condition on the Solution and Residual vectors.
1969  // To maintain the symmetry of the original global stiffness matrix,
1970  // zero out the columns that correspond to boundary conditions, and
1971  // update the residual vector accordingly
1972 
1973  const unsigned iBeg = jacobian.graph.row_map[inode];
1974  const unsigned iEnd = jacobian.graph.row_map[inode+1];
1975 
1976  const scalar_coord_type c = node_coords(inode,bc_plane);
1977  const bool bc_lower = c <= bc_lower_limit ;
1978  const bool bc_upper = bc_upper_limit <= c ;
1979 
1980  if ( ! init ) {
1981  solution(inode) = bc_lower ? bc_lower_value : (
1982  bc_upper ? bc_upper_value : 0 );
1983  }
1984  else {
1985  if ( bc_lower || bc_upper ) {
1986 
1987  local_residual(inode) = 0 ;
1988 
1989  // zero each value on the row, and leave a one
1990  // on the diagonal
1991 
1992  for( unsigned i = iBeg ; i < iEnd ; ++i ) {
1993  local_jacobian_values(i) = int(inode) == int(jacobian.graph.entries(i)) ? 1 : 0 ;
1994  }
1995  }
1996  else {
1997 
1998  // Find any columns that are boundary conditions.
1999  // Clear them and adjust the residual vector
2000 
2001  for( unsigned i = iBeg ; i < iEnd ; ++i ) {
2002  const unsigned cnode = jacobian.graph.entries(i) ;
2003  const scalar_coord_type cc = node_coords(cnode,bc_plane);
2004 
2005  if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) {
2006  local_jacobian_values(i) = 0 ;
2007  }
2008  }
2009  }
2010  }
2011  }
2012 };
2013 
2014 } /* namespace FENL */
2015 } /* namespace Example */
2016 } /* namespace Kokkos */
2017 
2018 //----------------------------------------------------------------------------
2019 
2020 /* A Cuda-specific specialization for the element computation functor. */
2021 #if defined( __CUDACC__ )
2022 // #include <NonlinearElement_Cuda.hpp>
2023 #endif
2024 
2025 //----------------------------------------------------------------------------
2026 
2027 #endif /* #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP */
static KOKKOS_INLINE_FUNCTION local_view_type create_local_view(const view_type &v, const unsigned local_rank)
KOKKOS_INLINE_FUNCTION PCE< Storage > sqrt(const PCE< Storage > &a)
ExponentialKLCoefficient(const ExponentialKLCoefficient &rhs)
KOKKOS_INLINE_FUNCTION void setRandomVariables(const RandomVariableView &rv)
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
LocalViewTraits< RandomVariableView > local_rv_view_traits
KOKKOS_INLINE_FUNCTION Teuchos::PromotionTraits< typename rv_type::value_type, value_type >::promote evaluate(const point_type &point, const rv_type &random_variables) const
Evaluate random field at a point.
KOKKOS_INLINE_FUNCTION void fill_graph_entries(const unsigned iset) const
Kokkos::View< scalar_type *[FunctionCount], execution_space > elem_vectors_type
CrsMatrix< ScalarType, ExecutionSpace > sparse_matrix_type
KOKKOS_INLINE_FUNCTION void join(unsigned &update, const unsigned &input) const
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
Kokkos::View< const double *[SpaceDim], Device > node_coord_type
Kokkos::DefaultExecutionSpace execution_space
double gradients[integration_count][spatial_dimension][function_count]
Kokkos::View< const unsigned *[ElemNode], Device > elem_node_type
KOKKOS_INLINE_FUNCTION RandomVariableView getRandomVariables() const
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
Kokkos::View< scalar_type *, Kokkos::LayoutLeft, execution_space > vector_type
KOKKOS_INLINE_FUNCTION void atomic_add(volatile Sacado::UQ::PCE< Storage > *const dest, const Sacado::UQ::PCE< Storage > &src)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Kokkos::View< Scalar *, Kokkos::LayoutLeft, Device > RandomVariableView
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
KOKKOS_INLINE_FUNCTION local_scalar_type operator()(const MeshScalar point[], const size_type ensemble_rank) const
NodeNodeGraph< elem_node_type, sparse_graph_type, ElemNodeCount >::ElemGraphType elem_graph_type
local_rv_view_traits::local_view_type local_rv_view_type
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, Analytic, CoeffFunctionType > base_type
ElemNodeIdView::execution_space execution_space
KOKKOS_INLINE_FUNCTION void operator()(const unsigned irow, unsigned &update, const bool final) const
ElementComputationConstantCoefficient(const ElementComputationConstantCoefficient &rhs)
KOKKOS_INLINE_FUNCTION void init(unsigned &update) const
KOKKOS_INLINE_FUNCTION void fill_elem_graph_map(const unsigned ielem) const
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
KokkosSparse::StaticCrsGraph< unsigned, Space, void, void, unsigned > StaticCrsGraphType
Kokkos::Example::HexElement_Data< mesh_type::ElemNode > element_data_type
CrsGraphType::row_map_type::non_const_type RowMapType
ElementComputationBase(const mesh_type &arg_mesh, const vector_type &arg_solution, const elem_graph_type &arg_elem_graph, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual)
CrsMatrix(const StaticCrsGraphType &arg_graph)
KOKKOS_INLINE_FUNCTION double transform_gradients(const double grad[][FunctionCount], const double x[], const double y[], const double z[], double dpsidx[], double dpsidy[], double dpsidz[]) const
double values[integration_count][function_count]
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
KOKKOS_INLINE_FUNCTION void operator()(const unsigned iwork) const
KOKKOS_INLINE_FUNCTION double operator()(double pt[], unsigned ensemble_rank) const
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap > mesh_type
NodeNodeGraph(const ElemNodeIdView &arg_elem_node_id, const unsigned arg_node_count, Times &results)
expr val()
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], scalar_type res[], scalar_type mat[][FunctionCount]) const
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, FadElement, CoeffFunctionType > base_type
sparse_matrix_type::StaticCrsGraphType sparse_graph_type
Kokkos::View< unsigned, execution_space > UnsignedValue
DeviceConfig(const size_t num_blocks_=0, const size_t threads_per_block_x_=0, const size_t threads_per_block_y_=0, const size_t threads_per_block_z_=1)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, fad_scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
local_rv_view_traits::local_value_type local_scalar_type
DirichletComputation(const mesh_type &arg_mesh, const vector_type &arg_solution, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual, const unsigned arg_bc_plane, const bc_scalar_type arg_bc_lower_value, const bc_scalar_type arg_bc_upper_value, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION PCE< Storage > sin(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION void scatterResidual(const unsigned ielem, const unsigned node_index[], const scalar_type res[], const scalar_type mat[][FunctionCount]) const
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION void sort_graph_entries(const unsigned irow) const
Kokkos::View< scalar_type *[FunctionCount][FunctionCount], execution_space > elem_matrices_type
KOKKOS_INLINE_FUNCTION void fill_set(const unsigned ielem) const
Kokkos::View< unsigned *[ElemNode][ElemNode], execution_space > ElemGraphType
int n
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...
Stokhos::KL::ExponentialRandomField< MeshScalar, Device > rf_type
ExponentialKLCoefficient(const MeshScalar mean, const MeshScalar variance, const MeshScalar correlation_length, const size_type num_rv)
KOKKOS_INLINE_FUNCTION void computeElementResidual(const local_scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
Kokkos::UnorderedMap< key_type, void, execution_space > SetType
KOKKOS_INLINE_FUNCTION void computeElementResidual(const scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const