Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FadMPAssembly/fenl_functors.hpp
Go to the documentation of this file.
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
18 #define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
19 
20 #include <stdio.h>
21 
22 #include <iostream>
23 #include <fstream>
24 #include <iomanip>
25 #include <cstdlib>
26 #include <cmath>
27 #include <limits>
28 
29 #include <Kokkos_Pair.hpp>
30 #include <Kokkos_UnorderedMap.hpp>
31 
32 #include <Kokkos_Timer.hpp>
33 
34 #include <BoxElemFixture.hpp>
35 #include <HexElement.hpp>
36 
38 #include "Sacado.hpp"
39 //#include "Fad/Sacado_Fad_SFad_MP_Vector.hpp"
41 //#include "Fad/Sacado_Fad_DFad_MP_Vector.hpp"
42 
43 //----------------------------------------------------------------------------
44 //----------------------------------------------------------------------------
45 
46 namespace Kokkos {
47 namespace Example {
48 namespace FENL {
49 
50 struct DeviceConfig {
51  struct Dim3 {
52  size_t x, y, z;
53  Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) :
54  x(x_), y(y_), z(z_) {}
55  };
56 
58  size_t num_blocks;
60 
61  DeviceConfig(const size_t num_blocks_ = 0,
62  const size_t threads_per_block_x_ = 0,
63  const size_t threads_per_block_y_ = 0,
64  const size_t threads_per_block_z_ = 1) :
65  block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
66  num_blocks(num_blocks_),
68  {}
69 };
70 
71 template< typename ValueType , class Space >
72 struct CrsMatrix {
73 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
74  typedef Kokkos::StaticCrsGraph< unsigned , Space , void , unsigned > StaticCrsGraphType ;
75 #else
76  typedef Kokkos::StaticCrsGraph< unsigned , Space , void , void , unsigned > StaticCrsGraphType ;
77 #endif
78  typedef View< ValueType * , Space > values_type ;
79 
82 
83  CrsMatrix() : graph(), values() {}
84 
85  CrsMatrix( const StaticCrsGraphType & arg_graph )
86  : graph( arg_graph )
87  , values( "crs_matrix_values" , arg_graph.entries.extent(0) )
88  {}
89 };
90 
91 // Traits class for creating strided local views for embedded ensemble-based,
92 // specialized for ensemble UQ scalar type
93 template <typename ViewType, typename Enabled = void>
95  typedef ViewType view_type;
96  // typedef Kokkos::View<typename view_type::data_type,
97  // typename view_type::array_layout,
98  // typename view_type::execution_space,
99  // Kokkos::MemoryUnmanaged> local_view_type;
100  typedef const view_type& local_view_type;
102  static const bool use_team = false;
103  KOKKOS_INLINE_FUNCTION
105  const unsigned local_rank)
106  { return v; }
107 };
108 
109 #if defined( KOKKOS_ENABLE_CUDA )
110 
111 template <typename ViewType>
112 struct LocalViewTraits<
113  ViewType,
114  typename std::enable_if< std::is_same<typename ViewType::execution_space,
115  Kokkos::Cuda>::value &&
116  Kokkos::is_view_mp_vector<ViewType>::value
117  >::type > {
118  typedef ViewType view_type;
121  static const bool use_team = true;
122 
123  KOKKOS_INLINE_FUNCTION
125  const unsigned local_rank)
126  {
127  return Kokkos::partition<1>(v, local_rank);
128  }
129 };
130 
131 #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
132 
133 // Compute DeviceConfig struct's based on scalar type
134 template <typename ScalarType>
136  static void eval( Kokkos::Example::FENL::DeviceConfig& dev_config_elem,
137  Kokkos::Example::FENL::DeviceConfig& dev_config_bc ) {
138  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
139  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
140  }
141 };
142 
143 // Compute DeviceConfig struct's based on scalar type
144 template <typename StorageType>
145 struct CreateDeviceConfigs< Sacado::MP::Vector<StorageType> > {
147  static void eval( Kokkos::Example::FENL::DeviceConfig& dev_config_elem,
148  Kokkos::Example::FENL::DeviceConfig& dev_config_bc ) {
149  static const unsigned VectorSize = StorageType::static_size;
150 #if defined( KOKKOS_ENABLE_CUDA )
151  enum { is_cuda = std::is_same< execution_space, Kokkos::Cuda >::value };
152 #else
153  enum { is_cuda = false };
154 #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
155  if ( is_cuda ) {
156  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , VectorSize , 64/VectorSize );
157  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , VectorSize , 256/VectorSize );
158  }
159  else {
160  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
161  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
162  }
163  }
164 };
165 
166 template< class ElemNodeIdView , class CrsGraphType , unsigned ElemNode >
168 public:
169 
171  typedef pair<unsigned,unsigned> key_type ;
172 
173  typedef Kokkos::UnorderedMap< key_type, void , execution_space > SetType ;
174  typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ;
175  typedef Kokkos::View< unsigned , execution_space > UnsignedValue ;
176 
177  // Static dimensions of 0 generate compiler warnings or errors.
178  typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space >
180 
181 private:
182 
188 
189  const unsigned node_count ;
190  const ElemNodeIdView elem_node_id ;
196 
197 public:
198 
199  CrsGraphType graph ;
201 
202  struct Times
203  {
204  double ratio;
210  };
211 
212  NodeNodeGraph( const ElemNodeIdView & arg_elem_node_id ,
213  const unsigned arg_node_count,
214  Times & results
215  )
216  : node_count(arg_node_count)
217  , elem_node_id( arg_elem_node_id )
218  , row_total( "row_total" )
219  , row_count(Kokkos::ViewAllocateWithoutInitializing("row_count") , node_count ) // will deep_copy to 0 inside loop
220  , row_map( "graph_row_map" , node_count + 1 )
221  , node_node_set()
222  , phase( FILL_NODE_SET )
223  , graph()
224  , elem_graph()
225  {
226  //--------------------------------
227  // Guess at capacity required for the map:
228 
229  Kokkos::Timer wall_clock ;
230 
231  wall_clock.reset();
232  phase = FILL_NODE_SET ;
233 
234  // upper bound on the capacity
235  size_t set_capacity = (((28ull * node_count) / 2ull)*4ull)/3ull;
236 
237 
238  // Increase capacity until the (node,node) map is successfully filled.
239  {
240  // Zero the row count to restart the fill
242 
243  node_node_set = SetType( set_capacity );
244 
245  // May be larger that requested:
246  set_capacity = node_node_set.capacity();
247 
248  Kokkos::parallel_for( elem_node_id.extent(0) , *this );
249  }
250 
251  execution_space().fence();
252  results.ratio = (double)node_node_set.size() / (double)node_node_set.capacity();
253  results.fill_node_set = wall_clock.seconds();
254  //--------------------------------
255 
256  wall_clock.reset();
258 
259  // Exclusive scan of row_count into row_map
260  // including the final total in the 'node_count + 1' position.
261  // Zero the 'row_count' values.
262  Kokkos::parallel_scan( node_count , *this );
263 
264  // Zero the row count for the fill:
266 
267  unsigned graph_entry_count = 0 ;
268 
269  Kokkos::deep_copy( graph_entry_count , row_total );
270 
271  // Assign graph's row_map and allocate graph's entries
272  graph.row_map = row_map ;
273  graph.entries = typename CrsGraphType::entries_type( "graph_entries" , graph_entry_count );
274 
275  //--------------------------------
276  // Fill graph's entries from the (node,node) set.
277 
278  execution_space().fence();
279  results.scan_node_count = wall_clock.seconds();
280 
281  wall_clock.reset();
283  Kokkos::parallel_for( node_node_set.capacity() , *this );
284 
285  execution_space().fence();
286  results.fill_graph_entries = wall_clock.seconds();
287 
288  //--------------------------------
289  // Done with the temporary sets and arrays
290  wall_clock.reset();
292 
294  row_count = RowMapType();
295  row_map = RowMapType();
296  node_node_set.clear();
297 
298  //--------------------------------
299 
300  Kokkos::parallel_for( node_count , *this );
301 
302  execution_space().fence();
303  results.sort_graph_entries = wall_clock.seconds();
304 
305  //--------------------------------
306  // Element-to-graph mapping:
307  wall_clock.reset();
309  elem_graph = ElemGraphType("elem_graph", elem_node_id.extent(0) );
310  Kokkos::parallel_for( elem_node_id.extent(0) , *this );
311 
312  execution_space().fence();
313  results.fill_element_graph = wall_clock.seconds();
314  }
315 
316  //------------------------------------
317  // parallel_for: create map and count row length
318 
319  KOKKOS_INLINE_FUNCTION
320  void fill_set( const unsigned ielem ) const
321  {
322  // Loop over element's (row_local_node,col_local_node) pairs:
323  for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
324 
325  const unsigned row_node = elem_node_id( ielem , row_local_node );
326 
327  for ( unsigned col_local_node = row_local_node ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) {
328 
329  const unsigned col_node = elem_node_id( ielem , col_local_node );
330 
331  // If either node is locally owned then insert the pair into the unordered map:
332 
333  if ( row_node < row_count.extent(0) || col_node < row_count.extent(0) ) {
334 
335  const key_type key = (row_node < col_node) ? make_pair( row_node, col_node ) : make_pair( col_node, row_node ) ;
336 
337  const typename SetType::insert_result result = node_node_set.insert( key );
338 
339  if ( result.success() ) {
340  if ( row_node < row_count.extent(0) ) { atomic_fetch_add( & row_count( row_node ) , (typename RowMapType::value_type)1 ); }
341  if ( col_node < row_count.extent(0) && col_node != row_node ) { atomic_fetch_add( & row_count( col_node ) , (typename RowMapType::value_type)1 ); }
342  }
343  }
344  }
345  }
346  }
347 
348  KOKKOS_INLINE_FUNCTION
349  void fill_graph_entries( const unsigned iset ) const
350  {
351  if ( node_node_set.valid_at(iset) ) {
352  const key_type key = node_node_set.key_at(iset) ;
353  const unsigned row_node = key.first ;
354  const unsigned col_node = key.second ;
355 
356  if ( row_node < row_count.extent(0) ) {
357  const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , (typename RowMapType::value_type)1 );
358  graph.entries( offset ) = col_node ;
359  }
360 
361  if ( col_node < row_count.extent(0) && col_node != row_node ) {
362  const unsigned offset = graph.row_map( col_node ) + atomic_fetch_add( & row_count( col_node ) , (typename RowMapType::value_type)1 );
363  graph.entries( offset ) = row_node ;
364  }
365  }
366  }
367 
368  KOKKOS_INLINE_FUNCTION
369  void sort_graph_entries( const unsigned irow ) const
370  {
371  typedef typename CrsGraphType::size_type size_type;
372  const size_type row_beg = graph.row_map( irow );
373  const size_type row_end = graph.row_map( irow + 1 );
374  for ( size_type i = row_beg + 1 ; i < row_end ; ++i ) {
375  const typename CrsGraphType::data_type col = graph.entries(i);
376  size_type j = i ;
377  for ( ; row_beg < j && col < graph.entries(j-1) ; --j ) {
378  graph.entries(j) = graph.entries(j-1);
379  }
380  graph.entries(j) = col ;
381  }
382  }
383 
384  KOKKOS_INLINE_FUNCTION
385  void fill_elem_graph_map( const unsigned ielem ) const
386  {
387  typedef typename CrsGraphType::data_type entry_type;
388  for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
389 
390  const unsigned row_node = elem_node_id( ielem , row_local_node );
391 
392  for ( unsigned col_local_node = 0 ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) {
393 
394  const unsigned col_node = elem_node_id( ielem , col_local_node );
395 
396  entry_type entry = 0 ;
397 
398  if ( row_node + 1 < graph.row_map.extent(0) ) {
399 
400  const entry_type entry_end = static_cast<entry_type> (graph.row_map( row_node + 1 ));
401 
402  entry = graph.row_map( row_node );
403 
404  for ( ; entry < entry_end && graph.entries(entry) != static_cast<entry_type> (col_node) ; ++entry );
405 
406  if ( entry == entry_end ) entry = ~0u ;
407  }
408 
409  elem_graph( ielem , row_local_node , col_local_node ) = entry ;
410  }
411  }
412  }
413 
414  KOKKOS_INLINE_FUNCTION
415  void operator()( const unsigned iwork ) const
416  {
417  if ( phase == FILL_NODE_SET ) {
418  fill_set( iwork );
419  }
420  else if ( phase == FILL_GRAPH_ENTRIES ) {
421  fill_graph_entries( iwork );
422  }
423  else if ( phase == SORT_GRAPH_ENTRIES ) {
424  sort_graph_entries( iwork );
425  }
426  else if ( phase == FILL_ELEMENT_GRAPH ) {
427  fill_elem_graph_map( iwork );
428  }
429  }
430 
431  //------------------------------------
432  // parallel_scan: row offsets
433 
434  typedef unsigned value_type ;
435 
436  KOKKOS_INLINE_FUNCTION
437  void operator()( const unsigned irow , unsigned & update , const bool final ) const
438  {
439  // exclusive scan
440  if ( final ) { row_map( irow ) = update ; }
441 
442  update += row_count( irow );
443 
444  if ( final ) {
445  if ( irow + 1 == row_count.extent(0) ) {
446  row_map( irow + 1 ) = update ;
447  row_total() = update ;
448  }
449  }
450  }
451 
452  KOKKOS_INLINE_FUNCTION
453  void init( unsigned & update ) const { update = 0 ; }
454 
455  KOKKOS_INLINE_FUNCTION
456  void join( unsigned & update , const unsigned & input ) const { update += input ; }
457 
458  //------------------------------------
459 };
460 
461 } /* namespace FENL */
462 } /* namespace Example */
463 } /* namespace Kokkos */
464 
465 //----------------------------------------------------------------------------
466 //----------------------------------------------------------------------------
467 
468 namespace Kokkos {
469 namespace Example {
470 namespace FENL {
471 
473  enum { is_constant = false };
474 
475  const double coeff_k ;
476 
477  KOKKOS_INLINE_FUNCTION
478  double operator()( double pt[], unsigned ensemble_rank) const
479  { return coeff_k * std::sin(pt[0]) * std::sin(pt[1]) * std::sin(pt[2]); }
480 
482  : coeff_k( val ) {}
483 
485  : coeff_k( rhs.coeff_k ) {}
486 };
487 
488 // Exponential KL from Stokhos
489 template < typename Scalar, typename MeshScalar, typename Device >
491 public:
492 
493  // Turn into a meta-function class usable with Sacado::mpl::apply
494  template <typename T1, typename T2 = MeshScalar, typename T3 = Device>
495  struct apply {
497  };
498 
499  enum { is_constant = false };
500  typedef Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device> RandomVariableView;
501  typedef typename RandomVariableView::size_type size_type;
502 
507 
508  rf_type m_rf; // Exponential random field
509  const MeshScalar m_mean; // Mean of random field
510  const MeshScalar m_variance; // Variance of random field
511  const MeshScalar m_corr_len; // Correlation length of random field
512  const size_type m_num_rv; // Number of random variables
513  RandomVariableView m_rv; // KL random variables
514 
515 public:
516 
518  const MeshScalar mean ,
519  const MeshScalar variance ,
520  const MeshScalar correlation_length ,
521  const size_type num_rv ) :
522  m_mean( mean ),
523  m_variance( variance ),
524  m_corr_len( correlation_length ),
525  m_num_rv( num_rv ),
526  m_rv( "KL Random Variables", m_num_rv )
527  {
528  Teuchos::ParameterList solverParams;
529  solverParams.set("Number of KL Terms", int(num_rv));
530  solverParams.set("Mean", mean);
531  solverParams.set("Standard Deviation", std::sqrt(variance));
532  int ndim = 3;
533  Teuchos::Array<double> domain_upper(ndim, 1.0), domain_lower(ndim, 0.0),
534  correlation_lengths(ndim, correlation_length);
535  solverParams.set("Domain Upper Bounds", domain_upper);
536  solverParams.set("Domain Lower Bounds", domain_lower);
537  solverParams.set("Correlation Lengths", correlation_lengths);
538 
539  m_rf = rf_type(solverParams);
540  }
541 
543  m_rf( rhs.m_rf ) ,
544  m_mean( rhs.m_mean ) ,
545  m_variance( rhs.m_variance ) ,
546  m_corr_len( rhs.m_corr_len ) ,
547  m_num_rv( rhs.m_num_rv ) ,
548  m_rv( rhs.m_rv ) {}
549 
550  KOKKOS_INLINE_FUNCTION
551  void setRandomVariables( const RandomVariableView& rv) { m_rv = rv; }
552 
553  KOKKOS_INLINE_FUNCTION
555 
556  KOKKOS_INLINE_FUNCTION
557  local_scalar_type operator() ( const MeshScalar point[],
558  const size_type ensemble_rank ) const
559  {
560  local_rv_view_type local_rv =
562 
563  local_scalar_type val = m_rf.evaluate(point, local_rv);
564 
565  return val;
566  }
567 };
568 
569 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
570  class CoordinateMap , typename ScalarType >
572 {
573 public:
574 
577 
578  //------------------------------------
579 
580  typedef ExecutionSpace execution_space ;
581  typedef ScalarType scalar_type ;
582 
585  typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space > vector_type ;
586 
587  //------------------------------------
588 
590  static const unsigned TensorDim = SpatialDim * SpatialDim ;
594 
595  //------------------------------------
596 
599  typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
600  typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
601 
603 
604  //------------------------------------
605 
606 
607  //------------------------------------
608  // Computational data:
609 
619 
621  : elem_data()
623  , node_coords( rhs.node_coords )
624  , elem_graph( rhs.elem_graph )
627  , solution( rhs.solution )
628  , residual( rhs.residual )
629  , jacobian( rhs.jacobian )
630  {}
631 
632  ElementComputationBase( const mesh_type & arg_mesh ,
633  const vector_type & arg_solution ,
634  const elem_graph_type & arg_elem_graph ,
635  const sparse_matrix_type & arg_jacobian ,
636  const vector_type & arg_residual )
637  : elem_data()
638  , elem_node_ids( arg_mesh.elem_node() )
639  , node_coords( arg_mesh.node_coord() )
640  , elem_graph( arg_elem_graph )
641  , elem_jacobians()
642  , elem_residuals()
643  , solution( arg_solution )
644  , residual( arg_residual )
645  , jacobian( arg_jacobian )
646  {}
647 
648  //------------------------------------
649 
650  KOKKOS_INLINE_FUNCTION
652  const double grad[][ FunctionCount ] , // Gradient of bases master element
653  const double x[] ,
654  const double y[] ,
655  const double z[] ,
656  double dpsidx[] ,
657  double dpsidy[] ,
658  double dpsidz[] ) const
659  {
660  enum { j11 = 0 , j12 = 1 , j13 = 2 ,
661  j21 = 3 , j22 = 4 , j23 = 5 ,
662  j31 = 6 , j32 = 7 , j33 = 8 };
663 
664  // Jacobian accumulation:
665 
666  double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
667 
668  for( unsigned i = 0; i < FunctionCount ; ++i ) {
669  const double x1 = x[i] ;
670  const double x2 = y[i] ;
671  const double x3 = z[i] ;
672 
673  const double g1 = grad[0][i] ;
674  const double g2 = grad[1][i] ;
675  const double g3 = grad[2][i] ;
676 
677  J[j11] += g1 * x1 ;
678  J[j12] += g1 * x2 ;
679  J[j13] += g1 * x3 ;
680 
681  J[j21] += g2 * x1 ;
682  J[j22] += g2 * x2 ;
683  J[j23] += g2 * x3 ;
684 
685  J[j31] += g3 * x1 ;
686  J[j32] += g3 * x2 ;
687  J[j33] += g3 * x3 ;
688  }
689 
690  // Inverse jacobian:
691 
692  double invJ[ TensorDim ] = {
693  static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
694  static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
695  static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
696 
697  static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
698  static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
699  static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
700 
701  static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
702  static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
703  static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
704 
705  const double detJ = J[j11] * invJ[j11] +
706  J[j21] * invJ[j12] +
707  J[j31] * invJ[j13] ;
708 
709  const double detJinv = 1.0 / detJ ;
710 
711  for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
712 
713  // Transform gradients:
714 
715  for( unsigned i = 0; i < FunctionCount ; ++i ) {
716  const double g0 = grad[0][i];
717  const double g1 = grad[1][i];
718  const double g2 = grad[2][i];
719 
720  dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
721  dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
722  dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
723  }
724 
725  return detJ ;
726  }
727 
728 };
729 
735 };
736 
737 template< class FiniteElementMeshType ,
738  class SparseMatrixType ,
739  AssemblyMethod Method ,
740  class CoeffFunctionType = ElementComputationConstantCoefficient
741  >
743 
744 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
745  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
747  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
748  CrsMatrix< ScalarType , ExecutionSpace > ,
749  Analytic , CoeffFunctionType > :
750  public ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
751  ScalarType> {
752 public:
753 
754  typedef ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
755  ScalarType> base_type;
756 
759 
760  static const unsigned FunctionCount = base_type::FunctionCount;
761  static const unsigned IntegrationCount = base_type::IntegrationCount;
762  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
763 
764  const CoeffFunctionType coeff_function ;
766 
768  base_type(rhs) ,
769  coeff_function( rhs.coeff_function ) ,
770  dev_config( rhs.dev_config ) {}
771 
773  const typename base_type::mesh_type & arg_mesh ,
774  const CoeffFunctionType & arg_coeff_function ,
775  const typename base_type::vector_type & arg_solution ,
776  const typename base_type::elem_graph_type & arg_elem_graph ,
777  const typename base_type::sparse_matrix_type & arg_jacobian ,
778  const typename base_type::vector_type & arg_residual ,
779  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
780  base_type(arg_mesh, arg_solution, arg_elem_graph, arg_jacobian,
781  arg_residual),
782  coeff_function( arg_coeff_function ) ,
783  dev_config( arg_dev_config ) {}
784 
785  //------------------------------------
786 
787  void apply() const
788  {
789  const size_t nelem = this->elem_node_ids.extent(0);
790  parallel_for( nelem , *this );
791  }
792 
793  KOKKOS_INLINE_FUNCTION
794  void gatherSolution(const unsigned ielem,
795  scalar_type val[],
796  unsigned node_index[],
797  double x[], double y[], double z[],
798  scalar_type res[],
799  scalar_type mat[][FunctionCount]) const
800  {
801  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
802  const unsigned ni = this->elem_node_ids( ielem , i );
803 
804  node_index[i] = ni ;
805 
806  x[i] = this->node_coords( ni , 0 );
807  y[i] = this->node_coords( ni , 1 );
808  z[i] = this->node_coords( ni , 2 );
809 
810  val[i] = this->solution( ni ) ;
811  res[i] = 0 ;
812 
813  for( unsigned j = 0; j < FunctionCount ; j++){
814  mat[i][j] = 0 ;
815  }
816  }
817  }
818 
819  KOKKOS_INLINE_FUNCTION
820  void scatterResidual(const unsigned ielem,
821  const unsigned node_index[],
822  const scalar_type res[],
823  const scalar_type mat[][FunctionCount]) const
824  {
825  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
826  const unsigned row = node_index[i] ;
827  if ( row < this->residual.extent(0) ) {
828  atomic_add( & this->residual( row ) , res[i] );
829 
830  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
831  const unsigned entry = this->elem_graph( ielem , i , j );
832  if ( entry != ~0u ) {
833  atomic_add( & this->jacobian.values( entry ) , mat[i][j] );
834  }
835  }
836  }
837  }
838  }
839 
840  KOKKOS_INLINE_FUNCTION
842  const scalar_type dof_values[] ,
843  const double x[],
844  const double y[],
845  const double z[],
846  scalar_type elem_res[] ,
847  scalar_type elem_mat[][FunctionCount] ) const
848  {
849  scalar_type coeff_k;
850  double coeff_src = 1.234;
851  double advection[] = { 1.1, 1.2, 1.3 };
852  double dpsidx[ FunctionCount ] ;
853  double dpsidy[ FunctionCount ] ;
854  double dpsidz[ FunctionCount ] ;
855  double pt[] = {0.0, 0.0, 0.0};
856  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
857 
858  // If function is not constant
859  // then compute physical coordinates of integration point
860  if ( ! coeff_function.is_constant ) {
861  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
862  pt[0] += x[j] * this->elem_data.values[i][j] ;
863  pt[1] += y[j] * this->elem_data.values[i][j] ;
864  pt[2] += z[j] * this->elem_data.values[i][j] ;
865  }
866  }
867  coeff_k = coeff_function(pt, 0);
868 
869  const double integ_weight = this->elem_data.weights[i];
870  const double* bases_vals = this->elem_data.values[i];
871  const double detJ =
872  this->transform_gradients( this->elem_data.gradients[i] ,
873  x , y , z ,
874  dpsidx , dpsidy , dpsidz );
875  const double detJ_weight = detJ * integ_weight;
876  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
877 
878  scalar_type value_at_pt = 0 ;
879  scalar_type gradx_at_pt = 0 ;
880  scalar_type grady_at_pt = 0 ;
881  scalar_type gradz_at_pt = 0 ;
882  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
883  value_at_pt += dof_values[m] * bases_vals[m] ;
884  gradx_at_pt += dof_values[m] * dpsidx[m] ;
885  grady_at_pt += dof_values[m] * dpsidy[m] ;
886  gradz_at_pt += dof_values[m] * dpsidz[m] ;
887  }
888 
889  const scalar_type source_term =
890  coeff_src * value_at_pt * value_at_pt ;
891  const scalar_type source_deriv =
892  2.0 * coeff_src * value_at_pt ;
893 
894  const scalar_type advection_x = advection[0];
895  const scalar_type advection_y = advection[1];
896  const scalar_type advection_z = advection[2];
897 
898  const scalar_type advection_term =
899  advection_x*gradx_at_pt +
900  advection_y*grady_at_pt +
901  advection_z*gradz_at_pt ;
902 
903  for ( unsigned m = 0; m < FunctionCount; ++m) {
904  scalar_type * const mat = elem_mat[m] ;
905  const double bases_val_m = bases_vals[m] * detJ_weight ;
906  const double dpsidx_m = dpsidx[m] ;
907  const double dpsidy_m = dpsidy[m] ;
908  const double dpsidz_m = dpsidz[m] ;
909 
910  elem_res[m] +=
911  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
912  dpsidy_m * grady_at_pt +
913  dpsidz_m * gradz_at_pt ) +
914  bases_val_m * ( advection_term + source_term ) ;
915 
916  for( unsigned n = 0; n < FunctionCount; n++) {
917  const double dpsidx_n = dpsidx[n] ;
918  const double dpsidy_n = dpsidy[n] ;
919  const double dpsidz_n = dpsidz[n] ;
920  mat[n] +=
921  detJ_weight_coeff_k * ( dpsidx_m * dpsidx_n +
922  dpsidy_m * dpsidy_n +
923  dpsidz_m * dpsidz_n ) +
924  bases_val_m * ( advection_x * dpsidx_n +
925  advection_y * dpsidy_n +
926  advection_z * dpsidz_n +
927  source_deriv * bases_vals[n] ) ;
928  }
929  }
930  }
931  }
932 
933  KOKKOS_INLINE_FUNCTION
934  void operator()( const unsigned ielem ) const
935  {
936  double x[ FunctionCount ] ;
937  double y[ FunctionCount ] ;
938  double z[ FunctionCount ] ;
939  unsigned node_index[ ElemNodeCount ];
940 
942  scalar_type elem_res[ FunctionCount ] ;
943  scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
944 
945  // Gather nodal coordinates and solution vector:
946  gatherSolution(ielem, val, node_index, x, y, z, elem_res, elem_mat);
947 
948  // Compute nodal element residual vector and Jacobian matrix
949  computeElementResidualJacobian( val, x, y, z, elem_res , elem_mat );
950 
951  // Scatter nodal element residual and Jacobian in global vector and matrix:
952  scatterResidual( ielem, node_index, elem_res, elem_mat );
953  }
954 }; /* ElementComputation */
955 
956 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
957  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
959  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
960  CrsMatrix< ScalarType , ExecutionSpace > ,
961  FadElement , CoeffFunctionType > :
962  public ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
963  ScalarType> {
964 public:
965 
966  typedef ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
967  ScalarType> base_type;
968 
971 
972  static const unsigned FunctionCount = base_type::FunctionCount;
973  static const unsigned IntegrationCount = base_type::IntegrationCount;
974  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
975 
976  typedef Sacado::Fad::SLFad<scalar_type,FunctionCount> fad_scalar_type;
977  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
978 
979  const CoeffFunctionType coeff_function ;
981 
983  base_type(rhs) ,
984  coeff_function( rhs.coeff_function ) ,
985  dev_config( rhs.dev_config ) {}
986 
988  const typename base_type::mesh_type & arg_mesh ,
989  const CoeffFunctionType & arg_coeff_function ,
990  const typename base_type::vector_type & arg_solution ,
991  const typename base_type::elem_graph_type & arg_elem_graph ,
992  const typename base_type::sparse_matrix_type & arg_jacobian ,
993  const typename base_type::vector_type & arg_residual ,
994  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
995  base_type(arg_mesh, arg_solution, arg_elem_graph,
996  arg_jacobian, arg_residual),
997  coeff_function( arg_coeff_function ) ,
998  dev_config( arg_dev_config ) {}
999 
1000  //------------------------------------
1001 
1002  void apply() const
1003  {
1004  const size_t nelem = this->elem_node_ids.extent(0);
1005  parallel_for( nelem , *this );
1006  }
1007 
1008  KOKKOS_INLINE_FUNCTION
1009  void gatherSolution(const unsigned ielem,
1010  fad_scalar_type val[],
1011  unsigned node_index[],
1012  double x[], double y[], double z[],
1013  fad_scalar_type res[]) const
1014  {
1015  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1016  const unsigned ni = this->elem_node_ids( ielem , i );
1017 
1018  node_index[i] = ni ;
1019 
1020  x[i] = this->node_coords( ni , 0 );
1021  y[i] = this->node_coords( ni , 1 );
1022  z[i] = this->node_coords( ni , 2 );
1023 
1024  val[i].val() = this->solution( ni );
1025  val[i].diff( i, FunctionCount );
1026  }
1027  }
1028 
1029  KOKKOS_INLINE_FUNCTION
1030  void scatterResidual(const unsigned ielem,
1031  const unsigned node_index[],
1032  fad_scalar_type res[]) const
1033  {
1034  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
1035  const unsigned row = node_index[i] ;
1036  if ( row < this->residual.extent(0) ) {
1037  atomic_add( & this->residual( row ) , res[i].val() );
1038 
1039  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
1040  const unsigned entry = this->elem_graph( ielem , i , j );
1041  if ( entry != ~0u ) {
1042  atomic_add( & this->jacobian.values( entry ) ,
1043  res[i].fastAccessDx(j) );
1044  }
1045  }
1046  }
1047  }
1048  }
1049 
1050  template <typename local_scalar_type>
1051  KOKKOS_INLINE_FUNCTION
1052  void computeElementResidual(const local_scalar_type dof_values[] ,
1053  const double x[],
1054  const double y[],
1055  const double z[],
1056  local_scalar_type elem_res[] ) const
1057  {
1058  scalar_type coeff_k;
1059  double coeff_src = 1.234;
1060  double advection[] = { 1.1, 1.2, 1.3 };
1061  double dpsidx[ FunctionCount ] ;
1062  double dpsidy[ FunctionCount ] ;
1063  double dpsidz[ FunctionCount ] ;
1064  double pt[] = {0.0, 0.0, 0.0};
1065  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1066 
1067  // If function is not constant
1068  // then compute physical coordinates of integration point
1069  if ( ! coeff_function.is_constant ) {
1070  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1071  pt[0] += x[j] * this->elem_data.values[i][j] ;
1072  pt[1] += y[j] * this->elem_data.values[i][j] ;
1073  pt[2] += z[j] * this->elem_data.values[i][j] ;
1074  }
1075  }
1076  coeff_k = coeff_function(pt, 0);
1077 
1078  const double integ_weight = this->elem_data.weights[i];
1079  const double* bases_vals = this->elem_data.values[i];
1080  const double detJ =
1081  this->transform_gradients( this->elem_data.gradients[i] ,
1082  x , y , z ,
1083  dpsidx , dpsidy , dpsidz );
1084  const double detJ_weight = detJ * integ_weight;
1085  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1086 
1087  local_scalar_type value_at_pt = 0 ;
1088  local_scalar_type gradx_at_pt = 0 ;
1089  local_scalar_type grady_at_pt = 0 ;
1090  local_scalar_type gradz_at_pt = 0 ;
1091  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1092  value_at_pt += dof_values[m] * bases_vals[m] ;
1093  gradx_at_pt += dof_values[m] * dpsidx[m] ;
1094  grady_at_pt += dof_values[m] * dpsidy[m] ;
1095  gradz_at_pt += dof_values[m] * dpsidz[m] ;
1096  }
1097 
1098  const local_scalar_type source_term =
1099  coeff_src * value_at_pt * value_at_pt ;
1100 
1101  const local_scalar_type advection_term =
1102  advection[0]*gradx_at_pt +
1103  advection[1]*grady_at_pt +
1104  advection[2]*gradz_at_pt;
1105 
1106  for ( unsigned m = 0; m < FunctionCount; ++m) {
1107  const double bases_val_m = bases_vals[m] * detJ_weight ;
1108  const double dpsidx_m = dpsidx[m] ;
1109  const double dpsidy_m = dpsidy[m] ;
1110  const double dpsidz_m = dpsidz[m] ;
1111 
1112  elem_res[m] +=
1113  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1114  dpsidy_m * grady_at_pt +
1115  dpsidz_m * gradz_at_pt ) +
1116  bases_val_m * ( advection_term + source_term ) ;
1117  }
1118  }
1119  }
1120 
1121  KOKKOS_INLINE_FUNCTION
1122  void operator()( const unsigned ielem ) const
1123  {
1124  double x[ FunctionCount ] ;
1125  double y[ FunctionCount ] ;
1126  double z[ FunctionCount ] ;
1127  unsigned node_index[ ElemNodeCount ];
1128 
1130  fad_scalar_type elem_res[ FunctionCount ] ; // this zeros elem_res
1131 
1132  // Gather nodal coordinates and solution vector:
1133  gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1134 
1135  // Compute nodal element residual vector:
1136  computeElementResidual( val, x, y, z, elem_res );
1137 
1138  // Scatter nodal element residual in global vector:
1139  scatterResidual( ielem, node_index, elem_res );
1140  }
1141 }; /* ElementComputation */
1142 
1143 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
1144  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
1146  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1147  CrsMatrix< ScalarType , ExecutionSpace > ,
1148  FadElementOptimized , CoeffFunctionType > :
1149  public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1150  CrsMatrix< ScalarType , ExecutionSpace > ,
1151  FadElement, CoeffFunctionType > {
1152 public:
1153 
1156  FadElement , CoeffFunctionType > base_type;
1157 
1160 
1161  static const unsigned FunctionCount = base_type::FunctionCount;
1162  static const unsigned IntegrationCount = base_type::IntegrationCount;
1163  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1164 
1165  typedef Sacado::Fad::SLFad<scalar_type,FunctionCount> fad_scalar_type;
1166  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1167 
1169 
1171  const typename base_type::mesh_type & arg_mesh ,
1172  const CoeffFunctionType & arg_coeff_function ,
1173  const typename base_type::vector_type & arg_solution ,
1174  const typename base_type::elem_graph_type & arg_elem_graph ,
1175  const typename base_type::sparse_matrix_type & arg_jacobian ,
1176  const typename base_type::vector_type & arg_residual ,
1177  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1178  base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1179  arg_jacobian, arg_residual, arg_dev_config) {}
1180 
1181  //------------------------------------
1182 
1183  void apply() const
1184  {
1185  const size_t nelem = this->elem_node_ids.extent(0);
1186  parallel_for( nelem , *this );
1187  }
1188 
1189  KOKKOS_INLINE_FUNCTION
1190  void gatherSolution(const unsigned ielem,
1191  scalar_type val[],
1192  unsigned node_index[],
1193  double x[], double y[], double z[],
1194  fad_scalar_type res[]) const
1195  {
1196  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1197  const unsigned ni = this->elem_node_ids( ielem , i );
1198 
1199  node_index[i] = ni ;
1200 
1201  x[i] = this->node_coords( ni , 0 );
1202  y[i] = this->node_coords( ni , 1 );
1203  z[i] = this->node_coords( ni , 2 );
1204 
1205  val[i] = this->solution( ni );
1206  }
1207  }
1208 
1209  template <typename local_scalar_type>
1210  KOKKOS_INLINE_FUNCTION
1211  void computeElementResidual(const scalar_type dof_values[] ,
1212  const double x[],
1213  const double y[],
1214  const double z[],
1215  local_scalar_type elem_res[] ) const
1216  {
1217  scalar_type coeff_k;
1218  double coeff_src = 1.234;
1219  double advection[] = { 1.1, 1.2, 1.3 };
1220  double dpsidx[ FunctionCount ] ;
1221  double dpsidy[ FunctionCount ] ;
1222  double dpsidz[ FunctionCount ] ;
1223  double pt[] = {0.0, 0.0, 0.0};
1224  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1225 
1226  // If function is not constant
1227  // then compute physical coordinates of integration point
1228  if ( ! this->coeff_function.is_constant ) {
1229  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1230  pt[0] += x[j] * this->elem_data.values[i][j] ;
1231  pt[1] += y[j] * this->elem_data.values[i][j] ;
1232  pt[2] += z[j] * this->elem_data.values[i][j] ;
1233  }
1234  }
1235  coeff_k = this->coeff_function(pt, 0);
1236 
1237  const double integ_weight = this->elem_data.weights[i];
1238  const double* bases_vals = this->elem_data.values[i];
1239  const double detJ =
1240  this->transform_gradients( this->elem_data.gradients[i] ,
1241  x , y , z ,
1242  dpsidx , dpsidy , dpsidz );
1243  const double detJ_weight = detJ * integ_weight;
1244  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1245 
1246  local_scalar_type value_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1247  local_scalar_type gradx_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1248  local_scalar_type grady_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1249  local_scalar_type gradz_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1250  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1251  value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1252  value_at_pt.fastAccessDx(m) = bases_vals[m] ;
1253 
1254  gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1255  gradx_at_pt.fastAccessDx(m) = dpsidx[m] ;
1256 
1257  grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1258  grady_at_pt.fastAccessDx(m) = dpsidy[m] ;
1259 
1260  gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1261  gradz_at_pt.fastAccessDx(m) = dpsidz[m] ;
1262  }
1263 
1264  const local_scalar_type source_term =
1265  coeff_src * value_at_pt * value_at_pt ;
1266 
1267  const local_scalar_type advection_term =
1268  advection[0]*gradx_at_pt +
1269  advection[1]*grady_at_pt +
1270  advection[2]*gradz_at_pt;
1271 
1272  for ( unsigned m = 0; m < FunctionCount; ++m) {
1273  const double bases_val_m = bases_vals[m] * detJ_weight ;
1274  const double dpsidx_m = dpsidx[m] ;
1275  const double dpsidy_m = dpsidy[m] ;
1276  const double dpsidz_m = dpsidz[m] ;
1277 
1278  elem_res[m] +=
1279  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1280  dpsidy_m * grady_at_pt +
1281  dpsidz_m * gradz_at_pt ) +
1282  bases_val_m * ( advection_term + source_term ) ;
1283  }
1284  }
1285  }
1286 
1287  KOKKOS_INLINE_FUNCTION
1288  void operator()( const unsigned ielem ) const
1289  {
1290  double x[ FunctionCount ] ;
1291  double y[ FunctionCount ] ;
1292  double z[ FunctionCount ] ;
1293  unsigned node_index[ ElemNodeCount ];
1294 
1296  fad_scalar_type elem_res[ FunctionCount ] ;
1297 
1298  // Gather nodal coordinates and solution vector:
1299  gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1300 
1301  // Compute nodal element residual vector:
1302  computeElementResidual( val, x, y, z, elem_res );
1303 
1304  // Scatter nodal element residual in global vector:
1305  this->scatterResidual( ielem, node_index, elem_res );
1306  }
1307 }; /* ElementComputation */
1308 
1309 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
1310  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
1312  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1313  CrsMatrix< ScalarType , ExecutionSpace > ,
1314  FadQuadPoint , CoeffFunctionType > :
1315  public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1316  CrsMatrix< ScalarType , ExecutionSpace > ,
1317  Analytic , CoeffFunctionType > {
1318 public:
1319 
1322  Analytic , CoeffFunctionType > base_type;
1323 
1326 
1327  static const unsigned FunctionCount = base_type::FunctionCount;
1328  static const unsigned IntegrationCount = base_type::IntegrationCount;
1329  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1330 
1331  typedef Sacado::Fad::SLFad<scalar_type,4> fad_scalar_type;
1332  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1333 
1335 
1337  const typename base_type::mesh_type & arg_mesh ,
1338  const CoeffFunctionType & arg_coeff_function ,
1339  const typename base_type::vector_type & arg_solution ,
1340  const typename base_type::elem_graph_type & arg_elem_graph ,
1341  const typename base_type::sparse_matrix_type & arg_jacobian ,
1342  const typename base_type::vector_type & arg_residual ,
1343  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1344  base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1345  arg_jacobian, arg_residual, arg_dev_config) {}
1346 
1347  //------------------------------------
1348 
1349  void apply() const
1350  {
1351  const size_t nelem = this->elem_node_ids.extent(0);
1352  parallel_for( nelem , *this );
1353  }
1354 
1355  KOKKOS_INLINE_FUNCTION
1357  const scalar_type dof_values[] ,
1358  const double x[],
1359  const double y[],
1360  const double z[],
1361  scalar_type elem_res[] ,
1362  scalar_type elem_mat[][FunctionCount] ) const
1363  {
1364  scalar_type coeff_k;
1365  double coeff_src = 1.234;
1366  double advection[] = { 1.1, 1.2, 1.3 };
1367  double dpsidx[ FunctionCount ] ;
1368  double dpsidy[ FunctionCount ] ;
1369  double dpsidz[ FunctionCount ] ;
1370  double pt[] = {0.0, 0.0, 0.0};
1371 
1372  fad_scalar_type value_at_pt(4, 0, 0.0) ;
1373  fad_scalar_type gradx_at_pt(4, 1, 0.0) ;
1374  fad_scalar_type grady_at_pt(4, 2, 0.0) ;
1375  fad_scalar_type gradz_at_pt(4, 3, 0.0) ;
1376  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1377 
1378  // If function is not constant
1379  // then compute physical coordinates of integration point
1380  if ( ! this->coeff_function.is_constant ) {
1381  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1382  pt[0] += x[j] * this->elem_data.values[i][j] ;
1383  pt[1] += y[j] * this->elem_data.values[i][j] ;
1384  pt[2] += z[j] * this->elem_data.values[i][j] ;
1385  }
1386  }
1387  coeff_k = this->coeff_function(pt, 0);
1388 
1389  const double integ_weight = this->elem_data.weights[i];
1390  const double* bases_vals = this->elem_data.values[i];
1391  const double detJ =
1392  this->transform_gradients( this->elem_data.gradients[i] ,
1393  x , y , z ,
1394  dpsidx , dpsidy , dpsidz );
1395  const double detJ_weight = detJ * integ_weight;
1396  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1397 
1398  value_at_pt.val() = 0.0 ;
1399  gradx_at_pt.val() = 0.0 ;
1400  grady_at_pt.val() = 0.0 ;
1401  gradz_at_pt.val() = 0.0 ;
1402  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1403  value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1404  gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1405  grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1406  gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1407  }
1408 
1409  const fad_scalar_type source_term =
1410  coeff_src * value_at_pt * value_at_pt ;
1411 
1412  const fad_scalar_type advection_term =
1413  advection[0]*gradx_at_pt +
1414  advection[1]*grady_at_pt +
1415  advection[2]*gradz_at_pt;
1416 
1417  for ( unsigned m = 0; m < FunctionCount; ++m) {
1418  const double bases_val_m = bases_vals[m] * detJ_weight ;
1419  fad_scalar_type res =
1420  detJ_weight_coeff_k * ( dpsidx[m] * gradx_at_pt +
1421  dpsidy[m] * grady_at_pt +
1422  dpsidz[m] * gradz_at_pt ) +
1423  bases_val_m * ( advection_term + source_term ) ;
1424 
1425  elem_res[m] += res.val();
1426 
1427  scalar_type * const mat = elem_mat[m] ;
1428  for( unsigned n = 0; n < FunctionCount; n++) {
1429  mat[n] += res.fastAccessDx(0) * bases_vals[n] +
1430  res.fastAccessDx(1) * dpsidx[n] +
1431  res.fastAccessDx(2) * dpsidy[n] +
1432  res.fastAccessDx(3) * dpsidz[n];
1433  }
1434  }
1435  }
1436  }
1437 
1438  KOKKOS_INLINE_FUNCTION
1439  void operator()( const unsigned ielem ) const
1440  {
1441  double x[ FunctionCount ] ;
1442  double y[ FunctionCount ] ;
1443  double z[ FunctionCount ] ;
1444  unsigned node_index[ ElemNodeCount ];
1445 
1447  scalar_type elem_res[ FunctionCount ] ;
1448  scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
1449 
1450  // Gather nodal coordinates and solution vector:
1451  this->gatherSolution( ielem, val, node_index, x, y, z, elem_res, elem_mat );
1452 
1453  // Compute nodal element residual vector and Jacobian matrix:
1454  computeElementResidualJacobian( val, x, y, z, elem_res, elem_mat );
1455 
1456  // Scatter nodal element residual and Jacobian in global vector and matrix:
1457  this->scatterResidual( ielem, node_index, elem_res, elem_mat );
1458  }
1459 }; /* ElementComputation */
1460 
1461 #if 0
1462 template< class FiniteElementMeshType , class SparseMatrixType
1463  , class CoeffFunctionType = ElementComputationConstantCoefficient
1464  >
1465 class ElementComputation ;
1466 
1467 
1468 template< class ExecutionSpace , BoxElemPart::ElemOrder Order , class CoordinateMap ,
1469  typename ScalarType , class CoeffFunctionType >
1470 class ElementComputation
1471  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap >
1472  , Kokkos::Example::FENL::CrsMatrix< ScalarType , ExecutionSpace >
1473  , CoeffFunctionType >
1474 {
1475 public:
1476 
1479 
1480  //------------------------------------
1481 
1482  typedef ExecutionSpace execution_space ;
1483  typedef ScalarType scalar_type ;
1484 
1487  typedef typename sparse_matrix_type::values_type matrix_values_type ;
1488  typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space > vector_type ;
1489 
1490  //------------------------------------
1491 
1492  typedef LocalViewTraits< vector_type > local_vector_view_traits;
1493  typedef LocalViewTraits< matrix_values_type> local_matrix_view_traits;
1494  typedef typename local_vector_view_traits::local_view_type local_vector_type;
1495  typedef typename local_matrix_view_traits::local_view_type local_matrix_type;
1496  typedef typename local_vector_view_traits::local_value_type local_scalar_type;
1497  static const bool use_team = local_vector_view_traits::use_team;
1498 
1499  static const unsigned SpatialDim = element_data_type::spatial_dimension ;
1500  static const unsigned TensorDim = SpatialDim * SpatialDim ;
1501  static const unsigned ElemNodeCount = element_data_type::element_node_count ;
1502  static const unsigned FunctionCount = element_data_type::function_count ;
1503  static const unsigned IntegrationCount = element_data_type::integration_count ;
1504 
1505  //------------------------------------
1506 
1507  typedef typename mesh_type::node_coord_type node_coord_type ;
1508  typedef typename mesh_type::elem_node_type elem_node_type ;
1509  typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
1510  typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
1511 
1512  typedef LocalViewTraits< elem_matrices_type > local_elem_matrices_traits;
1513  typedef LocalViewTraits< elem_vectors_type > local_elem_vectors_traits;
1514  typedef typename local_elem_matrices_traits::local_view_type local_elem_matrices_type;
1515  typedef typename local_elem_vectors_traits::local_view_type local_elem_vectors_type;
1516 
1517  typedef typename NodeNodeGraph< elem_node_type , sparse_graph_type , ElemNodeCount >::ElemGraphType elem_graph_type ;
1518 
1519  //------------------------------------
1520 
1521 
1522  //------------------------------------
1523  // Computational data:
1524 
1528  const elem_graph_type elem_graph ;
1531  const vector_type solution ;
1532  const vector_type residual ;
1534  const CoeffFunctionType coeff_function ;
1535  const Kokkos::Example::FENL::DeviceConfig dev_config ;
1536 
1537  ElementComputation( const ElementComputation & rhs )
1538  : elem_data()
1539  , elem_node_ids( rhs.elem_node_ids )
1540  , node_coords( rhs.node_coords )
1541  , elem_graph( rhs.elem_graph )
1544  , solution( rhs.solution )
1545  , residual( rhs.residual )
1546  , jacobian( rhs.jacobian )
1547  , coeff_function( rhs.coeff_function )
1548  , dev_config( rhs.dev_config )
1549  {}
1550 
1551  // If the element->sparse_matrix graph is provided then perform atomic updates
1552  // Otherwise fill per-element contributions for subequent gather-add into a residual and jacobian.
1553  ElementComputation( const mesh_type & arg_mesh ,
1554  const CoeffFunctionType & arg_coeff_function ,
1555  const vector_type & arg_solution ,
1556  const elem_graph_type & arg_elem_graph ,
1557  const sparse_matrix_type & arg_jacobian ,
1558  const vector_type & arg_residual ,
1559  const Kokkos::Example::FENL::DeviceConfig arg_dev_config )
1560  : elem_data()
1561  , elem_node_ids( arg_mesh.elem_node() )
1562  , node_coords( arg_mesh.node_coord() )
1563  , elem_graph( arg_elem_graph )
1564  , elem_jacobians()
1565  , elem_residuals()
1566  , solution( arg_solution )
1567  , residual( arg_residual )
1568  , jacobian( arg_jacobian )
1569  , coeff_function( arg_coeff_function )
1570  , dev_config( arg_dev_config )
1571  {}
1572 
1573  //------------------------------------
1574 
1575  void apply() const
1576  {
1577  const size_t nelem = elem_node_ids.extent(0);
1578  if ( use_team ) {
1579  const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1580  const size_t league_size =
1581  (nelem + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1582  Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1583  parallel_for( config , *this );
1584  }
1585  else {
1586  parallel_for( nelem , *this );
1587  }
1588  }
1589 
1590  //------------------------------------
1591 
1592  static const unsigned FLOPS_transform_gradients =
1593  /* Jacobian */ FunctionCount * TensorDim * 2 +
1594  /* Inverse jacobian */ TensorDim * 6 + 6 +
1595  /* Gradient transform */ FunctionCount * 15 ;
1596 
1597  KOKKOS_INLINE_FUNCTION
1598  double transform_gradients(
1599  const double grad[][ FunctionCount ] , // Gradient of bases master element
1600  const double x[] ,
1601  const double y[] ,
1602  const double z[] ,
1603  double dpsidx[] ,
1604  double dpsidy[] ,
1605  double dpsidz[] ) const
1606  {
1607  enum { j11 = 0 , j12 = 1 , j13 = 2 ,
1608  j21 = 3 , j22 = 4 , j23 = 5 ,
1609  j31 = 6 , j32 = 7 , j33 = 8 };
1610 
1611  // Jacobian accumulation:
1612 
1613  double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1614 
1615  for( unsigned i = 0; i < FunctionCount ; ++i ) {
1616  const double x1 = x[i] ;
1617  const double x2 = y[i] ;
1618  const double x3 = z[i] ;
1619 
1620  const double g1 = grad[0][i] ;
1621  const double g2 = grad[1][i] ;
1622  const double g3 = grad[2][i] ;
1623 
1624  J[j11] += g1 * x1 ;
1625  J[j12] += g1 * x2 ;
1626  J[j13] += g1 * x3 ;
1627 
1628  J[j21] += g2 * x1 ;
1629  J[j22] += g2 * x2 ;
1630  J[j23] += g2 * x3 ;
1631 
1632  J[j31] += g3 * x1 ;
1633  J[j32] += g3 * x2 ;
1634  J[j33] += g3 * x3 ;
1635  }
1636 
1637  // Inverse jacobian:
1638 
1639  double invJ[ TensorDim ] = {
1640  static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
1641  static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
1642  static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
1643 
1644  static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
1645  static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
1646  static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
1647 
1648  static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
1649  static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
1650  static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
1651 
1652  const double detJ = J[j11] * invJ[j11] +
1653  J[j21] * invJ[j12] +
1654  J[j31] * invJ[j13] ;
1655 
1656  const double detJinv = 1.0 / detJ ;
1657 
1658  for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
1659 
1660  // Transform gradients:
1661 
1662  for( unsigned i = 0; i < FunctionCount ; ++i ) {
1663  const double g0 = grad[0][i];
1664  const double g1 = grad[1][i];
1665  const double g2 = grad[2][i];
1666 
1667  dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
1668  dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
1669  dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
1670  }
1671 
1672  return detJ ;
1673  }
1674 
1675  KOKKOS_INLINE_FUNCTION
1676  void contributeResidualJacobian(
1677  const local_scalar_type dof_values[] ,
1678  const double dpsidx[] ,
1679  const double dpsidy[] ,
1680  const double dpsidz[] ,
1681  const double detJ ,
1682  const local_scalar_type coeff_k ,
1683  const double integ_weight ,
1684  const double bases_vals[] ,
1685  local_scalar_type elem_res[] ,
1686  local_scalar_type elem_mat[][ FunctionCount ] ) const
1687  {
1688  local_scalar_type value_at_pt = 0 ;
1689  local_scalar_type gradx_at_pt = 0 ;
1690  local_scalar_type grady_at_pt = 0 ;
1691  local_scalar_type gradz_at_pt = 0 ;
1692 
1693  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1694  value_at_pt += dof_values[m] * bases_vals[m] ;
1695  gradx_at_pt += dof_values[m] * dpsidx[m] ;
1696  grady_at_pt += dof_values[m] * dpsidy[m] ;
1697  gradz_at_pt += dof_values[m] * dpsidz[m] ;
1698  }
1699 
1700  const local_scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
1701  const local_scalar_type res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
1702  const local_scalar_type mat_val = 2.0 * value_at_pt * detJ * integ_weight ;
1703 
1704  // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
1705  // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
1706 
1707  for ( unsigned m = 0; m < FunctionCount; ++m) {
1708  local_scalar_type * const mat = elem_mat[m] ;
1709  const double bases_val_m = bases_vals[m];
1710  const double dpsidx_m = dpsidx[m] ;
1711  const double dpsidy_m = dpsidy[m] ;
1712  const double dpsidz_m = dpsidz[m] ;
1713 
1714  elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
1715  dpsidy_m * grady_at_pt +
1716  dpsidz_m * gradz_at_pt ) +
1717  res_val * bases_val_m ;
1718 
1719  for( unsigned n = 0; n < FunctionCount; n++) {
1720 
1721  mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] +
1722  dpsidy_m * dpsidy[n] +
1723  dpsidz_m * dpsidz[n] ) +
1724  mat_val * bases_val_m * bases_vals[n];
1725  }
1726  }
1727  }
1728 
1729  KOKKOS_INLINE_FUNCTION
1730  void operator()( const typename TeamPolicy< execution_space >::member_type & dev ) const
1731  {
1732 
1733  const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1734  const unsigned num_element_threads = dev_config.block_dim.y ;
1735  const unsigned element_rank = dev.team_rank() / num_ensemble_threads ;
1736  const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1737 
1738  const unsigned ielem =
1739  dev.league_rank() * num_element_threads + element_rank;
1740 
1741  if (ielem >= elem_node_ids.extent(0))
1742  return;
1743 
1744  (*this)( ielem, ensemble_rank );
1745 
1746  }
1747 
1748  KOKKOS_INLINE_FUNCTION
1749  void operator()( const unsigned ielem ,
1750  const unsigned ensemble_rank = 0 ) const
1751  {
1752  local_vector_type local_solution =
1753  local_vector_view_traits::create_local_view(solution,
1754  ensemble_rank);
1755  local_vector_type local_residual =
1756  local_vector_view_traits::create_local_view(residual,
1757  ensemble_rank);
1758  local_matrix_type local_jacobian_values =
1759  local_matrix_view_traits::create_local_view(jacobian.values,
1760  ensemble_rank);
1761 
1762  // Gather nodal coordinates and solution vector:
1763 
1764  double x[ FunctionCount ] ;
1765  double y[ FunctionCount ] ;
1766  double z[ FunctionCount ] ;
1767  local_scalar_type val[ FunctionCount ] ;
1768  unsigned node_index[ ElemNodeCount ];
1769 
1770  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1771  const unsigned ni = elem_node_ids( ielem , i );
1772 
1773  node_index[i] = ni ;
1774 
1775  x[i] = node_coords( ni , 0 );
1776  y[i] = node_coords( ni , 1 );
1777  z[i] = node_coords( ni , 2 );
1778 
1779  val[i] = local_solution( ni );
1780  }
1781 
1782 
1783  local_scalar_type elem_vec[ FunctionCount ] ;
1784  local_scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
1785 
1786  for( unsigned i = 0; i < FunctionCount ; i++ ) {
1787  elem_vec[i] = 0 ;
1788  for( unsigned j = 0; j < FunctionCount ; j++){
1789  elem_mat[i][j] = 0 ;
1790  }
1791  }
1792 
1793 
1794  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1795  double dpsidx[ FunctionCount ] ;
1796  double dpsidy[ FunctionCount ] ;
1797  double dpsidz[ FunctionCount ] ;
1798 
1799  local_scalar_type coeff_k = 0 ;
1800 
1801  {
1802  double pt[] = {0.0, 0.0, 0.0};
1803 
1804  // If function is not constant
1805  // then compute physical coordinates of integration point
1806  if ( ! coeff_function.is_constant ) {
1807  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1808  pt[0] += x[j] * elem_data.values[i][j] ;
1809  pt[1] += y[j] * elem_data.values[i][j] ;
1810  pt[2] += z[j] * elem_data.values[i][j] ;
1811  }
1812  }
1813 
1814  // Need to fix this for local_scalar_type!!!!!!
1815  coeff_k = coeff_function(pt, ensemble_rank);
1816  }
1817 
1818  const double detJ =
1819  transform_gradients( elem_data.gradients[i] , x , y , z ,
1820  dpsidx , dpsidy , dpsidz );
1821 
1822  contributeResidualJacobian( val , dpsidx , dpsidy , dpsidz ,
1823  detJ , coeff_k ,
1824  elem_data.weights[i] ,
1825  elem_data.values[i] ,
1826  elem_vec , elem_mat );
1827  }
1828 
1829  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
1830  const unsigned row = node_index[i] ;
1831  if ( row < residual.extent(0) ) {
1832  atomic_add( & local_residual( row ) , elem_vec[i] );
1833 
1834  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
1835  const unsigned entry = elem_graph( ielem , i , j );
1836  if ( entry != ~0u ) {
1837  atomic_add( & local_jacobian_values( entry ) , elem_mat[i][j] );
1838  }
1839  }
1840  }
1841  }
1842  }
1843 }; /* ElementComputation */
1844 #endif
1845 
1846 //----------------------------------------------------------------------------
1847 
1848 template< class FixtureType , class SparseMatrixType >
1850 
1851 template< class ExecutionSpace , BoxElemPart::ElemOrder Order , class CoordinateMap ,
1852  typename ScalarType >
1854  Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1855  Kokkos::Example::FENL::CrsMatrix< ScalarType , ExecutionSpace > >
1856 {
1857 public:
1858 
1862 
1863  typedef ExecutionSpace execution_space ;
1864  typedef ScalarType scalar_type ;
1865 
1869  typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
1870 
1871  //------------------------------------
1872 
1877  static const bool use_team = local_vector_view_traits::use_team;
1878 
1879  typedef double bc_scalar_type ;
1880 
1881  //------------------------------------
1882  // Computational data:
1883 
1892  const unsigned bc_plane ;
1893  const unsigned node_count ;
1894  bool init ;
1896 
1897 
1898  DirichletComputation( const mesh_type & arg_mesh ,
1899  const vector_type & arg_solution ,
1900  const sparse_matrix_type & arg_jacobian ,
1901  const vector_type & arg_residual ,
1902  const unsigned arg_bc_plane ,
1903  const bc_scalar_type arg_bc_lower_value ,
1904  const bc_scalar_type arg_bc_upper_value ,
1905  const Kokkos::Example::FENL::DeviceConfig arg_dev_config )
1906  : node_coords( arg_mesh.node_coord() )
1907  , solution( arg_solution )
1908  , jacobian( arg_jacobian )
1909  , residual( arg_residual )
1910  , bc_lower_value( arg_bc_lower_value )
1911  , bc_upper_value( arg_bc_upper_value )
1912  , bc_lower_limit( std::numeric_limits<scalar_coord_type>::epsilon() )
1913  , bc_upper_limit( scalar_coord_type(1) - std::numeric_limits<scalar_coord_type>::epsilon() )
1914  , bc_plane( arg_bc_plane )
1915  , node_count( arg_mesh.node_count_owned() )
1916  , init( false )
1917  , dev_config( arg_dev_config )
1918  {
1919  parallel_for( node_count , *this );
1920  init = true ;
1921  }
1922 
1923  void apply() const
1924  {
1925  if ( use_team ) {
1926  const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1927  const size_t league_size =
1928  (node_count + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1929  Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1930  parallel_for( config , *this );
1931  }
1932  else
1933  parallel_for( node_count , *this );
1934  }
1935 
1936  //------------------------------------
1937 
1938  KOKKOS_INLINE_FUNCTION
1939  void operator()( const typename TeamPolicy< execution_space >::member_type & dev ) const
1940  {
1941 
1942  const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1943  const unsigned num_node_threads = dev_config.block_dim.y ;
1944  const unsigned node_rank = dev.team_rank() / num_ensemble_threads ;
1945  const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1946 
1947  const unsigned inode = dev.league_rank() * num_node_threads + node_rank;
1948 
1949  if (inode >= node_count)
1950  return;
1951 
1952  (*this)( inode, ensemble_rank );
1953 
1954  }
1955 
1956  KOKKOS_INLINE_FUNCTION
1957  void operator()( const unsigned inode ,
1958  const unsigned ensemble_rank = 0) const
1959  {
1960  local_vector_type local_residual =
1961  local_vector_view_traits::create_local_view(residual,
1962  ensemble_rank);
1963  local_matrix_type local_jacobian_values =
1964  local_matrix_view_traits::create_local_view(jacobian.values,
1965  ensemble_rank);
1966 
1967  // Apply dirichlet boundary condition on the Solution and Residual vectors.
1968  // To maintain the symmetry of the original global stiffness matrix,
1969  // zero out the columns that correspond to boundary conditions, and
1970  // update the residual vector accordingly
1971 
1972  const unsigned iBeg = jacobian.graph.row_map[inode];
1973  const unsigned iEnd = jacobian.graph.row_map[inode+1];
1974 
1975  const scalar_coord_type c = node_coords(inode,bc_plane);
1976  const bool bc_lower = c <= bc_lower_limit ;
1977  const bool bc_upper = bc_upper_limit <= c ;
1978 
1979  if ( ! init ) {
1980  solution(inode) = bc_lower ? bc_lower_value : (
1981  bc_upper ? bc_upper_value : 0 );
1982  }
1983  else {
1984  if ( bc_lower || bc_upper ) {
1985 
1986  local_residual(inode) = 0 ;
1987 
1988  // zero each value on the row, and leave a one
1989  // on the diagonal
1990 
1991  for( unsigned i = iBeg ; i < iEnd ; ++i ) {
1992  local_jacobian_values(i) = int(inode) == int(jacobian.graph.entries(i)) ? 1 : 0 ;
1993  }
1994  }
1995  else {
1996 
1997  // Find any columns that are boundary conditions.
1998  // Clear them and adjust the residual vector
1999 
2000  for( unsigned i = iBeg ; i < iEnd ; ++i ) {
2001  const unsigned cnode = jacobian.graph.entries(i) ;
2002  const scalar_coord_type cc = node_coords(cnode,bc_plane);
2003 
2004  if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) {
2005  local_jacobian_values(i) = 0 ;
2006  }
2007  }
2008  }
2009  }
2010  }
2011 };
2012 
2013 } /* namespace FENL */
2014 } /* namespace Example */
2015 } /* namespace Kokkos */
2016 
2017 //----------------------------------------------------------------------------
2018 
2019 /* A Cuda-specific specialization for the element computation functor. */
2020 #if defined( __CUDACC__ )
2021 // #include <NonlinearElement_Cuda.hpp>
2022 #endif
2023 
2024 //----------------------------------------------------------------------------
2025 
2026 #endif /* #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP */
static KOKKOS_INLINE_FUNCTION local_view_type create_local_view(const view_type &v, const unsigned local_rank)
KOKKOS_INLINE_FUNCTION PCE< Storage > sqrt(const PCE< Storage > &a)
ExponentialKLCoefficient(const ExponentialKLCoefficient &rhs)
KOKKOS_INLINE_FUNCTION void setRandomVariables(const RandomVariableView &rv)
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
LocalViewTraits< RandomVariableView > local_rv_view_traits
KOKKOS_INLINE_FUNCTION Teuchos::PromotionTraits< typename rv_type::value_type, value_type >::promote evaluate(const point_type &point, const rv_type &random_variables) const
Evaluate random field at a point.
KOKKOS_INLINE_FUNCTION void fill_graph_entries(const unsigned iset) const
Kokkos::View< scalar_type *[FunctionCount], execution_space > elem_vectors_type
CrsMatrix< ScalarType, ExecutionSpace > sparse_matrix_type
KOKKOS_INLINE_FUNCTION void join(unsigned &update, const unsigned &input) const
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
Kokkos::View< const double *[SpaceDim], Device > node_coord_type
Kokkos::DefaultExecutionSpace execution_space
double gradients[integration_count][spatial_dimension][function_count]
Kokkos::View< const unsigned *[ElemNode], Device > elem_node_type
KOKKOS_INLINE_FUNCTION RandomVariableView getRandomVariables() const
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
Kokkos::View< scalar_type *, Kokkos::LayoutLeft, execution_space > vector_type
KOKKOS_INLINE_FUNCTION void atomic_add(volatile Sacado::UQ::PCE< Storage > *const dest, const Sacado::UQ::PCE< Storage > &src)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Kokkos::View< Scalar *, Kokkos::LayoutLeft, Device > RandomVariableView
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
KOKKOS_INLINE_FUNCTION local_scalar_type operator()(const MeshScalar point[], const size_type ensemble_rank) const
NodeNodeGraph< elem_node_type, sparse_graph_type, ElemNodeCount >::ElemGraphType elem_graph_type
local_rv_view_traits::local_view_type local_rv_view_type
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, Analytic, CoeffFunctionType > base_type
ElemNodeIdView::execution_space execution_space
KOKKOS_INLINE_FUNCTION void operator()(const unsigned irow, unsigned &update, const bool final) const
ElementComputationConstantCoefficient(const ElementComputationConstantCoefficient &rhs)
KOKKOS_INLINE_FUNCTION void init(unsigned &update) const
KOKKOS_INLINE_FUNCTION void fill_elem_graph_map(const unsigned ielem) const
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
Kokkos::Example::HexElement_Data< mesh_type::ElemNode > element_data_type
CrsGraphType::row_map_type::non_const_type RowMapType
ElementComputationBase(const mesh_type &arg_mesh, const vector_type &arg_solution, const elem_graph_type &arg_elem_graph, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual)
CrsMatrix(const StaticCrsGraphType &arg_graph)
Kokkos::StaticCrsGraph< unsigned, Space, void, void, unsigned > StaticCrsGraphType
KOKKOS_INLINE_FUNCTION double transform_gradients(const double grad[][FunctionCount], const double x[], const double y[], const double z[], double dpsidx[], double dpsidy[], double dpsidz[]) const
double values[integration_count][function_count]
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
KOKKOS_INLINE_FUNCTION void operator()(const unsigned iwork) const
KOKKOS_INLINE_FUNCTION double operator()(double pt[], unsigned ensemble_rank) const
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap > mesh_type
NodeNodeGraph(const ElemNodeIdView &arg_elem_node_id, const unsigned arg_node_count, Times &results)
expr val()
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], scalar_type res[], scalar_type mat[][FunctionCount]) const
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, FadElement, CoeffFunctionType > base_type
sparse_matrix_type::StaticCrsGraphType sparse_graph_type
Kokkos::View< unsigned, execution_space > UnsignedValue
DeviceConfig(const size_t num_blocks_=0, const size_t threads_per_block_x_=0, const size_t threads_per_block_y_=0, const size_t threads_per_block_z_=1)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, fad_scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
local_rv_view_traits::local_value_type local_scalar_type
DirichletComputation(const mesh_type &arg_mesh, const vector_type &arg_solution, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual, const unsigned arg_bc_plane, const bc_scalar_type arg_bc_lower_value, const bc_scalar_type arg_bc_upper_value, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION PCE< Storage > sin(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION void scatterResidual(const unsigned ielem, const unsigned node_index[], const scalar_type res[], const scalar_type mat[][FunctionCount]) const
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION void sort_graph_entries(const unsigned irow) const
Kokkos::View< scalar_type *[FunctionCount][FunctionCount], execution_space > elem_matrices_type
KOKKOS_INLINE_FUNCTION void fill_set(const unsigned ielem) const
Kokkos::View< unsigned *[ElemNode][ElemNode], execution_space > ElemGraphType
int n
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...
Stokhos::KL::ExponentialRandomField< MeshScalar, Device > rf_type
ExponentialKLCoefficient(const MeshScalar mean, const MeshScalar variance, const MeshScalar correlation_length, const size_type num_rv)
KOKKOS_INLINE_FUNCTION void computeElementResidual(const local_scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
Kokkos::UnorderedMap< key_type, void, execution_space > SetType
KOKKOS_INLINE_FUNCTION void computeElementResidual(const scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const