Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Sacado_Fad_Exp_MP_Vector.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef SACADO_FAD_EXP_MP_VECTOR_HPP
43 #define SACADO_FAD_EXP_MP_VECTOR_HPP
44 
45 #include "Sacado_MP_Vector.hpp"
46 
47 namespace Sacado {
48 
49  namespace Fad {
50  namespace Exp {
51 
53  class ExprSpecMPVector {};
54 
56 
61  template <typename T>
62  class Extender<
63  T,
64  typename std::enable_if<
65  Sacado::is_mp_vector<typename T::value_type>::value >::type
66  > : public T
67  {
68  public:
69 
70  typedef typename T::value_type value_type;
71  typedef typename value_type::value_type val_type;
72 
73  // Define expression template specialization
75 
76  // Bring in constructors
77  using T::T;
78 
79  // Bring in methods we are overloading
80  using T::val;
81  using T::dx;
82  using T::fastAccessDx;
83 
85  KOKKOS_INLINE_FUNCTION
86  const val_type& val(int j) const { return T::val().fastAccessCoeff(j); }
87 
89  KOKKOS_INLINE_FUNCTION
90  val_type& val(int j) { return T::val().fastAccessCoeff(j); }
91 
93  KOKKOS_INLINE_FUNCTION
94  val_type dx(int i, int j) const {
95  return this->size() ? this->dx_[i].fastAccessCoeff(j) : val_type(0.0);
96  }
97 
99  KOKKOS_INLINE_FUNCTION
100  val_type& fastAccessDx(int i, int j) {
101  return this->dx_[i].fastAccessCoeff(j);
102  }
103 
105  KOKKOS_INLINE_FUNCTION
106  const val_type& fastAccessDx(int i, int j) const {
107  return this->dx_[i].fastAccessCoeff(j);
108  }
109 
110  };
111 
113  template <typename DstType>
114  class ExprAssign<
115  DstType,
116  typename std::enable_if<
117  std::is_same< typename DstType::expr_spec_type, ExprSpecMPVector >::value
118  >::type
119  > {
120  public:
121 
123  typedef typename DstType::value_type value_type;
124 
125  // MP::Vector size (assuming static, because that's all we care about)
126  static const int VecNum = Sacado::StaticSize<value_type>::value;
127 
129  template <typename SrcType>
130  KOKKOS_INLINE_FUNCTION
131  static void assign_equal(DstType& dst, const SrcType& x)
132  {
133  const int xsz = x.size();
134 
135  if (xsz != dst.size())
136  dst.resizeAndZero(xsz);
137 
138  const int sz = dst.size();
139 
140  // For ViewStorage, the resize above may not in fact resize the
141  // derivative array, so it is possible that sz != xsz at this point.
142  // The only valid use case here is sz > xsz == 0, so we use sz in the
143  // assignment below
144 
145  if (sz) {
146  if (x.hasFastAccess()) {
147  SACADO_FAD_DERIV_LOOP(i,sz)
148  for (int j=0; j<VecNum; ++j)
149  dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
150  }
151  else
152  SACADO_FAD_DERIV_LOOP(i,sz)
153  for (int j=0; j<VecNum; ++j)
154  dst.fastAccessDx(i,j) = x.dx(i,j);
155  }
156 
157  for (int j=0; j<VecNum; ++j)
158  dst.val(j) = x.val(j);
159  }
160 
162  template <typename SrcType>
163  KOKKOS_INLINE_FUNCTION
164  static void assign_plus_equal(DstType& dst, const SrcType& x)
165  {
166  const int xsz = x.size(), sz = dst.size();
167 
168 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
169  if ((xsz != sz) && (xsz != 0) && (sz != 0))
170  throw "Fad Error: Attempt to assign with incompatible sizes";
171 #endif
172 
173  if (xsz) {
174  if (sz) {
175  if (x.hasFastAccess())
176  SACADO_FAD_DERIV_LOOP(i,sz)
177  for (int j=0; j<VecNum; ++j)
178  dst.fastAccessDx(i,j) += x.fastAccessDx(i,j);
179  else
180  for (int i=0; i<sz; ++i)
181  for (int j=0; j<VecNum; ++j)
182  dst.fastAccessDx(i,j) += x.dx(i,j);
183  }
184  else {
185  dst.resizeAndZero(xsz);
186  if (x.hasFastAccess())
187  SACADO_FAD_DERIV_LOOP(i,xsz)
188  for (int j=0; j<VecNum; ++j)
189  dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
190  else
191  SACADO_FAD_DERIV_LOOP(i,xsz)
192  for (int j=0; j<VecNum; ++j)
193  dst.fastAccessDx(i,j) = x.dx(i,j);
194  }
195  }
196 
197  for (int j=0; j<VecNum; ++j)
198  dst.val(j) += x.val(j);
199  }
200 
202  template <typename SrcType>
203  KOKKOS_INLINE_FUNCTION
204  static void assign_minus_equal(DstType& dst, const SrcType& x)
205  {
206  const int xsz = x.size(), sz = dst.size();
207 
208 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
209  if ((xsz != sz) && (xsz != 0) && (sz != 0))
210  throw "Fad Error: Attempt to assign with incompatible sizes";
211 #endif
212 
213  if (xsz) {
214  if (sz) {
215  if (x.hasFastAccess())
216  SACADO_FAD_DERIV_LOOP(i,sz)
217  for (int j=0; j<VecNum; ++j)
218  dst.fastAccessDx(i,j) -= x.fastAccessDx(i,j);
219  else
220  SACADO_FAD_DERIV_LOOP(i,sz)
221  for (int j=0; j<VecNum; ++j)
222  dst.fastAccessDx(i,j) -= x.dx(i,j);
223  }
224  else {
225  dst.resizeAndZero(xsz);
226  if (x.hasFastAccess())
227  SACADO_FAD_DERIV_LOOP(i,xsz)
228  for (int j=0; j<VecNum; ++j)
229  dst.fastAccessDx(i,j) = -x.fastAccessDx(i,j);
230  else
231  SACADO_FAD_DERIV_LOOP(i,xsz)
232  for (int j=0; j<VecNum; ++j)
233  dst.fastAccessDx(i,j) = -x.dx(i,j);
234  }
235  }
236 
237  for (int j=0; j<VecNum; ++j)
238  dst.val(j) -= x.val(j);
239  }
240 
242  template <typename SrcType>
243  KOKKOS_INLINE_FUNCTION
244  static void assign_times_equal(DstType& dst, const SrcType& x)
245  {
246  const int xsz = x.size(), sz = dst.size();
247  const value_type xval = x.val();
248  const value_type v = dst.val();
249 
250 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
251  if ((xsz != sz) && (xsz != 0) && (sz != 0))
252  throw "Fad Error: Attempt to assign with incompatible sizes";
253 #endif
254 
255  if (xsz) {
256  if (sz) {
257  if (x.hasFastAccess())
258  SACADO_FAD_DERIV_LOOP(i,sz)
259  for (int j=0; j<VecNum; ++j)
260  dst.fastAccessDx(i) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
261  else
262  SACADO_FAD_DERIV_LOOP(i,sz)
263  for (int j=0; j<VecNum; ++j)
264  dst.fastAccessDx(i) = v.fastAccessCoeff(j)*x.dx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
265  }
266  else {
267  dst.resizeAndZero(xsz);
268  if (x.hasFastAccess())
269  SACADO_FAD_DERIV_LOOP(i,xsz)
270  for (int j=0; j<VecNum; ++j)
271  dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j);
272  else
273  SACADO_FAD_DERIV_LOOP(i,xsz)
274  for (int j=0; j<VecNum; ++j)
275  dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.dx(i,j);
276  }
277  }
278  else {
279  if (sz) {
280  SACADO_FAD_DERIV_LOOP(i,sz)
281  for (int j=0; j<VecNum; ++j)
282  dst.fastAccessDx(i,j) *= xval.fastAccessCoeff(j);
283  }
284  }
285 
286  for (int j=0; j<VecNum; ++j)
287  dst.val(j) *= xval.fastAccessCoeff(j);
288  }
289 
291  template <typename SrcType>
292  KOKKOS_INLINE_FUNCTION
293  static void assign_divide_equal(DstType& dst, const SrcType& x)
294  {
295  const int xsz = x.size(), sz = dst.size();
296  const value_type xval = x.val();
297  const value_type v = dst.val();
298 
299 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
300  if ((xsz != sz) && (xsz != 0) && (sz != 0))
301  throw "Fad Error: Attempt to assign with incompatible sizes";
302 #endif
303 
304  if (xsz) {
305  const value_type xval2 = xval*xval;
306  if (sz) {
307  if (x.hasFastAccess())
308  SACADO_FAD_DERIV_LOOP(i,sz)
309  for (int j=0; j<VecNum; ++j)
310  dst.fastAccessDx(i,j) =
311  ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) ) / xval2.fastAccessCoeff(j);
312  else
313  SACADO_FAD_DERIV_LOOP(i,sz)
314  for (int j=0; j<VecNum; ++j)
315  dst.fastAccessDx(i,j) =
316  ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.dx(i,j) ) / xval2.fastAccessCoeff(j);
317  }
318  else {
319  dst.resizeAndZero(xsz);
320  if (x.hasFastAccess())
321  SACADO_FAD_DERIV_LOOP(i,xsz)
322  for (int j=0; j<VecNum; ++j)
323  dst.fastAccessDx(i,j) = - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) / xval2.fastAccessCoeff(j);
324  else
325  SACADO_FAD_DERIV_LOOP(i,xsz)
326  for (int j=0; j<VecNum; ++j)
327  dst.fastAccessDx(i,j) = -v.fastAccessCoeff(j)*x.dx(i,j) / xval2.fastAccessCoeff(j);
328  }
329  }
330  else {
331  if (sz) {
332  SACADO_FAD_DERIV_LOOP(i,sz)
333  for (int j=0; j<VecNum; ++j)
334  dst.fastAccessDx(i,j) /= xval.fastAccessCoeff(j);
335  }
336  }
337 
338  for (int j=0; j<VecNum; ++j)
339  dst.val(j) /= xval.fastAccessCoeff(j);
340  }
341 
342  };
343 
348  template <typename DstType>
349  class ExprAssign<
350  DstType,
351  typename std::enable_if<
352  Sacado::IsStaticallySized<DstType>::value &&
353  std::is_same< typename DstType::expr_spec_type, ExprSpecMPVector >::value
354  >::type
355  > {
356  public:
357 
359  typedef typename DstType::value_type value_type;
360 
361  // MP::Vector size (assuming static, because that's all we care about)
362  static const int VecNum = Sacado::StaticSize<value_type>::value;
363 
365  template <typename SrcType>
366  KOKKOS_INLINE_FUNCTION
367  static void assign_equal(DstType& dst, const SrcType& x)
368  {
369  const int sz = dst.size();
370  SACADO_FAD_DERIV_LOOP(i,sz)
371  for (int j=0; j<VecNum; ++j)
372  dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
373  for (int j=0; j<VecNum; ++j)
374  dst.val(j) = x.val(j);
375  }
376 
378  template <typename SrcType>
379  KOKKOS_INLINE_FUNCTION
380  static void assign_plus_equal(DstType& dst, const SrcType& x)
381  {
382  const int sz = dst.size();
383  SACADO_FAD_DERIV_LOOP(i,sz)
384  for (int j=0; j<VecNum; ++j)
385  dst.fastAccessDx(i,j) += x.fastAccessDx(i,j);
386  for (int j=0; j<VecNum; ++j)
387  dst.val(j) += x.val(j);
388  }
389 
391  template <typename SrcType>
392  KOKKOS_INLINE_FUNCTION
393  static void assign_minus_equal(DstType& dst, const SrcType& x)
394  {
395  const int sz = dst.size();
396  SACADO_FAD_DERIV_LOOP(i,sz)
397  for (int j=0; j<VecNum; ++j)
398  dst.fastAccessDx(i,j) -= x.fastAccessDx(i,j);
399  for (int j=0; j<VecNum; ++j)
400  dst.val(j) -= x.val(j);
401  }
402 
404  template <typename SrcType>
405  KOKKOS_INLINE_FUNCTION
406  static void assign_times_equal(DstType& dst, const SrcType& x)
407  {
408  const int sz = dst.size();
409  const value_type xval = x.val();
410  const value_type v = dst.val();
411  SACADO_FAD_DERIV_LOOP(i,sz)
412  for (int j=0; j<VecNum; ++j)
413  dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
414  for (int j=0; j<VecNum; ++j)
415  dst.val(j) *= xval.fastAccessCoeff(j);
416  }
417 
419  template <typename SrcType>
420  KOKKOS_INLINE_FUNCTION
421  static void assign_divide_equal(DstType& dst, const SrcType& x)
422  {
423  const int sz = dst.size();
424  const value_type xval = x.val();
425  const value_type xval2 = xval*xval;
426  const value_type v = dst.val();
427  SACADO_FAD_DERIV_LOOP(i,sz)
428  for (int j=0; j<VecNum; ++j)
429  dst.fastAccessDx(i,j) =
430  ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) )/ xval2.fastAccessCoeff(j);
431  for (int j=0; j<VecNum; ++j)
432  dst.val(j) /= xval.fastAccessCoeff(j);
433  }
434 
435  };
436 
437  } // namespace Exp
438  } // namespace Fad
439 
440 } // namespace Sacado
441 
442 // Specialize expression template operators to add similar extensions
443 #include "Sacado_Fad_Exp_Ops.hpp"
444 
445 #define FAD_UNARYOP_MACRO(OPNAME,OP,USING,MPVALUE,VALUE,DX,FASTACCESSDX) \
446 namespace Sacado { \
447  namespace Fad { \
448  namespace Exp { \
449  \
450  template <typename T> \
451  class OP< T,ExprSpecMPVector > : \
452  public Expr< OP< T,ExprSpecMPVector > > { \
453  public: \
454  \
455  typedef typename std::remove_cv<T>::type ExprT; \
456  typedef typename ExprT::value_type value_type; \
457  typedef typename ExprT::scalar_type scalar_type; \
458  \
459  typedef typename value_type::value_type val_type; \
460  \
461  typedef ExprSpecMPVector expr_spec_type; \
462  \
463  KOKKOS_INLINE_FUNCTION \
464  OP(const T& expr_) : expr(expr_) {} \
465  \
466  KOKKOS_INLINE_FUNCTION \
467  int size() const { return expr.size(); } \
468  \
469  KOKKOS_INLINE_FUNCTION \
470  bool hasFastAccess() const { \
471  return expr.hasFastAccess(); \
472  } \
473  \
474  KOKKOS_INLINE_FUNCTION \
475  value_type val() const { \
476  USING \
477  return MPVALUE; \
478  } \
479  \
480  KOKKOS_INLINE_FUNCTION \
481  val_type val(int j) const { \
482  USING \
483  return VALUE; \
484  } \
485  \
486  KOKKOS_INLINE_FUNCTION \
487  val_type dx(int i, int j) const { \
488  USING \
489  return DX; \
490  } \
491  \
492  KOKKOS_INLINE_FUNCTION \
493  val_type fastAccessDx(int i, int j) const { \
494  USING \
495  return FASTACCESSDX; \
496  } \
497  \
498  protected: \
499  \
500  const T& expr; \
501  }; \
502  \
503  } \
504  } \
505  \
506 }
507 
508 FAD_UNARYOP_MACRO(operator+,
509  UnaryPlusOp,
510  ;,
511  expr.val(),
512  expr.val(j),
513  expr.dx(i,j),
514  expr.fastAccessDx(i,j))
515 FAD_UNARYOP_MACRO(operator-,
517  ;,
518  -expr.val(),
519  -expr.val(j),
520  -expr.dx(i,j),
521  -expr.fastAccessDx(i,j))
524  using std::exp;,
525  exp(expr.val()),
526  exp(expr.val(j)),
527  exp(expr.val(j))*expr.dx(i,j),
528  exp(expr.val(j))*expr.fastAccessDx(i,j))
530  LogOp,
531  using std::log;,
532  log(expr.val()),
533  log(expr.val(j)),
534  expr.dx(i,j)/expr.val(j),
535  expr.fastAccessDx(i,j)/expr.val(j))
538  using std::log10; using std::log;,
539  log10(expr.val()),
540  log10(expr.val(j)),
541  expr.dx(i,j)/( log(value_type(10))*expr.val()),
542  expr.fastAccessDx(i,j) / ( log(value_type(10))*expr.val()))
545  using std::sqrt;,
546  sqrt(expr.val()),
547  sqrt(expr.val(j)),
548  expr.dx(i,j)/(value_type(2)* sqrt(expr.val())),
549  expr.fastAccessDx(i,j)/(value_type(2)* sqrt(expr.val())))
552  using std::cos; using std::sin;,
553  cos(expr.val()),
554  cos(expr.val(j)),
555  -expr.dx(i,j)* sin(expr.val()),
556  -expr.fastAccessDx(i,j)* sin(expr.val()))
559  using std::cos; using std::sin;,
560  sin(expr.val()),
561  sin(expr.val(j)),
562  expr.dx(i,j)* cos(expr.val()),
563  expr.fastAccessDx(i,j)* cos(expr.val()))
566  using std::tan;,
567  tan(expr.val()),
568  tan(expr.val(j)),
569  expr.dx(i,j)*
570  (value_type(1)+ tan(expr.val())* tan(expr.val())),
571  expr.fastAccessDx(i,j)*
572  (value_type(1)+ tan(expr.val())* tan(expr.val())))
575  using std::acos; using std::sqrt;,
576  acos(expr.val()),
577  acos(expr.val(j)),
578  -expr.dx(i,j)/ sqrt(value_type(1)-expr.val()*expr.val()),
579  -expr.fastAccessDx(i,j) /
580  sqrt(value_type(1)-expr.val()*expr.val()))
583  using std::asin; using std::sqrt;,
584  asin(expr.val()),
585  asin(expr.val(j)),
586  expr.dx(i,j)/ sqrt(value_type(1)-expr.val()*expr.val()),
587  expr.fastAccessDx(i,j) /
588  sqrt(value_type(1)-expr.val()*expr.val()))
591  using std::atan;,
592  atan(expr.val()),
593  atan(expr.val(j)),
594  expr.dx(i,j)/(value_type(1)+expr.val()*expr.val()),
595  expr.fastAccessDx(i,j)/(value_type(1)+expr.val()*expr.val()))
598  using std::cosh; using std::sinh;,
599  cosh(expr.val()),
600  cosh(expr.val(j)),
601  expr.dx(i,j)* sinh(expr.val()),
602  expr.fastAccessDx(i,j)* sinh(expr.val()))
603 FAD_UNARYOP_MACRO(sinh,
605  using std::cosh; using std::sinh;,
606  sinh(expr.val()),
607  sinh(expr.val(j)),
608  expr.dx(i,j)* cosh(expr.val()),
609  expr.fastAccessDx(i,j)* cosh(expr.val()))
612  using std::tanh; using std::cosh;,
613  tanh(expr.val()),
614  tanh(expr.val(j)),
615  expr.dx(i,j)/( cosh(expr.val())* cosh(expr.val())),
616  expr.fastAccessDx(i,j) /
617  ( cosh(expr.val())* cosh(expr.val())))
620  using std::acosh; using std::sqrt;,
621  acosh(expr.val()),
622  acosh(expr.val(j)),
623  expr.dx(i,j)/ sqrt((expr.val()-value_type(1)) *
624  (expr.val()+value_type(1))),
625  expr.fastAccessDx(i,j)/ sqrt((expr.val()-value_type(1)) *
626  (expr.val()+value_type(1))))
629  using std::asinh; using std::sqrt;,
630  asinh(expr.val()),
631  asinh(expr.val(j)),
632  expr.dx(i,j)/ sqrt(value_type(1)+expr.val()*expr.val()),
633  expr.fastAccessDx(i,j)/ sqrt(value_type(1)+
634  expr.val()*expr.val()))
637  using std::atanh;,
638  atanh(expr.val()),
639  atanh(expr.val(j)),
640  expr.dx(i,j)/(value_type(1)-expr.val()*expr.val()),
641  expr.fastAccessDx(i,j)/(value_type(1)-
642  expr.val()*expr.val()))
645  using std::abs;,
646  abs(expr.val()),
647  abs(expr.val(j)),
648  if_then_else( expr.val() >= 0, expr.dx(i,j), value_type(-expr.dx(i,j)) ),
649  if_then_else( expr.val() >= 0, expr.fastAccessDx(i,j), value_type(-expr.fastAccessDx(i,j)) ) )
652  using std::fabs;,
653  fabs(expr.val()),
654  fabs(expr.val(j)),
655  if_then_else( expr.val() >= 0, expr.dx(i,j), value_type(-expr.dx(i,j)) ),
656  if_then_else( expr.val() >= 0, expr.fastAccessDx(i,j), value_type(-expr.fastAccessDx(i,j)) ) )
659  using std::cbrt;,
660  cbrt(expr.val()),
661  cbrt(expr.val(j)),
662  expr.dx(i,j)/(value_type(3)*cbrt(expr.val()*expr.val())),
663  expr.fastAccessDx(i,j)/(value_type(3)*cbrt(expr.val()*expr.val())))
664 
665 #undef FAD_UNARYOP_MACRO
666 
667 namespace Sacado {
668  namespace Fad {
669  namespace Exp {
670 
671  // For MP::Vector scalar type, promote constants up to expression's value
672  // type. If the constant type is the same as the value type, we can store
673  // the constant as a reference. If it isn't, we must copy it into a new
674  // value type object. We do this so that we can always access the constant
675  // as a value type.
676  template <typename ConstType, typename ValueType>
677  struct ConstTypeRef {
678  typedef ValueType type;
679  };
680 
681  template <typename ValueType>
682  struct ConstTypeRef<ValueType, ValueType> {
683  typedef ValueType& type;
684  };
685  }
686  }
687 }
688 
689 #define FAD_BINARYOP_MACRO(OPNAME,OP,USING,MPVALUE,VALUE,DX,CDX1,CDX2,FASTACCESSDX,MPVAL_CONST_DX_1,MPVAL_CONST_DX_2,VAL_CONST_DX_1,VAL_CONST_DX_2,CONST_DX_1,CONST_DX_2,CONST_FASTACCESSDX_1,CONST_FASTACCESSDX_2) \
690 namespace Sacado { \
691  namespace Fad { \
692  namespace Exp { \
693  \
694  template <typename T1, typename T2 > \
695  class OP< T1, T2, false, false, ExprSpecMPVector > : \
696  public Expr< OP< T1, T2, false, false, ExprSpecMPVector > > { \
697  public: \
698  \
699  typedef typename std::remove_cv<T1>::type ExprT1; \
700  typedef typename std::remove_cv<T2>::type ExprT2; \
701  typedef typename ExprT1::value_type value_type_1; \
702  typedef typename ExprT2::value_type value_type_2; \
703  typedef typename Sacado::Promote<value_type_1, \
704  value_type_2>::type value_type; \
705  \
706  typedef typename ExprT1::scalar_type scalar_type_1; \
707  typedef typename ExprT2::scalar_type scalar_type_2; \
708  typedef typename Sacado::Promote<scalar_type_1, \
709  scalar_type_2>::type scalar_type; \
710  \
711  typedef typename value_type::value_type val_type; \
712  \
713  typedef ExprSpecMPVector expr_spec_type; \
714  \
715  KOKKOS_INLINE_FUNCTION \
716  OP(const T1& expr1_, const T2& expr2_) : \
717  expr1(expr1_), expr2(expr2_) {} \
718  \
719  KOKKOS_INLINE_FUNCTION \
720  int size() const { \
721  const int sz1 = expr1.size(), sz2 = expr2.size(); \
722  return sz1 > sz2 ? sz1 : sz2; \
723  } \
724  \
725  KOKKOS_INLINE_FUNCTION \
726  bool hasFastAccess() const { \
727  return expr1.hasFastAccess() && expr2.hasFastAccess(); \
728  } \
729  \
730  KOKKOS_INLINE_FUNCTION \
731  value_type val() const { \
732  USING \
733  return MPVALUE; \
734  } \
735  \
736  KOKKOS_INLINE_FUNCTION \
737  val_type val(int j) const { \
738  USING \
739  return VALUE; \
740  } \
741  \
742  KOKKOS_INLINE_FUNCTION \
743  val_type dx(int i, int j) const { \
744  USING \
745  const int sz1 = expr1.size(), sz2 = expr2.size(); \
746  if (sz1 > 0 && sz2 > 0) \
747  return DX; \
748  else if (sz1 > 0) \
749  return CDX2; \
750  else \
751  return CDX1; \
752  } \
753  \
754  KOKKOS_INLINE_FUNCTION \
755  val_type fastAccessDx(int i, int j) const { \
756  USING \
757  return FASTACCESSDX; \
758  } \
759  \
760  protected: \
761  \
762  const T1& expr1; \
763  const T2& expr2; \
764  \
765  }; \
766  \
767  template <typename T1, typename T2> \
768  class OP< T1, T2, false, true, ExprSpecMPVector > : \
769  public Expr< OP< T1, T2, false, true, ExprSpecMPVector > > { \
770  public: \
771  \
772  typedef typename std::remove_cv<T1>::type ExprT1; \
773  typedef T2 ConstT; \
774  typedef typename ExprT1::value_type value_type; \
775  typedef typename ExprT1::scalar_type scalar_type; \
776  \
777  typedef typename value_type::value_type val_type; \
778  \
779  typedef ExprSpecMPVector expr_spec_type; \
780  \
781  KOKKOS_INLINE_FUNCTION \
782  OP(const T1& expr1_, const ConstT& c_) : \
783  expr1(expr1_), c(c_) {} \
784  \
785  KOKKOS_INLINE_FUNCTION \
786  int size() const { \
787  return expr1.size(); \
788  } \
789  \
790  KOKKOS_INLINE_FUNCTION \
791  bool hasFastAccess() const { \
792  return expr1.hasFastAccess(); \
793  } \
794  \
795  KOKKOS_INLINE_FUNCTION \
796  value_type val() const { \
797  USING \
798  return MPVAL_CONST_DX_2; \
799  } \
800  \
801  KOKKOS_INLINE_FUNCTION \
802  val_type val(int j) const { \
803  USING \
804  return VAL_CONST_DX_2; \
805  } \
806  \
807  KOKKOS_INLINE_FUNCTION \
808  val_type dx(int i, int j) const { \
809  USING \
810  return CONST_DX_2; \
811  } \
812  \
813  KOKKOS_INLINE_FUNCTION \
814  val_type fastAccessDx(int i, int j) const { \
815  USING \
816  return CONST_FASTACCESSDX_2; \
817  } \
818  \
819  protected: \
820  \
821  const T1& expr1; \
822  const typename ConstTypeRef<ConstT,value_type>::type c; \
823  }; \
824  \
825  template <typename T1, typename T2> \
826  class OP< T1, T2, true, false,ExprSpecMPVector > : \
827  public Expr< OP< T1, T2, true, false, ExprSpecMPVector > > { \
828  public: \
829  \
830  typedef typename std::remove_cv<T2>::type ExprT2; \
831  typedef T1 ConstT; \
832  typedef typename ExprT2::value_type value_type; \
833  typedef typename ExprT2::scalar_type scalar_type; \
834  \
835  typedef typename value_type::value_type val_type; \
836  \
837  typedef ExprSpecMPVector expr_spec_type; \
838  \
839  KOKKOS_INLINE_FUNCTION \
840  OP(const ConstT& c_, const T2& expr2_) : \
841  c(c_), expr2(expr2_) {} \
842  \
843  KOKKOS_INLINE_FUNCTION \
844  int size() const { \
845  return expr2.size(); \
846  } \
847  \
848  KOKKOS_INLINE_FUNCTION \
849  bool hasFastAccess() const { \
850  return expr2.hasFastAccess(); \
851  } \
852  \
853  KOKKOS_INLINE_FUNCTION \
854  value_type val() const { \
855  USING \
856  return MPVAL_CONST_DX_1; \
857  } \
858  \
859  KOKKOS_INLINE_FUNCTION \
860  val_type val(int j) const { \
861  USING \
862  return VAL_CONST_DX_1; \
863  } \
864  \
865  KOKKOS_INLINE_FUNCTION \
866  val_type dx(int i, int j) const { \
867  USING \
868  return CONST_DX_1; \
869  } \
870  \
871  KOKKOS_INLINE_FUNCTION \
872  val_type fastAccessDx(int i, int j) const { \
873  USING \
874  return CONST_FASTACCESSDX_1; \
875  } \
876  \
877  protected: \
878  \
879  const typename ConstTypeRef<ConstT,value_type>::type c; \
880  const T2& expr2; \
881  }; \
882  \
883  } \
884  } \
885  \
886 }
887 
888 
889 FAD_BINARYOP_MACRO(operator+,
890  AdditionOp,
891  ;,
892  expr1.val() + expr2.val(),
893  expr1.val(j) + expr2.val(j),
894  expr1.dx(i,j) + expr2.dx(i,j),
895  expr2.dx(i,j),
896  expr1.dx(i,j),
897  expr1.fastAccessDx(i,j) + expr2.fastAccessDx(i,j),
898  c + expr2.val(),
899  expr1.val() + c,
900  c.fastAccessCoeff(j) + expr2.val(j),
901  expr1.val(j) + c.fastAccessCoeff(j),
902  expr2.dx(i,j),
903  expr1.dx(i,j),
904  expr2.fastAccessDx(i,j),
905  expr1.fastAccessDx(i,j))
906 FAD_BINARYOP_MACRO(operator-,
908  ;,
909  expr1.val() - expr2.val(),
910  expr1.val(j) - expr2.val(j),
911  expr1.dx(i,j) - expr2.dx(i,j),
912  -expr2.dx(i,j),
913  expr1.dx(i,j),
914  expr1.fastAccessDx(i,j) - expr2.fastAccessDx(i,j),
915  c - expr2.val(),
916  expr1.val() - c,
917  c.fastAccessCoeff(j) - expr2.val(j),
918  expr1.val(j) - c.fastAccessCoeff(j),
919  -expr2.dx(i,j),
920  expr1.dx(i,j),
921  -expr2.fastAccessDx(i,j),
922  expr1.fastAccessDx(i,j))
923 FAD_BINARYOP_MACRO(operator*,
925  ;,
926  expr1.val() * expr2.val(),
927  expr1.val(j) * expr2.val(j),
928  expr1.val(j)*expr2.dx(i,j) + expr1.dx(i,j)*expr2.val(j),
929  expr1.val(j)*expr2.dx(i,j),
930  expr1.dx(i,j)*expr2.val(j),
931  expr1.val(j)*expr2.fastAccessDx(i,j) +
932  expr1.fastAccessDx(i,j)*expr2.val(j),
933  c * expr2.val(),
934  expr1.val() * c,
935  c.fastAccessCoeff(j) * expr2.val(j),
936  expr1.val(j) * c.fastAccessCoeff(j),
937  c.fastAccessCoeff(j)*expr2.dx(i,j),
938  expr1.dx(i,j)*c.fastAccessCoeff(j),
939  c.fastAccessCoeff(j)*expr2.fastAccessDx(i,j),
940  expr1.fastAccessDx(i,j)*c.fastAccessCoeff(j))
941 FAD_BINARYOP_MACRO(operator/,
943  ;,
944  expr1.val() / expr2.val(),
945  expr1.val(j) / expr2.val(j),
946  (expr1.dx(i,j)*expr2.val(j) - expr2.dx(i,j)*expr1.val(j)) /
947  (expr2.val(j)*expr2.val(j)),
948  -expr2.dx(i,j)*expr1.val(j) / (expr2.val(j)*expr2.val(j)),
949  expr1.dx(i,j)/expr2.val(j),
950  (expr1.fastAccessDx(i,j)*expr2.val(j) -
951  expr2.fastAccessDx(i,j)*expr1.val(j)) /
952  (expr2.val(j)*expr2.val(j)),
953  c / expr2.val(),
954  expr1.val() / c,
955  c.fastAccessCoeff(j) / expr2.val(j),
956  expr1.val(j) / c.fastAccessCoeff(j),
957  -expr2.dx(i,j)*c.fastAccessCoeff(j) / (expr2.val(j)*expr2.val(j)),
958  expr1.dx(i,j)/c.fastAccessCoeff(j),
959  -expr2.fastAccessDx(i,j)*c.fastAccessCoeff(j) / (expr2.val(j)*expr2.val(j)),
960  expr1.fastAccessDx(i,j)/c.fastAccessCoeff(j))
963  using std::atan2;,
964  atan2(expr1.val(), expr2.val()),
965  atan2(expr1.val(j), expr2.val(j)),
966  (expr2.val(j)*expr1.dx(i,j) - expr1.val(j)*expr2.dx(i,j))/
967  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
968  -expr1.val(j)*expr2.dx(i,j)/
969  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
970  expr2.val(j)*expr1.dx(i,j)/
971  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
972  (expr2.val(j)*expr1.fastAccessDx(i,j) - expr1.val(j)*expr2.fastAccessDx(i,j))/
973  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
974  atan2(c, expr2.val()),
975  atan2(expr1.val(), c),
976  atan2(c.fastAccessCoeff(j), expr2.val(j)),
977  atan2(expr1.val(j), c.fastAccessCoeff(j)),
978  (-c.fastAccessCoeff(j)*expr2.dx(i,j)) / (c.fastAccessCoeff(j)*c.fastAccessCoeff(j) + expr2.val(j)*expr2.val(j)),
979  (c.fastAccessCoeff(j)*expr1.dx(i,j))/ (expr1.val(j)*expr1.val(j) + c.fastAccessCoeff(j)*c.fastAccessCoeff(j)),
980  (-c.fastAccessCoeff(j)*expr2.fastAccessDx(i,j))/ (c.fastAccessCoeff(j)*c.fastAccessCoeff(j) + expr2.val(j)*expr2.val(j)),
981  (c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j))/ (expr1.val(j)*expr1.val(j) + c.fastAccessCoeff(j)*c.fastAccessCoeff(j)))
982 // FAD_BINARYOP_MACRO(pow,
983 // PowerOp,
984 // using std::pow; using std::log;,
985 // pow(expr1.val(), expr2.val()),
986 // pow(expr1.val(j), expr2.val(j)),
987 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
988 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
989 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.val(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),expr2.val(j))) ),
990 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
991 // pow(c, expr2.val()),
992 // pow(expr1.val(), c),
993 // pow(c.fastAccessCoeff(j), expr2.val(j)),
994 // pow(expr1.val(j), c.fastAccessCoeff(j)),
995 // if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) ),
996 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) ),
997 // if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) ),
998 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j)))) )
1001  ;,
1002  if_then_else( expr1.val() >= expr2.val(), expr1.val(), expr2.val() ),
1003  if_then_else( expr1.val(j) >= expr2.val(j), expr1.val(j), expr2.val(j) ),
1004  if_then_else( expr1.val(j) >= expr2.val(j), expr1.dx(i,j), expr2.dx(i,j) ),
1005  if_then_else( expr1.val(j) >= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1006  if_then_else( expr1.val(j) >= expr2.val(j), expr1.dx(i,j), val_type(0.0) ),
1007  if_then_else( expr1.val(j) >= expr2.val(j), expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) ),
1008  if_then_else( c >= expr2.val(), c, expr2.val() ),
1009  if_then_else( expr1.val() >= c, expr1.val(), c ),
1010  if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), c.fastAccessCoeff(j), expr2.val(j) ),
1011  if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.val(j), c.fastAccessCoeff(j) ),
1012  if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1013  if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.dx(i,j), val_type(0.0) ),
1014  if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), val_type(0.0), expr2.fastAccessDx(i,j) ),
1015  if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.fastAccessDx(i,j), val_type(0.0) ) )
1018  ;,
1019  if_then_else( expr1.val() <= expr2.val(), expr1.val(), expr2.val() ),
1020  if_then_else( expr1.val(j) <= expr2.val(j), expr1.val(j), expr2.val(j) ),
1021  if_then_else( expr1.val(j) <= expr2.val(j), expr1.dx(i,j), expr2.dx(i,j) ),
1022  if_then_else( expr1.val(j) <= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1023  if_then_else( expr1.val(j) <= expr2.val(j), expr1.dx(i,j), val_type(0.0) ),
1024  if_then_else( expr1.val(j) <= expr2.val(j), expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) ),
1025  if_then_else( c <= expr2.val(), c, expr2.val() ),
1026  if_then_else( expr1.val() <= c, expr1.val(), c ),
1027  if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), c.fastAccessCoeff(j), expr2.val(j) ),
1028  if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.val(j), c.fastAccessCoeff(j) ),
1029  if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), val_type(0), expr2.dx(i,j) ),
1030  if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.dx(i,j), val_type(0) ),
1031  if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), val_type(0), expr2.fastAccessDx(i,j) ),
1032  if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.fastAccessDx(i,j), val_type(0) ) )
1033 
1034 // Special handling for std::pow() to provide specializations of PowerOp for
1035 // "simd" value types that use if_then_else(). The only reason for not using
1036 // if_then_else() always is to avoid evaluating the derivative if the value is
1037 // zero to avoid throwing FPEs.
1038 namespace Sacado {
1039  namespace Fad {
1040  namespace Exp {
1041 
1042  //
1043  // Implementation for simd type using if_then_else()
1044  //
1045  template <typename T1, typename T2>
1046  class PowerOp< T1, T2, false, false, ExprSpecMPVector, true > :
1047  public Expr< PowerOp< T1, T2, false, false, ExprSpecMPVector, true > > {
1048  public:
1049 
1050  typedef typename std::remove_cv<T1>::type ExprT1;
1051  typedef typename std::remove_cv<T2>::type ExprT2;
1052  typedef typename ExprT1::value_type value_type_1;
1053  typedef typename ExprT2::value_type value_type_2;
1054  typedef typename Sacado::Promote<value_type_1,
1055  value_type_2>::type value_type;
1056 
1057  typedef typename ExprT1::scalar_type scalar_type_1;
1058  typedef typename ExprT2::scalar_type scalar_type_2;
1059  typedef typename Sacado::Promote<scalar_type_1,
1060  scalar_type_2>::type scalar_type;
1061 
1062  typedef typename value_type::value_type val_type;
1063 
1064  typedef ExprSpecMPVector expr_spec_type;
1065 
1066  KOKKOS_INLINE_FUNCTION
1067  PowerOp(const T1& expr1_, const T2& expr2_) :
1068  expr1(expr1_), expr2(expr2_) {}
1069 
1070  KOKKOS_INLINE_FUNCTION
1071  int size() const {
1072  const int sz1 = expr1.size(), sz2 = expr2.size();
1073  return sz1 > sz2 ? sz1 : sz2;
1074  }
1075 
1076  KOKKOS_INLINE_FUNCTION
1077  bool hasFastAccess() const {
1078  return expr1.hasFastAccess() && expr2.hasFastAccess();
1079  }
1080 
1081  KOKKOS_INLINE_FUNCTION
1082  value_type val() const {
1083  using std::pow;
1084  return pow(expr1.val(), expr2.val());
1085  }
1086 
1087  KOKKOS_INLINE_FUNCTION
1088  val_type val(int j) const {
1089  using std::pow;
1090  return pow(expr1.val(j), expr2.val(j));
1091  }
1092 
1093  KOKKOS_INLINE_FUNCTION
1094  val_type dx(int i, int j) const {
1095  using std::pow; using std::log;
1096  const int sz1 = expr1.size(), sz2 = expr2.size();
1097  if (sz1 > 0 && sz2 > 0)
1098  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1099  else if (sz1 > 0)
1100  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1101  // It seems less accurate and caused convergence problems in some codes
1102  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.val(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),expr2.val(j))) );
1103  else
1104  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1105  }
1106 
1107  KOKKOS_INLINE_FUNCTION
1108  val_type fastAccessDx(int i, int j) const {
1109  using std::pow; using std::log;
1110  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1111  }
1112 
1113  protected:
1114 
1115  const T1& expr1;
1116  const T2& expr2;
1117 
1118  };
1119 
1120  template <typename T1, typename T2>
1121  class PowerOp< T1, T2, false, true, ExprSpecMPVector, true >
1122  : public Expr< PowerOp< T1, T2, false, true, ExprSpecMPVector, true > > {
1123  public:
1124 
1125  typedef typename std::remove_cv<T1>::type ExprT1;
1126  typedef T2 ConstT;
1127  typedef typename ExprT1::value_type value_type;
1128  typedef typename ExprT1::scalar_type scalar_type;
1129 
1130  typedef typename value_type::value_type val_type;
1131 
1132  typedef ExprSpecMPVector expr_spec_type;
1133 
1134  KOKKOS_INLINE_FUNCTION
1135  PowerOp(const T1& expr1_, const ConstT& c_) :
1136  expr1(expr1_), c(c_) {}
1137 
1138  KOKKOS_INLINE_FUNCTION
1139  int size() const {
1140  return expr1.size();
1141  }
1142 
1143  KOKKOS_INLINE_FUNCTION
1144  bool hasFastAccess() const {
1145  return expr1.hasFastAccess();
1146  }
1147 
1148  KOKKOS_INLINE_FUNCTION
1149  value_type val() const {
1150  using std::pow;
1151  return pow(expr1.val(), c);
1152  }
1153 
1154  KOKKOS_INLINE_FUNCTION
1155  val_type val(int j) const {
1156  using std::pow;
1157  return pow(expr1.val(j), c.fastAccessCoeff(j));
1158  }
1159 
1160  KOKKOS_INLINE_FUNCTION
1161  val_type dx(int i, int j) const {
1162  using std::pow;
1163  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1164  // It seems less accurate and caused convergence problems in some codes
1165  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) );
1166  }
1167 
1168  KOKKOS_INLINE_FUNCTION
1169  val_type fastAccessDx(int i, int j) const {
1170  using std::pow;
1171  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1172  // It seems less accurate and caused convergence problems in some codes
1173  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) );
1174  }
1175 
1176  protected:
1177 
1178  const T1& expr1;
1179  const ConstT& c;
1180  };
1181 
1182  template <typename T1, typename T2>
1183  class PowerOp< T1, T2, true, false, ExprSpecMPVector, true >
1184  : public Expr< PowerOp< T1, T2, true, false, ExprSpecMPVector, true > > {
1185  public:
1186 
1187  typedef typename std::remove_cv<T2>::type ExprT2;
1188  typedef T1 ConstT;
1189  typedef typename ExprT2::value_type value_type;
1190  typedef typename ExprT2::scalar_type scalar_type;
1191 
1192  typedef typename value_type::value_type val_type;
1193 
1194  typedef ExprSpecMPVector expr_spec_type;
1195 
1196  KOKKOS_INLINE_FUNCTION
1197  PowerOp(const ConstT& c_, const T2& expr2_) :
1198  c(c_), expr2(expr2_) {}
1199 
1200  KOKKOS_INLINE_FUNCTION
1201  int size() const {
1202  return expr2.size();
1203  }
1204 
1205  KOKKOS_INLINE_FUNCTION
1206  bool hasFastAccess() const {
1207  return expr2.hasFastAccess();
1208  }
1209 
1210  KOKKOS_INLINE_FUNCTION
1211  value_type val() const {
1212  using std::pow;
1213  return pow(c, expr2.val());
1214  }
1215 
1216  KOKKOS_INLINE_FUNCTION
1217  val_type val(int j) const {
1218  using std::pow;
1219  return pow(c.fastAccessCoeff(j), expr2.val(j));
1220  }
1221 
1222  KOKKOS_INLINE_FUNCTION
1223  val_type dx(int i, int j) const {
1224  using std::pow; using std::log;
1225  return if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) );
1226  }
1227 
1228  KOKKOS_INLINE_FUNCTION
1229  val_type fastAccessDx(int i, int j) const {
1230  using std::pow; using std::log;
1231  return if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) );
1232  }
1233 
1234  protected:
1235 
1236  const ConstT& c;
1237  const T2& expr2;
1238  };
1239 
1240  //
1241  // Specialization for scalar types using ternary operator
1242  //
1243 
1244  template <typename T1, typename T2>
1245  class PowerOp< T1, T2, false, false, ExprSpecMPVector, false > :
1246  public Expr< PowerOp< T1, T2, false, false, ExprSpecMPVector, false > > {
1247  public:
1248 
1249  typedef typename std::remove_cv<T1>::type ExprT1;
1250  typedef typename std::remove_cv<T2>::type ExprT2;
1251  typedef typename ExprT1::value_type value_type_1;
1252  typedef typename ExprT2::value_type value_type_2;
1253  typedef typename Sacado::Promote<value_type_1,
1254  value_type_2>::type value_type;
1255 
1256  typedef typename ExprT1::scalar_type scalar_type_1;
1257  typedef typename ExprT2::scalar_type scalar_type_2;
1258  typedef typename Sacado::Promote<scalar_type_1,
1259  scalar_type_2>::type scalar_type;
1260 
1261  typedef typename value_type::value_type val_type;
1262 
1263  typedef ExprSpecMPVector expr_spec_type;
1264 
1265  KOKKOS_INLINE_FUNCTION
1266  PowerOp(const T1& expr1_, const T2& expr2_) :
1267  expr1(expr1_), expr2(expr2_) {}
1268 
1269  KOKKOS_INLINE_FUNCTION
1270  int size() const {
1271  const int sz1 = expr1.size(), sz2 = expr2.size();
1272  return sz1 > sz2 ? sz1 : sz2;
1273  }
1274 
1275  KOKKOS_INLINE_FUNCTION
1276  bool hasFastAccess() const {
1277  return expr1.hasFastAccess() && expr2.hasFastAccess();
1278  }
1279 
1280  KOKKOS_INLINE_FUNCTION
1281  value_type val() const {
1282  using std::pow;
1283  return pow(expr1.val(), expr2.val());
1284  }
1285 
1286  KOKKOS_INLINE_FUNCTION
1287  val_type val(int j) const {
1288  using std::pow;
1289  return pow(expr1.val(j), expr2.val(j));
1290  }
1291 
1292  KOKKOS_INLINE_FUNCTION
1293  val_type dx(int i, int j) const {
1294  using std::pow; using std::log;
1295  const int sz1 = expr1.size(), sz2 = expr2.size();
1296  if (sz1 > 0 && sz2 > 0)
1297  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type((expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j)));
1298  else if (sz1 > 0)
1299  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1300  // It seems less accurate and caused convergence problems in some codes
1301  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type(expr2.val(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),expr2.val(j)));
1302  else
1303  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type(expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j)));
1304  }
1305 
1306  KOKKOS_INLINE_FUNCTION
1307  val_type fastAccessDx(int i, int j) const {
1308  using std::pow; using std::log;
1309  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type((expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j)));
1310  }
1311 
1312  protected:
1313 
1314  const T1& expr1;
1315  const T2& expr2;
1316 
1317  };
1318 
1319  template <typename T1, typename T2>
1320  class PowerOp< T1, T2, false, true, ExprSpecMPVector, false >
1321  : public Expr< PowerOp< T1, T2, false, true, ExprSpecMPVector, false > > {
1322  public:
1323 
1324  typedef typename std::remove_cv<T1>::type ExprT1;
1325  typedef T2 ConstT;
1326  typedef typename ExprT1::value_type value_type;
1327  typedef typename ExprT1::scalar_type scalar_type;
1328 
1329  typedef typename value_type::value_type val_type;
1330 
1331  typedef ExprSpecMPVector expr_spec_type;
1332 
1333  KOKKOS_INLINE_FUNCTION
1334  PowerOp(const T1& expr1_, const ConstT& c_) :
1335  expr1(expr1_), c(c_) {}
1336 
1337  KOKKOS_INLINE_FUNCTION
1338  int size() const {
1339  return expr1.size();
1340  }
1341 
1342  KOKKOS_INLINE_FUNCTION
1343  bool hasFastAccess() const {
1344  return expr1.hasFastAccess();
1345  }
1346 
1347  KOKKOS_INLINE_FUNCTION
1348  value_type val() const {
1349  using std::pow;
1350  return pow(expr1.val(), c);
1351  }
1352 
1353  KOKKOS_INLINE_FUNCTION
1354  val_type val(int j) const {
1355  using std::pow;
1356  return pow(expr1.val(j), c.fastAccessCoeff(j));
1357  }
1358 
1359  KOKKOS_INLINE_FUNCTION
1360  val_type dx(int i, int j) const {
1361  using std::pow;
1362  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1363  // It seems less accurate and caused convergence problems in some codes
1364  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type(c.fastAccessCoeff(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j)));
1365  }
1366 
1367  KOKKOS_INLINE_FUNCTION
1368  val_type fastAccessDx(int i, int j) const {
1369  using std::pow;
1370  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1371  // It seems less accurate and caused convergence problems in some codes
1372  return expr1.val(j) == val_type(0.0) ? val_type(0.0) : val_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j)));
1373  }
1374 
1375  protected:
1376 
1377  const T1& expr1;
1378  const ConstT& c;
1379  };
1380 
1381  template <typename T1, typename T2>
1382  class PowerOp< T1, T2, true, false, ExprSpecMPVector, false >
1383  : public Expr< PowerOp< T1, T2, true, false, ExprSpecMPVector, false > > {
1384  public:
1385 
1386  typedef typename std::remove_cv<T2>::type ExprT2;
1387  typedef T1 ConstT;
1388  typedef typename ExprT2::value_type value_type;
1389  typedef typename ExprT2::scalar_type scalar_type;
1390 
1391  typedef typename value_type::value_type val_type;
1392 
1393  typedef ExprSpecMPVector expr_spec_type;
1394 
1395  KOKKOS_INLINE_FUNCTION
1396  PowerOp(const ConstT& c_, const T2& expr2_) :
1397  c(c_), expr2(expr2_) {}
1398 
1399  KOKKOS_INLINE_FUNCTION
1400  int size() const {
1401  return expr2.size();
1402  }
1403 
1404  KOKKOS_INLINE_FUNCTION
1405  bool hasFastAccess() const {
1406  return expr2.hasFastAccess();
1407  }
1408 
1409  KOKKOS_INLINE_FUNCTION
1410  value_type val() const {
1411  using std::pow;
1412  return pow(c, expr2.val());
1413  }
1414 
1415  KOKKOS_INLINE_FUNCTION
1416  val_type val(int j) const {
1417  using std::pow;
1418  return pow(c.fastAccessCoeff(j), expr2.val(j));
1419  }
1420 
1421  KOKKOS_INLINE_FUNCTION
1422  val_type dx(int i, int j) const {
1423  using std::pow; using std::log;
1424  return c.fastAccessCoeff(j) == val_type(0.0) ? val_type(0.0) : val_type(expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j)));
1425  }
1426 
1427  KOKKOS_INLINE_FUNCTION
1428  val_type fastAccessDx(int i, int j) const {
1429  using std::pow; using std::log;
1430  return c.fastAccessCoeff(j) == val_type(0.0) ? val_type(0.0) : val_type(expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j)));
1431  }
1432 
1433  protected:
1434 
1435  const ConstT& c;
1436  const T2& expr2;
1437  };
1438 
1439  }
1440  }
1441 }
1442 
1443 //--------------------------if_then_else operator -----------------------
1444 // Can't use the above macros because it is a ternary operator (sort of).
1445 // Also, relies on C++11
1446 
1447 namespace Sacado {
1448  namespace Fad {
1449  namespace Exp {
1450 
1451  template <typename CondT, typename T1, typename T2>
1452  class IfThenElseOp< CondT,T1,T2,false,false,ExprSpecMPVector > :
1453  public Expr< IfThenElseOp< CondT, T1, T2, false, false, ExprSpecMPVector > > {
1454 
1455  public:
1456 
1457  typedef typename std::remove_cv<T1>::type ExprT1;
1458  typedef typename std::remove_cv<T2>::type ExprT2;
1461  typedef typename Sacado::Promote<value_type_1,
1463 
1466  typedef typename Sacado::Promote<scalar_type_1,
1468 
1470 
1472 
1473  KOKKOS_INLINE_FUNCTION
1474  IfThenElseOp(const CondT& cond_, const T1& expr1_, const T2& expr2_) :
1475  cond(cond_), expr1(expr1_), expr2(expr2_) {}
1476 
1477  KOKKOS_INLINE_FUNCTION
1478  int size() const {
1479  int sz1 = expr1.size(), sz2 = expr2.size();
1480  return sz1 > sz2 ? sz1 : sz2;
1481  }
1482 
1483  KOKKOS_INLINE_FUNCTION
1484  bool hasFastAccess() const {
1485  return expr1.hasFastAccess() && expr2.hasFastAccess();
1486  }
1487 
1488  KOKKOS_INLINE_FUNCTION
1489  value_type val() const {
1490  return if_then_else( cond, expr1.val(), expr2.val() );
1491  }
1492 
1493  KOKKOS_INLINE_FUNCTION
1494  val_type val(int j) const {
1495  return if_then_else( cond, expr1.val(j), expr2.val(j) );
1496  }
1497 
1498  KOKKOS_INLINE_FUNCTION
1499  val_type dx(int i, int j) const {
1500  return if_then_else( cond, expr1.dx(i,j), expr2.dx(i,j) );
1501  }
1502 
1503  KOKKOS_INLINE_FUNCTION
1504  val_type fastAccessDx(int i, int j) const {
1505  return if_then_else( cond, expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) );
1506  }
1507 
1508  protected:
1509 
1510  const CondT& cond;
1511  const T1& expr1;
1512  const T2& expr2;
1513 
1514  };
1515 
1516  template <typename CondT, typename T1, typename T2>
1517  class IfThenElseOp< CondT, T1, T2, false, true, ExprSpecMPVector> :
1518  public Expr< IfThenElseOp< CondT, T1, T2, false, true, ExprSpecMPVector > > {
1519 
1520  public:
1521 
1522  typedef typename std::remove_cv<T1>::type ExprT1;
1523  typedef T2 ConstT;
1524  typedef typename ExprT1::value_type value_type;
1526 
1528 
1529  KOKKOS_INLINE_FUNCTION
1530  IfThenElseOp(const CondT& cond_, const T1& expr1_, const ConstT& c_) :
1531  cond(cond_), expr1(expr1_), c(c_) {}
1532 
1533  KOKKOS_INLINE_FUNCTION
1534  int size() const {
1535  return expr1.size();
1536  }
1537 
1538  KOKKOS_INLINE_FUNCTION
1539  bool hasFastAccess() const {
1540  return expr1.hasFastAccess();
1541  }
1542 
1543  KOKKOS_INLINE_FUNCTION
1544  value_type val() const {
1545  return if_then_else( cond, expr1.val(), c );
1546  }
1547 
1548  KOKKOS_INLINE_FUNCTION
1549  val_type val(int j) const {
1550  return if_then_else( cond, expr1.val(j), c.fastAccessCoeff(j) );
1551  }
1552 
1553  KOKKOS_INLINE_FUNCTION
1554  val_type dx(int i, int j) const {
1555  return if_then_else( cond, expr1.dx(i,j), val_type(0.0) );
1556  }
1557 
1558  KOKKOS_INLINE_FUNCTION
1559  val_type fastAccessDx(int i, int j) const {
1560  return if_then_else( cond, expr1.fastAccessDx(i,j), val_type(0.0) );
1561  }
1562 
1563  protected:
1564 
1565  const CondT& cond;
1566  const T1& expr1;
1567  const typename ConstTypeRef<ConstT,value_type>::type c;
1568  };
1569 
1570  template <typename CondT, typename T1, typename T2>
1571  class IfThenElseOp< CondT, T1, T2, true, false, ExprSpecMPVector> :
1572  public Expr< IfThenElseOp< CondT, T1, T2, true, false, ExprSpecMPVector > > {
1573 
1574  public:
1575 
1576  typedef typename std::remove_cv<T2>::type ExprT2;
1577  typedef T1 ConstT;
1578  typedef typename ExprT2::value_type value_type;
1580 
1582 
1584 
1585  KOKKOS_INLINE_FUNCTION
1586  IfThenElseOp(const CondT& cond_, const ConstT& c_, const T2& expr2_) :
1587  cond(cond_), c(c_), expr2(expr2_) {}
1588 
1589  KOKKOS_INLINE_FUNCTION
1590  int size() const {
1591  return expr2.size();
1592  }
1593 
1594  KOKKOS_INLINE_FUNCTION
1595  bool hasFastAccess() const {
1596  return expr2.hasFastAccess();
1597  }
1598 
1599  KOKKOS_INLINE_FUNCTION
1600  value_type val() const {
1601  return if_then_else( cond, c, expr2.val() );
1602  }
1603 
1604  KOKKOS_INLINE_FUNCTION
1605  val_type val(int j) const {
1606  return if_then_else( cond, c.fastAccessCoeff(j), expr2.val(j) );
1607  }
1608 
1609  KOKKOS_INLINE_FUNCTION
1610  val_type dx(int i, int j) const {
1611  return if_then_else( cond, val_type(0.0), expr2.dx(i,j) );
1612  }
1613 
1614  KOKKOS_INLINE_FUNCTION
1615  val_type fastAccessDx(int i, int j) const {
1616  return if_then_else( cond, val_type(0.0), expr2.fastAccessDx(i,j) );
1617  }
1618 
1619  protected:
1620 
1621  const CondT& cond;
1622  const typename ConstTypeRef<ConstT,value_type>::type c;
1623  const T2& expr2;
1624  };
1625 
1626  }
1627  }
1628 }
1629 
1630 #undef FAD_BINARYOP_MACRO
1631 
1632 #endif // SACADO_FAD_EXP_MP_VECTOR_HPP
expr expr ASinhOp
KOKKOS_INLINE_FUNCTION PCE< Storage > sqrt(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION PCE< Storage > fabs(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION PCE< Storage > tan(const PCE< Storage > &a)
expr expr TanhOp
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const ConstT &c_, const T2 &expr2_)
KOKKOS_INLINE_FUNCTION PCE< Storage > sinh(const PCE< Storage > &a)
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 MultiplicationOp
expr expr ATanhOp
atanh(expr.val())
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 j expr1 expr1 expr1 expr1 j expr1 c *expr2 expr1 c expr1 c expr1 c expr1 DivisionOp
KOKKOS_INLINE_FUNCTION PCE< Storage > pow(const PCE< Storage > &a, const PCE< Storage > &b)
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const T1 &expr1_, const ConstT &c_)
expr expr ASinOp
KOKKOS_INLINE_FUNCTION PCE< Storage > tanh(const PCE< Storage > &a)
#define FAD_BINARYOP_MACRO(OPNAME, OP, USING, MPVALUE, VALUE, DX, CDX1, CDX2, FASTACCESSDX, MPVAL_CONST_DX_1, MPVAL_CONST_DX_2, VAL_CONST_DX_1, VAL_CONST_DX_2, CONST_DX_1, CONST_DX_2, CONST_FASTACCESSDX_1, CONST_FASTACCESSDX_2)
KOKKOS_INLINE_FUNCTION PCE< Storage > cbrt(const PCE< Storage > &a)
expr expr TanOp
KOKKOS_INLINE_FUNCTION PCE< Storage > acos(const PCE< Storage > &a)
atan2(expr1.val(), expr2.val())
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
KOKKOS_INLINE_FUNCTION const val_type & fastAccessDx(int i, int j) const
Returns derivative component i without bounds checking.
KOKKOS_INLINE_FUNCTION val_type dx(int i, int j) const
Returns derivative component i with bounds checking.
asinh(expr.val())
expr2 j expr1 expr1 expr2 expr2 j expr1 c c c c MaxOp
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 j expr1 expr1 expr1 expr1 j expr1 c *expr2 expr1 c expr1 c expr1 c expr1 expr1 expr1 expr1 j *expr1 expr2 expr1 expr1 j *expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 Atan2Op
expr expr CoshOp
KOKKOS_INLINE_FUNCTION PCE< Storage > max(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
expr expr expr expr fastAccessDx(i, j)) FAD_UNARYOP_MACRO(exp
KOKKOS_INLINE_FUNCTION PCE< Storage > cosh(const PCE< Storage > &a)
#define FAD_UNARYOP_MACRO(OPNAME, OP, USING, MPVALUE, VALUE, DX, FASTACCESSDX)
if_then_else(expr.val() >=0, expr.dx(i, j), value_type(-expr.dx(i, j)))
expr expr CosOp
KOKKOS_INLINE_FUNCTION PCE< Storage > abs(const PCE< Storage > &a)
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j)-expr2.val(j)
expr expr expr dx(i, j)
KOKKOS_INLINE_FUNCTION PCE< Storage > atan(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION PCE< Storage > exp(const PCE< Storage > &a)
expr2 j expr1 expr1 expr2 expr2 j expr1 c c c c MinOp
expr expr SinhOp
expr val()
acosh(expr.val())
expr expr SinOp
KOKKOS_INLINE_FUNCTION val_type & fastAccessDx(int i, int j)
Returns derivative component i without bounds checking.
expr expr expr expr ExpOp
expr expr SqrtOp
KOKKOS_INLINE_FUNCTION PCE< Storage > sin(const PCE< Storage > &a)
expr expr AbsOp
expr expr ACoshOp
expr expr ATanOp
KOKKOS_INLINE_FUNCTION PCE< Storage > log(const PCE< Storage > &a)
Expression template specialization tag for Fad&lt; MP::Vector &gt;
KOKKOS_INLINE_FUNCTION PCE< Storage > log10(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION PCE< Storage > asin(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION PCE< Storage > cos(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const T1 &expr1_, const T2 &expr2_)
expr expr ACosOp