1 #ifndef KOKKOS_MULTIVECTOR_H_ 
    2 #define KOKKOS_MULTIVECTOR_H_ 
    4 #include <Kokkos_Core.hpp> 
   11 #define MAX(a,b) (a<b?b:a) 
   16 template<
typename Scalar, 
class device>
 
   17 struct MultiVectorDynamic{
 
   18 #ifdef KOKKOS_USE_CUSPARSE 
   24   typedef typename device::array_layout layout;
 
   30   MultiVectorDynamic() {}
 
   31   ~MultiVectorDynamic() {}
 
   34 template<
typename Scalar, 
class device, 
int n>
 
   35 struct MultiVectorStatic{
 
   36   typedef Scalar scalar;
 
   37   typedef typename device::array_layout layout;
 
   41   MultiVectorStatic() {}
 
   42   ~MultiVectorStatic() {}
 
   50 template<
class RVector, 
class aVector, 
class XVector>
 
   51 struct MV_MulScalarFunctor
 
   53   typedef typename XVector::device_type        device_type;
 
   54   typedef typename XVector::size_type            size_type;
 
   57   typename XVector::const_type m_x ;
 
   58   typename aVector::const_type m_a ;
 
   60   MV_MulScalarFunctor() {n=1;}
 
   63   KOKKOS_INLINE_FUNCTION
 
   64   void operator()( 
const size_type i)
 const 
   66 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
   69         for(size_type k=0;k<n;k++)
 
   70            m_r(i,k) = m_a[k]*m_x(i,k);
 
   74 template<
class aVector, 
class XVector>
 
   75 struct MV_MulScalarFunctorSelf
 
   77   typedef typename XVector::device_type        device_type;
 
   78   typedef typename XVector::size_type            size_type;
 
   81   typename aVector::const_type   m_a ;
 
   85   KOKKOS_INLINE_FUNCTION
 
   86   void operator()( 
const size_type i)
 const 
   88 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
   91         for(size_type k=0;k<n;k++)
 
   96 template<
class RVector, 
class DataType,
class Layout,
class Device, 
class MemoryManagement,
class Specialisation, 
class XVector>
 
  101     MV_MulScalarFunctorSelf<aVector,XVector> op ;
 
  104     op.n = x.dimension(1);
 
  109   MV_MulScalarFunctor<RVector,aVector,XVector> op ;
 
  113   op.n = x.dimension(1);
 
  118 template<
class RVector, 
class XVector>
 
  119 struct MV_MulScalarFunctor<RVector,typename RVector::value_type,XVector>
 
  121   typedef typename XVector::device_type        device_type;
 
  122   typedef typename XVector::size_type            size_type;
 
  125   typename XVector::const_type m_x ;
 
  126   typename RVector::value_type m_a ;
 
  128   MV_MulScalarFunctor() {n=1;}
 
  131   KOKKOS_INLINE_FUNCTION
 
  132   void operator()( 
const size_type i)
 const 
  134 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  137         for(size_type k=0;k<n;k++)
 
  138            m_r(i,k) = m_a*m_x(i,k);
 
  142 template<
class XVector>
 
  143 struct MV_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector>
 
  145   typedef typename XVector::device_type        device_type;
 
  146   typedef typename XVector::size_type            size_type;
 
  149   typename XVector::non_const_value_type m_a ;
 
  153   KOKKOS_INLINE_FUNCTION
 
  154   void operator()( 
const size_type i)
 const 
  156 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  159         for(size_type k=0;k<n;k++)
 
  164 template<
class RVector, 
class XVector>
 
  165 RVector MV_MulScalar( 
const RVector & r, 
const typename XVector::non_const_value_type &a, 
const XVector & x)
 
  176     MV_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector> op ;
 
  179     op.n = x.dimension(1);
 
  184   MV_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> op ;
 
  188   op.n = x.dimension(1);
 
  196 template<
class RVector, 
class XVector>
 
  197 struct MV_ReciprocalFunctor
 
  199   typedef typename XVector::device_type        device_type;
 
  200   typedef typename XVector::size_type            size_type;
 
  203   typename XVector::const_type m_x ;
 
  206   MV_ReciprocalFunctor(RVector r, XVector x, size_type n):m_r(r),m_x(x),m_n(n) {}
 
  209   KOKKOS_INLINE_FUNCTION
 
  210   void operator()( 
const size_type i)
 const 
  212 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  215   for(size_type k=0;k<m_n;k++)
 
  220 template<
class XVector>
 
  221 struct MV_ReciprocalSelfFunctor
 
  223   typedef typename XVector::device_type        device_type;
 
  224   typedef typename XVector::size_type            size_type;
 
  229   MV_ReciprocalSelfFunctor(XVector x, size_type n):m_x(x),m_n(n) {}
 
  232   KOKKOS_INLINE_FUNCTION
 
  233   void operator()( 
const size_type i)
 const 
  235 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  238   for(size_type k=0;k<m_n;k++)
 
  243 template<
class RVector, 
class XVector>
 
  244 RVector MV_Reciprocal( 
const RVector & r, 
const XVector & x)
 
  262     MV_ReciprocalSelfFunctor<XVector> op(x,x.dimension_1()) ;
 
  267   MV_ReciprocalFunctor<RVector,XVector> op(r,x,x.dimension_1()) ;
 
  275 template<
class XVector>
 
  276 struct MV_ReciprocalThresholdSelfFunctor
 
  278   typedef typename XVector::device_type           device_type;
 
  279   typedef typename XVector::size_type               size_type;
 
  280   typedef typename XVector::non_const_value_type   value_type;
 
  285   const value_type m_min_val;
 
  286   const value_type m_min_val_mag;
 
  289   MV_ReciprocalThresholdSelfFunctor(
const XVector& x, 
const value_type& min_val, 
const size_type n) :
 
  290     m_x(x), m_min_val(min_val), m_min_val_mag(KAT::
abs(min_val)), m_n(n) {}
 
  293   KOKKOS_INLINE_FUNCTION
 
  294   void operator()( 
const size_type i)
 const 
  296 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  299     for(size_type k=0;k<m_n;k++) {
 
  300       if (
KAT::abs(m_x(i,k)) < m_min_val_mag)
 
  301         m_x(i,k) = m_min_val;
 
  308 template<
class XVector>
 
  309 XVector MV_ReciprocalThreshold( 
const XVector & x, 
const typename XVector::non_const_value_type& min_val )
 
  311   MV_ReciprocalThresholdSelfFunctor<XVector> op(x,min_val,x.dimension_1()) ;
 
  319 template<
class RVector, 
class XVector>
 
  322   typedef typename XVector::device_type        device_type;
 
  323   typedef typename XVector::size_type            size_type;
 
  326   typename XVector::const_type m_x ;
 
  329   MV_AbsFunctor(RVector r, XVector x, size_type n):m_r(r),m_x(x),m_n(n) {}
 
  332   KOKKOS_INLINE_FUNCTION
 
  333   void operator()( 
const size_type i)
 const 
  335 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  338   for(size_type k=0;k<m_n;k++)
 
  343 template<
class XVector>
 
  344 struct MV_AbsSelfFunctor
 
  346   typedef typename XVector::device_type        device_type;
 
  347   typedef typename XVector::size_type            size_type;
 
  352   MV_AbsSelfFunctor(XVector x, size_type n):m_x(x),m_n(n) {}
 
  355   KOKKOS_INLINE_FUNCTION
 
  356   void operator()( 
const size_type i)
 const 
  358 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  361   for(size_type k=0;k<m_n;k++)
 
  366 template<
class RVector, 
class XVector>
 
  367 RVector MV_Abs( 
const RVector & r, 
const XVector & x)
 
  385     MV_AbsSelfFunctor<XVector> op(x,x.dimension_1()) ;
 
  390   MV_AbsFunctor<RVector,XVector> op(r,x,x.dimension_1()) ;
 
  398 template<
class CVector, 
class AVector, 
class BVector>
 
  399 struct MV_ElementWiseMultiplyFunctor
 
  401   typedef typename CVector::device_type        device_type;
 
  402   typedef typename CVector::size_type            size_type;
 
  404   typename CVector::const_value_type m_c;
 
  406   typename AVector::const_value_type m_ab;
 
  407   typename AVector::const_type m_A ;
 
  408   typename BVector::const_type m_B ;
 
  411   MV_ElementWiseMultiplyFunctor(
 
  412       typename CVector::const_value_type c,
 
  414       typename AVector::const_value_type ab,
 
  415       typename AVector::const_type A,
 
  416       typename BVector::const_type B,
 
  418       m_c(c),m_C(C),m_ab(ab),m_A(A),m_B(B),m_n(n)
 
  422   KOKKOS_INLINE_FUNCTION
 
  423   void operator()( 
const size_type i)
 const 
  425     typename AVector::const_value_type Ai = m_A(i);
 
  426 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  429   for(size_type k=0;k<m_n;k++)
 
  430      m_C(i,k) = m_c*m_C(i,k) + m_ab*Ai*m_B(i,k);
 
  435 template<
class CVector, 
class AVector, 
class BVector>
 
  436 CVector MV_ElementWiseMultiply(
 
  437       typename CVector::const_value_type c,
 
  439       typename AVector::const_value_type ab,
 
  460   MV_ElementWiseMultiplyFunctor<CVector,AVector,BVector> op(c,C,ab,A,B,C.dimension_1()) ;
 
  472 template<
class RVector,
class aVector, 
class XVector, 
class bVector, 
class YVector, 
int scalar_x, 
int scalar_y,
int UNROLL>
 
  473 struct MV_AddUnrollFunctor
 
  475   typedef typename RVector::device_type        device_type;
 
  476   typedef typename RVector::size_type            size_type;
 
  486   MV_AddUnrollFunctor() {n=UNROLL;}
 
  489   KOKKOS_INLINE_FUNCTION
 
  490   void operator()( 
const size_type i )
 const 
  492         if((scalar_x==1)&&(scalar_y==1)){
 
  493 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  496     for(size_type k=0;k<UNROLL;k++)
 
  497       m_r(i,k) = m_x(i,k) + m_y(i,k);
 
  499         if((scalar_x==1)&&(scalar_y==-1)){
 
  500 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  503           for(size_type k=0;k<UNROLL;k++)
 
  504       m_r(i,k) = m_x(i,k) - m_y(i,k);
 
  506         if((scalar_x==-1)&&(scalar_y==-1)){
 
  507 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  510 for(size_type k=0;k<UNROLL;k++)
 
  511       m_r(i,k) = -m_x(i,k) - m_y(i,k);
 
  513         if((scalar_x==-1)&&(scalar_y==1)){
 
  514 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  517 for(size_type k=0;k<UNROLL;k++)
 
  518       m_r(i,k) = -m_x(i,k) + m_y(i,k);
 
  520         if((scalar_x==2)&&(scalar_y==1)){
 
  521 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  524 for(size_type k=0;k<UNROLL;k++)
 
  525       m_r(i,k) = m_a(k)*m_x(i,k) + m_y(i,k);
 
  527         if((scalar_x==2)&&(scalar_y==-1)){
 
  528 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  531 for(size_type k=0;k<UNROLL;k++)
 
  532       m_r(i,k) = m_a(k)*m_x(i,k) - m_y(i,k);
 
  534         if((scalar_x==1)&&(scalar_y==2)){
 
  535 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  538 for(size_type k=0;k<UNROLL;k++)
 
  539       m_r(i,k) = m_x(i,k) + m_b(k)*m_y(i,k);
 
  541         if((scalar_x==-1)&&(scalar_y==2)){
 
  542 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  545 for(size_type k=0;k<UNROLL;k++)
 
  546       m_r(i,k) = -m_x(i,k) + m_b(k)*m_y(i,k);
 
  548         if((scalar_x==2)&&(scalar_y==2)){
 
  549 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  552 for(size_type k=0;k<UNROLL;k++)
 
  553       m_r(i,k) = m_a(k)*m_x(i,k) + m_b(k)*m_y(i,k);
 
  558 template<
class RVector,
class aVector, 
class XVector, 
class bVector, 
class YVector, 
int scalar_x, 
int scalar_y>
 
  559 struct MV_AddVectorFunctor
 
  561   typedef typename RVector::device_type        device_type;
 
  562   typedef typename RVector::size_type            size_type;
 
  571   MV_AddVectorFunctor() {n=1;}
 
  574   KOKKOS_INLINE_FUNCTION
 
  575   void operator()( 
const size_type i )
 const 
  577         if((scalar_x==1)&&(scalar_y==1))
 
  578 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  581 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  582 #pragma vector always 
  584       for(size_type k=0;k<n;k++)
 
  585             m_r(i,k) = m_x(i,k) + m_y(i,k);
 
  586         if((scalar_x==1)&&(scalar_y==-1))
 
  587 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  590 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  591 #pragma vector always 
  593       for(size_type k=0;k<n;k++)
 
  594             m_r(i,k) = m_x(i,k) - m_y(i,k);
 
  595         if((scalar_x==-1)&&(scalar_y==-1))
 
  596 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  599 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  600 #pragma vector always 
  602       for(size_type k=0;k<n;k++)
 
  603             m_r(i,k) = -m_x(i,k) - m_y(i,k);
 
  604         if((scalar_x==-1)&&(scalar_y==1))
 
  605 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  608 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  609 #pragma vector always 
  611       for(size_type k=0;k<n;k++)
 
  612             m_r(i,k) = -m_x(i,k) + m_y(i,k);
 
  613         if((scalar_x==2)&&(scalar_y==1))
 
  614 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  617 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  618 #pragma vector always 
  620       for(size_type k=0;k<n;k++)
 
  621             m_r(i,k) = m_a(k)*m_x(i,k) + m_y(i,k);
 
  622         if((scalar_x==2)&&(scalar_y==-1))
 
  623 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  626 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  627 #pragma vector always 
  629       for(size_type k=0;k<n;k++)
 
  630             m_r(i,k) = m_a(k)*m_x(i,k) - m_y(i,k);
 
  631         if((scalar_x==1)&&(scalar_y==2))
 
  632 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  635 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  636 #pragma vector always 
  638       for(size_type k=0;k<n;k++)
 
  639             m_r(i,k) = m_x(i,k) + m_b(k)*m_y(i,k);
 
  640         if((scalar_x==-1)&&(scalar_y==2))
 
  641 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  644 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  645 #pragma vector always 
  647       for(size_type k=0;k<n;k++)
 
  648             m_r(i,k) = -m_x(i,k) + m_b(k)*m_y(i,k);
 
  649         if((scalar_x==2)&&(scalar_y==2))
 
  650 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  653 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  654 #pragma vector always 
  656       for(size_type k=0;k<n;k++)
 
  657             m_r(i,k) = m_a(k)*m_x(i,k) + m_b(k)*m_y(i,k);
 
  664 template<
class RVector, 
class XVector, 
class YVector, 
int scalar_x, 
int scalar_y,
int UNROLL>
 
  665 struct MV_AddUnrollFunctor<RVector,typename XVector::non_const_value_type, XVector, typename YVector::non_const_value_type,YVector,scalar_x,scalar_y,UNROLL>
 
  667   typedef typename RVector::device_type        device_type;
 
  668   typedef typename RVector::size_type            size_type;
 
  673   typename XVector::non_const_value_type m_a;
 
  674   typename YVector::non_const_value_type m_b;
 
  678   MV_AddUnrollFunctor() {n=UNROLL;}
 
  681   KOKKOS_INLINE_FUNCTION
 
  682   void operator()( 
const size_type i )
 const 
  684   if((scalar_x==1)&&(scalar_y==1)){
 
  685 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  688     for(size_type k=0;k<UNROLL;k++)
 
  689       m_r(i,k) = m_x(i,k) + m_y(i,k);
 
  691   if((scalar_x==1)&&(scalar_y==-1)){
 
  692 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  695     for(size_type k=0;k<UNROLL;k++)
 
  696       m_r(i,k) = m_x(i,k) - m_y(i,k);
 
  698   if((scalar_x==-1)&&(scalar_y==-1)){
 
  699 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  702 for(size_type k=0;k<UNROLL;k++)
 
  703       m_r(i,k) = -m_x(i,k) - m_y(i,k);
 
  705   if((scalar_x==-1)&&(scalar_y==1)){
 
  706 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  709 for(size_type k=0;k<UNROLL;k++)
 
  710       m_r(i,k) = -m_x(i,k) + m_y(i,k);
 
  712   if((scalar_x==2)&&(scalar_y==1)){
 
  713 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  716 for(size_type k=0;k<UNROLL;k++)
 
  717       m_r(i,k) = m_a*m_x(i,k) + m_y(i,k);
 
  719   if((scalar_x==2)&&(scalar_y==-1)){
 
  720 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  723 for(size_type k=0;k<UNROLL;k++)
 
  724       m_r(i,k) = m_a*m_x(i,k) - m_y(i,k);
 
  726   if((scalar_x==1)&&(scalar_y==2)){
 
  727 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  730 for(size_type k=0;k<UNROLL;k++)
 
  731       m_r(i,k) = m_x(i,k) + m_b*m_y(i,k);
 
  733   if((scalar_x==-1)&&(scalar_y==2)){
 
  734 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  737 for(size_type k=0;k<UNROLL;k++)
 
  738       m_r(i,k) = -m_x(i,k) + m_b*m_y(i,k);
 
  740   if((scalar_x==2)&&(scalar_y==2)){
 
  741 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
  744 for(size_type k=0;k<UNROLL;k++)
 
  745       m_r(i,k) = m_a*m_x(i,k) + m_b*m_y(i,k);
 
  750 template<
class RVector, 
class XVector, 
class YVector, 
int scalar_x, 
int scalar_y>
 
  751 struct MV_AddVectorFunctor<RVector,typename XVector::non_const_value_type, XVector, typename YVector::non_const_value_type,YVector,scalar_x,scalar_y>
 
  753   typedef typename RVector::device_type        device_type;
 
  754   typedef typename RVector::size_type            size_type;
 
  759   typename XVector::non_const_value_type m_a;
 
  760   typename YVector::non_const_value_type m_b;
 
  763   MV_AddVectorFunctor() {n=1;}
 
  766   KOKKOS_INLINE_FUNCTION
 
  767   void operator()( 
const size_type i )
 const 
  769   if((scalar_x==1)&&(scalar_y==1))
 
  770 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  773 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  774 #pragma vector always 
  776       for(size_type k=0;k<n;k++)
 
  777       m_r(i,k) = m_x(i,k) + m_y(i,k);
 
  778   if((scalar_x==1)&&(scalar_y==-1))
 
  779 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  782 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  783 #pragma vector always 
  785       for(size_type k=0;k<n;k++)
 
  786       m_r(i,k) = m_x(i,k) - m_y(i,k);
 
  787   if((scalar_x==-1)&&(scalar_y==-1))
 
  788 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  791 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  792 #pragma vector always 
  794       for(size_type k=0;k<n;k++)
 
  795       m_r(i,k) = -m_x(i,k) - m_y(i,k);
 
  796   if((scalar_x==-1)&&(scalar_y==1))
 
  797 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  800 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  801 #pragma vector always 
  803       for(size_type k=0;k<n;k++)
 
  804       m_r(i,k) = -m_x(i,k) + m_y(i,k);
 
  805   if((scalar_x==2)&&(scalar_y==1))
 
  806 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  809 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  810 #pragma vector always 
  812       for(size_type k=0;k<n;k++)
 
  813       m_r(i,k) = m_a*m_x(i,k) + m_y(i,k);
 
  814   if((scalar_x==2)&&(scalar_y==-1))
 
  815 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  818 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  819 #pragma vector always 
  821       for(size_type k=0;k<n;k++)
 
  822       m_r(i,k) = m_a*m_x(i,k) - m_y(i,k);
 
  823   if((scalar_x==1)&&(scalar_y==2))
 
  824 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  827 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  828 #pragma vector always 
  830       for(size_type k=0;k<n;k++)
 
  831       m_r(i,k) = m_x(i,k) + m_b*m_y(i,k);
 
  832   if((scalar_x==-1)&&(scalar_y==2))
 
  833 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  836 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  837 #pragma vector always 
  839       for(size_type k=0;k<n;k++)
 
  840       m_r(i,k) = -m_x(i,k) + m_b*m_y(i,k);
 
  841   if((scalar_x==2)&&(scalar_y==2))
 
  842 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
  845 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
  846 #pragma vector always 
  848       for(size_type k=0;k<n;k++)
 
  849       m_r(i,k) = m_a*m_x(i,k) + m_b*m_y(i,k);
 
  854 template<
class RVector,
class aVector, 
class XVector, 
class bVector, 
class YVector,
int UNROLL>
 
  855 RVector MV_AddUnroll( 
const RVector & r,
const aVector &av,
const XVector & x,
 
  856                 const bVector &bv, 
const YVector & y, 
int n,
 
  860      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,1,UNROLL> op ;
 
  866      op.n = x.dimension(1);
 
  871      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,-1,UNROLL> op ;
 
  877      op.n = x.dimension(1);
 
  882      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,1,UNROLL> op ;
 
  888      op.n = x.dimension(1);
 
  893      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1,UNROLL> op ;
 
  899      op.n = x.dimension(1);
 
  904      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,1,UNROLL> op ;
 
  910      op.n = x.dimension(1);
 
  915      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,-1,UNROLL> op ;
 
  921      op.n = x.dimension(1);
 
  926      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,2,UNROLL> op ;
 
  932      op.n = x.dimension(1);
 
  937      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,2,UNROLL> op ;
 
  943      op.n = x.dimension(1);
 
  947    MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,2,UNROLL> op ;
 
  953    op.n = x.dimension(1);
 
  959 template<
class RVector,
class aVector, 
class XVector, 
class bVector, 
class YVector>
 
  960 RVector MV_AddUnroll( 
const RVector & r,
const aVector &av,
const XVector & x,
 
  961                 const bVector &bv, 
const YVector & y, 
int n,
 
  964         switch (x.dimension(1)){
 
  965       case 1: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 1>( r,av,x,bv,y,n,a,b);
 
  967       case 2: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 2>( r,av,x,bv,y,n,a,b);
 
  969       case 3: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 3>( r,av,x,bv,y,n,a,b);
 
  971       case 4: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 4>( r,av,x,bv,y,n,a,b);
 
  973       case 5: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 5>( r,av,x,bv,y,n,a,b);
 
  975       case 6: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 6>( r,av,x,bv,y,n,a,b);
 
  977       case 7: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 7>( r,av,x,bv,y,n,a,b);
 
  979       case 8: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 8>( r,av,x,bv,y,n,a,b);
 
  981       case 9: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 9>( r,av,x,bv,y,n,a,b);
 
  983       case 10: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 10>( r,av,x,bv,y,n,a,b);
 
  985       case 11: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 11>( r,av,x,bv,y,n,a,b);
 
  987       case 12: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 12>( r,av,x,bv,y,n,a,b);
 
  989       case 13: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 13>( r,av,x,bv,y,n,a,b);
 
  991       case 14: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 14>( r,av,x,bv,y,n,a,b);
 
  993       case 15: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 15>( r,av,x,bv,y,n,a,b);
 
  995       case 16: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 16>( r,av,x,bv,y,n,a,b);
 
 1002 template<
class RVector,
class aVector, 
class XVector, 
class bVector, 
class YVector>
 
 1003 RVector MV_AddVector( 
const RVector & r,
const aVector &av,
const XVector & x,
 
 1004                 const bVector &bv, 
const YVector & y, 
int n,
 
 1008      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,1> op ;
 
 1014      op.n = x.dimension(1);
 
 1019      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,-1> op ;
 
 1025      op.n = x.dimension(1);
 
 1030      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,1> op ;
 
 1036      op.n = x.dimension(1);
 
 1041      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1> op ;
 
 1047      op.n = x.dimension(1);
 
 1052      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,1> op ;
 
 1058      op.n = x.dimension(1);
 
 1063      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,-1> op ;
 
 1069      op.n = x.dimension(1);
 
 1074      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,2> op ;
 
 1080      op.n = x.dimension(1);
 
 1085      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,2> op ;
 
 1091      op.n = x.dimension(1);
 
 1095    MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,2> op ;
 
 1101    op.n = x.dimension(1);
 
 1107 template<
class RVector,
class aVector, 
class XVector, 
class bVector, 
class YVector>
 
 1108 RVector MV_AddSpecialise( 
const RVector & r,
const aVector &av,
const XVector & x,
 
 1109                 const bVector &bv, 
const YVector & y, 
unsigned int n,
 
 1113         if(x.dimension(1)>16)
 
 1114                 return MV_AddVector( r,av,x,bv,y,a,b);
 
 1116         if(x.dimension_1()==1) {
 
 1117     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
 
 1118     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
 
 1119     typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
 
 1121     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
 
 1122     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
 
 1123     YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
 
 1125     V_Add(r_1d,av,x_1d,bv,y_1d,n);
 
 1128         return MV_AddUnroll( r,av,x,bv,y,a,b);
 
 1131 template<
class RVector,
class aVector, 
class XVector, 
class bVector, 
class YVector>
 
 1132 RVector MV_Add( 
const RVector & r,
const aVector &av,
const XVector & x,
 
 1133     const bVector &bv, 
const YVector & y, 
int n = -1)
 
 1135   if(n==-1) n = x.dimension_0();
 
 1136   if(x.dimension(1)>16)
 
 1137     return MV_AddVector( r,av,x,bv,y,n,2,2);
 
 1139   if(x.dimension_1()==1) {
 
 1140     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
 
 1141     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
 
 1142     typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
 
 1144     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
 
 1145     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
 
 1146     YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
 
 1148     V_Add(r_1d,av,x_1d,bv,y_1d,n);
 
 1151   return MV_AddUnroll( r,av,x,bv,y,n,2,2);
 
 1154 template<
class RVector,
class XVector,
class YVector>
 
 1155 RVector MV_Add( 
const RVector & r, 
const XVector & x, 
const YVector & y, 
int n = -1)
 
 1157   if(n==-1) n = x.dimension_0();
 
 1158   if(x.dimension_1()==1) {
 
 1159     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
 
 1160     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
 
 1161     typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
 
 1163     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
 
 1164     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
 
 1165     YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
 
 1167     V_Add(r_1d,x_1d,y_1d,n);
 
 1170     typename XVector::const_value_type a = 1.0;
 
 1171     return MV_AddSpecialise(r,a,x,a,y,n,1,1);
 
 1175 template<
class RVector,
class XVector,
class bVector, 
class YVector>
 
 1176 RVector MV_Add( 
const RVector & r, 
const XVector & x, 
const bVector & bv, 
const YVector & y, 
int n = -1 )
 
 1178   if(n==-1) n = x.dimension_0();
 
 1179   if(x.dimension_1()==1) {
 
 1180     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
 
 1181     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
 
 1182     typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
 
 1184     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
 
 1185     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
 
 1186     YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
 
 1188     V_Add(r_1d,x_1d,bv,y_1d,n);
 
 1191     MV_AddSpecialise(r,bv,x,bv,y,n,1,2);
 
 1196 template<
class XVector,
class YVector>
 
 1197 struct MV_DotProduct_Right_FunctorVector
 
 1199   typedef typename XVector::device_type         device_type;
 
 1200   typedef typename XVector::size_type             size_type;
 
 1201   typedef typename XVector::non_const_value_type          xvalue_type;
 
 1202   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
 
 1204   size_type value_count;
 
 1207   typedef typename XVector::const_type        x_const_type;
 
 1208   typedef typename YVector::const_type        y_const_type;
 
 1214   KOKKOS_INLINE_FUNCTION
 
 1215   void operator()( 
const size_type i, value_type sum )
 const 
 1217     const size_type numVecs=value_count;
 
 1219 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1222 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1223 #pragma vector always 
 1225     for(size_type k=0;k<numVecs;k++)
 
 1226       sum[k]+=
IPT::dot( m_x(i,k), m_y(i,k) );  
 
 1228   KOKKOS_INLINE_FUNCTION 
void init( value_type update)
 const 
 1230     const size_type numVecs = value_count;
 
 1231 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1234 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1235 #pragma vector always 
 1237     for(size_type k=0;k<numVecs;k++)
 
 1240   KOKKOS_INLINE_FUNCTION 
void join( 
volatile value_type  update ,
 
 1241                                     const volatile value_type  source )
 const 
 1243     const size_type numVecs = value_count;
 
 1244 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1247 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1248 #pragma vector always 
 1250     for(size_type k=0;k<numVecs;k++){
 
 1251       update[k] += source[k];
 
 1261 template<
class MultiVecViewType>
 
 1262 struct MultiVecDotFunctor {
 
 1263   typedef typename MultiVecViewType::device_type device_type;
 
 1264   typedef typename MultiVecViewType::size_type size_type;
 
 1265   typedef typename MultiVecViewType::value_type mv_value_type;
 
 1269   typedef MultiVecViewType mv_view_type;
 
 1270   typedef typename MultiVecViewType::const_type mv_const_view_type;
 
 1273   mv_const_view_type X_, Y_;
 
 1274   dot_view_type dots_;
 
 1276   size_type value_count;
 
 1278   MultiVecDotFunctor (
const mv_const_view_type& X,
 
 1279                       const mv_const_view_type& Y,
 
 1280                       const dot_view_type& dots) :
 
 1281     X_ (X), Y_ (Y), dots_ (dots), value_count (X.dimension_1 ())
 
 1283     if (value_count != dots.dimension_0 ()) {
 
 1284 #if defined(__CUDACC__) && defined(__CUDA_ARCH__) 
 1285       cuda_abort(
"Kokkos::MultiVecDotFunctor: value_count does not match the length of 'dots'");
 
 1287       std::ostringstream os;
 
 1288       os << 
"Kokkos::MultiVecDotFunctor: value_count does not match the length " 
 1289         "of 'dots'.  X is " << X.dimension_0 () << 
" x " << X.dimension_1 () <<
 
 1290         ", Y is " << Y.dimension_0 () << 
" x " << Y.dimension_1 () << 
", " 
 1291         "dots has length " << dots.dimension_0 () << 
", and value_count = " <<
 
 1293       throw std::invalid_argument (os.str ());
 
 1299   KOKKOS_INLINE_FUNCTION 
void 
 1300   operator() (
const size_type i, value_type sum)
 const 
 1302     const size_type numVecs = value_count;
 
 1303 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1306 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1307 #pragma vector always 
 1309     for (size_type k = 0; k < numVecs; ++k) {
 
 1310       sum[k] += 
IPT::dot (X_(i,k), Y_(i,k));
 
 1314   KOKKOS_INLINE_FUNCTION 
void 
 1315   init (value_type update)
 const 
 1317     const size_type numVecs = value_count;
 
 1318 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1321 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1322 #pragma vector always 
 1324     for (size_type k = 0; k < numVecs; ++k) {
 
 1329   KOKKOS_INLINE_FUNCTION 
void 
 1330   join (
volatile value_type update,
 
 1331         const volatile value_type source)
 const 
 1333     const size_type numVecs = value_count;
 
 1334 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1337 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1338 #pragma vector always 
 1340     for (size_type k = 0; k < numVecs; ++k) {
 
 1341       update[k] += source[k];
 
 1346   KOKKOS_INLINE_FUNCTION 
void 
 1347   final (
const value_type dst) 
const 
 1349     const size_type numVecs = value_count;
 
 1351 #if !defined(__CUDA_ARCH__) 
 1354       std::ostringstream os;
 
 1355       os << 
"numVecs: " << numVecs << 
", dst: [";
 
 1356       for (
size_t j = 0; j < numVecs; ++j) {
 
 1358         if (j + 1 < numVecs) {
 
 1362       os << 
"]" << std::endl;
 
 1363       std::cerr << os.str ();
 
 1367 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1370 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1371 #pragma vector always 
 1373     for (size_type k = 0; k < numVecs; ++k) {
 
 1377 #if !defined(__CUDA_ARCH__) 
 1380       std::ostringstream os;
 
 1381       os << 
"numVecs: " << numVecs << 
", dots_: [";
 
 1382       for (
size_t j = 0; j < numVecs; ++j) {
 
 1384         if (j + 1 < numVecs) {
 
 1388       os << 
"]" << std::endl;
 
 1389       std::cerr << os.str ();
 
 1399 template<
class MultiVecViewType>
 
 1400 struct MultiVecNorm2SquaredFunctor {
 
 1401   typedef typename MultiVecViewType::device_type device_type;
 
 1402   typedef typename MultiVecViewType::size_type size_type;
 
 1403   typedef typename MultiVecViewType::value_type mv_value_type;
 
 1407   typedef MultiVecViewType mv_view_type;
 
 1408   typedef typename MultiVecViewType::const_type mv_const_view_type;
 
 1411   mv_const_view_type X_;
 
 1412   norms_view_type norms_;
 
 1414   size_type value_count;
 
 1416   MultiVecNorm2SquaredFunctor (
const mv_const_view_type& X,
 
 1417                                const norms_view_type& norms) :
 
 1418     X_ (X), norms_ (norms), value_count (X.dimension_1 ())
 
 1420     if (value_count != norms.dimension_0 ()) {
 
 1421 #if defined(__CUDACC__) && defined(__CUDA_ARCH__) 
 1422       cuda_abort(
"Kokkos::MultiVecNorm2SquaredFunctor: value_count does not match the length of 'norms'");
 
 1424       std::ostringstream os;
 
 1425       os << 
"Kokkos::MultiVecNorm2SquaredFunctor: value_count does not match " 
 1426         "the length of 'norms'.  X is " << X.dimension_0 () << 
" x " <<
 
 1427         X.dimension_1 () << 
", norms has length " << norms.dimension_0 () <<
 
 1428         ", and value_count = " << value_count << 
".";
 
 1429       throw std::invalid_argument (os.str ());
 
 1434   KOKKOS_INLINE_FUNCTION 
void 
 1435   operator() (
const size_type i, value_type sum)
 const 
 1437     const size_type numVecs = value_count;
 
 1438 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1441 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1442 #pragma vector always 
 1444     for (size_type k = 0; k < numVecs; ++k) {
 
 1446       sum[k] += tmp * tmp;
 
 1450   KOKKOS_INLINE_FUNCTION 
void 
 1451   init (value_type update)
 const 
 1453     const size_type numVecs = value_count;
 
 1454 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1457 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1458 #pragma vector always 
 1460     for (size_type k = 0; k < numVecs; ++k) {
 
 1465   KOKKOS_INLINE_FUNCTION 
void 
 1466   join (
volatile value_type update,
 
 1467         const volatile value_type source)
 const 
 1469     const size_type numVecs = value_count;
 
 1470 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1473 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1474 #pragma vector always 
 1476     for (size_type k = 0; k < numVecs; ++k) {
 
 1477       update[k] += source[k];
 
 1482   KOKKOS_INLINE_FUNCTION 
void 
 1483   final (
const value_type dst) 
const 
 1485     const size_type numVecs = value_count;
 
 1486 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1489 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1490 #pragma vector always 
 1492     for (size_type k = 0; k < numVecs; ++k) {
 
 1502 template<
class MultiVecViewType>
 
 1503 struct MultiVecNorm1Functor {
 
 1504   typedef typename MultiVecViewType::device_type device_type;
 
 1505   typedef typename MultiVecViewType::size_type size_type;
 
 1506   typedef typename MultiVecViewType::value_type mv_value_type;
 
 1510   typedef MultiVecViewType mv_view_type;
 
 1511   typedef typename MultiVecViewType::const_type mv_const_view_type;
 
 1514   mv_const_view_type X_;
 
 1515   norms_view_type norms_;
 
 1517   size_type value_count;
 
 1519   MultiVecNorm1Functor (
const mv_const_view_type& X,
 
 1520                         const norms_view_type& norms) :
 
 1521     X_ (X), norms_ (norms), value_count (X.dimension_1 ())
 
 1523     if (value_count != norms.dimension_0 ()) {
 
 1524 #if defined(__CUDACC__) && defined(__CUDA_ARCH__) 
 1525       cuda_abort(
"Kokkos::MultiVecNorm1Functor: value_count does not match the length of 'norms'");
 
 1527       std::ostringstream os;
 
 1528       os << 
"Kokkos::MultiVecNorm1Functor: value_count does not match the " 
 1529          << 
"length of 'norms'.  X is " << X.dimension_0 () << 
" x " 
 1530          << X.dimension_1 () << 
", norms has length " << norms.dimension_0 ()
 
 1531          << 
", and value_count = " << value_count << 
".";
 
 1532       throw std::invalid_argument (os.str ());
 
 1537   KOKKOS_INLINE_FUNCTION 
void 
 1538   operator() (
const size_type i, value_type sum)
 const 
 1540     const size_type numVecs = value_count;
 
 1541 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1544 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1545 #pragma vector always 
 1547     for (size_type k = 0; k < numVecs; ++k) {
 
 1552   KOKKOS_INLINE_FUNCTION 
void 
 1553   init (value_type update)
 const 
 1555     const size_type numVecs = value_count;
 
 1556 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1559 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1560 #pragma vector always 
 1562     for (size_type k = 0; k < numVecs; ++k) {
 
 1567   KOKKOS_INLINE_FUNCTION 
void 
 1568   join (
volatile value_type update,
 
 1569         const volatile value_type source)
 const 
 1571     const size_type numVecs = value_count;
 
 1572 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1575 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1576 #pragma vector always 
 1578     for (size_type k = 0; k < numVecs; ++k) {
 
 1579       update[k] += source[k];
 
 1584   KOKKOS_INLINE_FUNCTION 
void 
 1585   final (
const value_type dst) 
const 
 1587     const size_type numVecs = value_count;
 
 1588 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1591 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1592 #pragma vector always 
 1594     for (size_type k = 0; k < numVecs; ++k) {
 
 1604 template<
class MultiVecViewType>
 
 1605 struct MultiVecNormInfFunctor {
 
 1606   typedef typename MultiVecViewType::device_type device_type;
 
 1607   typedef typename MultiVecViewType::size_type size_type;
 
 1608   typedef typename MultiVecViewType::value_type mv_value_type;
 
 1612   typedef MultiVecViewType mv_view_type;
 
 1613   typedef typename MultiVecViewType::const_type mv_const_view_type;
 
 1616   mv_const_view_type X_;
 
 1617   norms_view_type norms_;
 
 1619   size_type value_count;
 
 1621   MultiVecNormInfFunctor (
const mv_const_view_type& X,
 
 1622                           const norms_view_type& norms) :
 
 1623     X_ (X), norms_ (norms), value_count (X.dimension_1 ())
 
 1625     if (value_count != norms.dimension_0 ()) {
 
 1626 #if defined(__CUDACC__) && defined(__CUDA_ARCH__) 
 1627       cuda_abort(
"Kokkos::MultiVecNormInfFunctor: value_count does not match the length of 'norms'");
 
 1629       std::ostringstream os;
 
 1630       os << 
"Kokkos::MultiVecNormInfFunctor: value_count does not match the " 
 1631          << 
"length of 'norms'.  X is " << X.dimension_0 () << 
" x " 
 1632          << X.dimension_1 () << 
", norms has length " << norms.dimension_0 ()
 
 1633          << 
", and value_count = " << value_count << 
".";
 
 1634       throw std::invalid_argument (os.str ());
 
 1639   KOKKOS_INLINE_FUNCTION 
void 
 1640   operator() (
const size_type i, value_type maxes)
 const 
 1642     const size_type numVecs = value_count;
 
 1643 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1646 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1647 #pragma vector always 
 1649     for (size_type k = 0; k < numVecs; ++k) {
 
 1658       if (curVal < newVal) {
 
 1664   KOKKOS_INLINE_FUNCTION 
void 
 1665   init (value_type update)
 const 
 1667     const size_type numVecs = value_count;
 
 1668 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1671 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1672 #pragma vector always 
 1674     for (size_type k = 0; k < numVecs; ++k) {
 
 1682   KOKKOS_INLINE_FUNCTION 
void 
 1683   join (
volatile value_type update,
 
 1684         const volatile value_type source)
 const 
 1686     const size_type numVecs = value_count;
 
 1687 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1690 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1691 #pragma vector always 
 1693     for (size_type k = 0; k < numVecs; ++k) {
 
 1702       if (curVal < newVal) {
 
 1709   KOKKOS_INLINE_FUNCTION 
void 
 1710   final (
const value_type dst) 
const 
 1712     const size_type numVecs = value_count;
 
 1713 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 1716 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 1717 #pragma vector always 
 1719     for (size_type k = 0; k < numVecs; ++k) {
 
 1728 template<
class VecViewType>
 
 1729 struct VecDotFunctor {
 
 1730   typedef typename VecViewType::device_type device_type;
 
 1731   typedef typename VecViewType::size_type size_type;
 
 1732   typedef typename VecViewType::value_type mv_value_type;
 
 1735   typedef typename VecViewType::const_type vec_const_view_type;
 
 1740   vec_const_view_type x_, y_;
 
 1743   VecDotFunctor (
const vec_const_view_type& x,
 
 1744                  const vec_const_view_type& y,
 
 1745                  const dot_view_type& dot) :
 
 1746     x_ (x), y_ (y), dot_ (dot)
 
 1748     if (x.dimension_0 () != y.dimension_0 ()) {
 
 1749       std::ostringstream os;
 
 1750       os << 
"Kokkos::VecDotFunctor: The dimensions of x and y do not match.  " 
 1751         "x.dimension_0() = " << x.dimension_0 ()
 
 1752          << 
" != y.dimension_0() = " << y.dimension_0 () << 
".";
 
 1753       throw std::invalid_argument (os.str ());
 
 1757   KOKKOS_INLINE_FUNCTION 
void 
 1758   operator() (
const size_type i, value_type& sum)
 const {
 
 1762   KOKKOS_INLINE_FUNCTION 
void 
 1763   init (value_type& update)
 const {
 
 1767   KOKKOS_INLINE_FUNCTION 
void 
 1768   join (
volatile value_type& update,
 
 1769         const volatile value_type& source)
 const {
 
 1774   KOKKOS_INLINE_FUNCTION 
void final (value_type& dst) 
const {
 
 1782 template<
class VecViewType>
 
 1783 struct VecNorm2SquaredFunctor {
 
 1784   typedef typename VecViewType::device_type device_type;
 
 1785   typedef typename VecViewType::size_type size_type;
 
 1786   typedef typename VecViewType::value_type mv_value_type;
 
 1789   typedef typename VecViewType::const_type vec_const_view_type;
 
 1793   vec_const_view_type x_;
 
 1794   norm_view_type norm_;
 
 1800   VecNorm2SquaredFunctor (
const vec_const_view_type& x,
 
 1801                           const norm_view_type& norm) :
 
 1802     x_ (x), norm_ (norm)
 
 1805   KOKKOS_INLINE_FUNCTION 
void 
 1806   operator() (
const size_type i, value_type& sum)
 const {
 
 1811   KOKKOS_INLINE_FUNCTION 
void 
 1812   init (value_type& update)
 const {
 
 1816   KOKKOS_INLINE_FUNCTION 
void 
 1817   join (
volatile value_type& update,
 
 1818         const volatile value_type& source)
 const {
 
 1823   KOKKOS_INLINE_FUNCTION 
void final (value_type& dst) 
const {
 
 1830 template<
class VecViewType>
 
 1831 struct VecNorm1Functor {
 
 1832   typedef typename VecViewType::device_type device_type;
 
 1833   typedef typename VecViewType::size_type size_type;
 
 1834   typedef typename VecViewType::value_type mv_value_type;
 
 1837   typedef typename VecViewType::const_type vec_const_view_type;
 
 1841   vec_const_view_type x_;
 
 1842   norm_view_type norm_;
 
 1848   VecNorm1Functor (
const vec_const_view_type& x,
 
 1849                    const norm_view_type& norm) :
 
 1850     x_ (x), norm_ (norm)
 
 1853   KOKKOS_INLINE_FUNCTION 
void 
 1854   operator() (
const size_type i, value_type& sum)
 const {
 
 1858   KOKKOS_INLINE_FUNCTION 
void 
 1859   init (value_type& update)
 const {
 
 1863   KOKKOS_INLINE_FUNCTION 
void 
 1864   join (
volatile value_type& update,
 
 1865         const volatile value_type& source)
 const {
 
 1870   KOKKOS_INLINE_FUNCTION 
void final (value_type& dst) 
const {
 
 1877 template<
class VecViewType>
 
 1878 struct VecNormInfFunctor {
 
 1879   typedef typename VecViewType::device_type device_type;
 
 1880   typedef typename VecViewType::size_type size_type;
 
 1881   typedef typename VecViewType::value_type mv_value_type;
 
 1884   typedef typename VecViewType::const_type vec_const_view_type;
 
 1888   vec_const_view_type x_;
 
 1889   norm_view_type norm_;
 
 1895   VecNormInfFunctor (
const vec_const_view_type& x,
 
 1896                      const norm_view_type& norm) :
 
 1897     x_ (x), norm_ (norm)
 
 1900   KOKKOS_INLINE_FUNCTION 
void 
 1901   operator() (
const size_type i, value_type& curVal)
 const {
 
 1909     if (curVal < newVal) {
 
 1914   KOKKOS_INLINE_FUNCTION 
void 
 1915   init (value_type& update)
 const {
 
 1922   KOKKOS_INLINE_FUNCTION 
void 
 1923   join (
volatile value_type& update,
 
 1924         const volatile value_type& source)
 const {
 
 1931     if (update < source) {
 
 1937   KOKKOS_INLINE_FUNCTION 
void final (value_type& dst) 
const {
 
 1955 template<
class ViewType>
 
 1956 class SquareRootFunctor {
 
 1958   typedef typename ViewType::device_type device_type;
 
 1959   typedef typename ViewType::size_type size_type;
 
 1961   SquareRootFunctor (
const ViewType& theView) : theView_ (theView) {}
 
 1963   KOKKOS_INLINE_FUNCTION 
void operator() (
const size_type i)
 const {
 
 1964     typedef typename ViewType::value_type value_type;
 
 1973 template<
class XVector,
class YVector,
int UNROLL>
 
 1974 struct MV_DotProduct_Right_FunctorUnroll
 
 1976   typedef typename XVector::device_type         device_type;
 
 1977   typedef typename XVector::size_type             size_type;
 
 1978   typedef typename XVector::non_const_value_type          xvalue_type;
 
 1979   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
 
 1981   size_type value_count;
 
 1983   typedef typename XVector::const_type        x_const_type;
 
 1984   typedef typename YVector::const_type        y_const_type;
 
 1991   KOKKOS_INLINE_FUNCTION
 
 1992   void operator()( 
const size_type i, value_type sum )
 const 
 1994 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
 1997     for(size_type k=0;k<UNROLL;k++)
 
 1998       sum[k]+= 
IPT::dot( m_x(i,k), m_y(i,k) );  
 
 2000   KOKKOS_INLINE_FUNCTION 
void init( 
volatile value_type update)
 const 
 2002 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
 2005     for(size_type k=0;k<UNROLL;k++)
 
 2008   KOKKOS_INLINE_FUNCTION 
void join( 
volatile value_type update ,
 
 2009                                     const volatile value_type source)
 const 
 2011 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 
 2014     for(size_type k=0;k<UNROLL;k++)
 
 2015       update[k] += source[k] ;
 
 2019 template<
class rVector, 
class XVector, 
class YVector>
 
 2020 rVector MV_Dot(
const rVector &r, 
const XVector & x, 
const YVector & y, 
int n = -1)
 
 2022     typedef typename XVector::size_type            size_type;
 
 2023     const size_type numVecs = x.dimension(1);
 
 2025     if(n<0) n = x.dimension_0();
 
 2028         MV_DotProduct_Right_FunctorVector<XVector,YVector> op;
 
 2031         op.value_count = numVecs;
 
 2039            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,16> op;
 
 2042            op.value_count = numVecs;
 
 2047            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,15> op;
 
 2050            op.value_count = numVecs;
 
 2055            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,14> op;
 
 2058            op.value_count = numVecs;
 
 2063            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,13> op;
 
 2066            op.value_count = numVecs;
 
 2071            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,12> op;
 
 2074            op.value_count = numVecs;
 
 2079            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,11> op;
 
 2082            op.value_count = numVecs;
 
 2087            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,10> op;
 
 2090            op.value_count = numVecs;
 
 2095            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,9> op;
 
 2098            op.value_count = numVecs;
 
 2103            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,8> op;
 
 2106            op.value_count = numVecs;
 
 2111            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,7> op;
 
 2114            op.value_count = numVecs;
 
 2119            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,6> op;
 
 2122            op.value_count = numVecs;
 
 2127            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,5> op;
 
 2130            op.value_count = numVecs;
 
 2135            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,4> op;
 
 2138            op.value_count = numVecs;
 
 2144            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,3> op;
 
 2147            op.value_count = numVecs;
 
 2152            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,2> op;
 
 2155            op.value_count = numVecs;
 
 2160          typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
 
 2161          typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
 
 2163          XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
 
 2164          YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
 
 2165          r[0] = V_Dot(x_1d,y_1d,n);
 
 2176 template<
class XVector>
 
 2177 struct MV_Sum_Functor
 
 2179   typedef typename XVector::device_type        device_type;
 
 2180   typedef typename XVector::size_type            size_type;
 
 2181   typedef typename XVector::non_const_value_type          xvalue_type;
 
 2182   typedef xvalue_type                         value_type[];
 
 2184   typename XVector::const_type m_x ;
 
 2185   size_type value_count;
 
 2187   MV_Sum_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
 
 2190   KOKKOS_INLINE_FUNCTION
 
 2191   void operator()( 
const size_type i, value_type sum )
 const 
 2193 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2196 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2197 #pragma vector always 
 2199     for(size_type k=0;k<value_count;k++){
 
 2204   KOKKOS_INLINE_FUNCTION
 
 2205   void init( value_type update)
 const 
 2207 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2210 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2211 #pragma vector always 
 2213     for(size_type k=0;k<value_count;k++)
 
 2217   KOKKOS_INLINE_FUNCTION
 
 2218   void join( 
volatile value_type  update ,
 
 2219                                     const volatile value_type  source )
 const 
 2221 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2224 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2225 #pragma vector always 
 2227     for(size_type k=0;k<value_count;k++){
 
 2228       update[k] += source[k];
 
 2234 template<
class normVector, 
class VectorType>
 
 2235 normVector MV_Sum(
const normVector &r, 
const VectorType & x, 
int n = -1)
 
 2238     n = x.dimension_0 ();
 
 2248 template<
class XVector>
 
 2249 struct MV_Norm1_Functor
 
 2251   typedef typename XVector::device_type        device_type;
 
 2252   typedef typename XVector::size_type            size_type;
 
 2253   typedef typename XVector::non_const_value_type          xvalue_type;
 
 2254   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
 
 2257   typename XVector::const_type m_x ;
 
 2258   size_type value_count;
 
 2260   MV_Norm1_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
 
 2263   KOKKOS_INLINE_FUNCTION
 
 2264   void operator()( 
const size_type i, value_type sum )
 const 
 2266 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2269 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2270 #pragma vector always 
 2272     for(size_type k=0;k<value_count;k++){
 
 2277   KOKKOS_INLINE_FUNCTION
 
 2278   void init( value_type update)
 const 
 2280 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2283 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2284 #pragma vector always 
 2286     for(size_type k=0;k<value_count;k++)
 
 2290   KOKKOS_INLINE_FUNCTION
 
 2291   void join( 
volatile value_type  update ,
 
 2292                                     const volatile value_type  source )
 const 
 2294 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2297 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2298 #pragma vector always 
 2300     for(size_type k=0;k<value_count;k++){
 
 2301       update[k] += source[k];
 
 2306 template<
class normVector, 
class VectorType>
 
 2307 normVector MV_Norm1(
const normVector &r, 
const VectorType & x, 
int n = -1)
 
 2310     n = x.dimension_0 ();
 
 2320 template<
class XVector>
 
 2321 struct MV_NormInf_Functor
 
 2323   typedef typename XVector::device_type        device_type;
 
 2324   typedef typename XVector::size_type            size_type;
 
 2325   typedef typename XVector::non_const_value_type          xvalue_type;
 
 2326   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
 
 2329   typename XVector::const_type m_x ;
 
 2330   size_type value_count;
 
 2332   MV_NormInf_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
 
 2334   KOKKOS_INLINE_FUNCTION
 
 2335   void operator()( 
const size_type i, value_type sum )
 const 
 2337 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2340 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2341 #pragma vector always 
 2343     for(size_type k=0;k<value_count;k++){
 
 2348   KOKKOS_INLINE_FUNCTION 
void init( value_type update)
 const 
 2350 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2353 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2354 #pragma vector always 
 2356     for(size_type k=0;k<value_count;k++)
 
 2359   KOKKOS_INLINE_FUNCTION 
void join( 
volatile value_type  update ,
 
 2360                                     const volatile value_type  source )
 const 
 2362 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2365 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2366 #pragma vector always 
 2368     for(size_type k=0;k<value_count;k++){
 
 2369       update[k] = MAX(update[k],source[k]);
 
 2375 template<
class normVector, 
class VectorType>
 
 2376 normVector MV_NormInf(
const normVector &r, 
const VectorType & x, 
int n = -1)
 
 2379     n = x.dimension_0 ();
 
 2389 template<
class WeightVector, 
class XVector,
int WeightsRanks>
 
 2390 struct MV_DotWeighted_Functor{};
 
 2392 template<
class WeightVector, 
class XVector>
 
 2393 struct MV_DotWeighted_Functor<WeightVector,XVector,1>
 
 2395   typedef typename XVector::device_type        device_type;
 
 2396   typedef typename XVector::size_type            size_type;
 
 2397   typedef typename XVector::non_const_value_type          xvalue_type;
 
 2398   typedef typename WeightVector::non_const_value_type     wvalue_type;
 
 2399   typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
 
 2400   typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
 
 2401   typedef typename XIPT::dot_type               value_type[];
 
 2403   typename WeightVector::const_type m_w ;
 
 2404   typename XVector::const_type m_x ;
 
 2405   size_type value_count;
 
 2407   MV_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x),value_count(x.dimension_1()) {}
 
 2409   KOKKOS_INLINE_FUNCTION
 
 2410   void operator()( 
const size_type i, value_type sum )
 const 
 2412 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2415 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2416 #pragma vector always 
 2418     for(size_type k=0;k<value_count;k++){
 
 2419       sum[k] += XIPT::dot( m_x(i,k), m_x(i,k) ) / WIPT::dot( m_w(i), m_w(i) );
 
 2423   KOKKOS_INLINE_FUNCTION 
void init( value_type update)
 const 
 2425 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2428 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2429 #pragma vector always 
 2431     for(size_type k=0;k<value_count;k++)
 
 2434   KOKKOS_INLINE_FUNCTION 
void join( 
volatile value_type  update ,
 
 2435                                     const volatile value_type  source )
 const 
 2437 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2440 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2441 #pragma vector always 
 2443     for(size_type k=0;k<value_count;k++){
 
 2444       update[k] += source[k];
 
 2449 template<
class WeightVector, 
class XVector>
 
 2450 struct MV_DotWeighted_Functor<WeightVector,XVector,2>
 
 2452   typedef typename XVector::device_type        device_type;
 
 2453   typedef typename XVector::size_type            size_type;
 
 2454   typedef typename XVector::non_const_value_type          xvalue_type;
 
 2455   typedef typename WeightVector::non_const_value_type     wvalue_type;
 
 2456   typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
 
 2457   typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
 
 2458   typedef typename XIPT::dot_type               value_type[];
 
 2460   typename WeightVector::const_type m_w ;
 
 2461   typename XVector::const_type m_x ;
 
 2462   size_type value_count;
 
 2464   MV_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x),value_count(x.dimension_1()) {}
 
 2466   KOKKOS_INLINE_FUNCTION
 
 2467   void operator()( 
const size_type i, value_type sum )
 const 
 2469 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2472 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2473 #pragma vector always 
 2475     for(size_type k=0;k<value_count;k++){
 
 2476       sum[k] += XIPT::dot( m_x(i,k), m_x(i,k) ) / WIPT::dot( m_w(i,k), m_w(i,k) );
 
 2480   KOKKOS_INLINE_FUNCTION 
void init( value_type update)
 const 
 2482 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2485 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2486 #pragma vector always 
 2488     for(size_type k=0;k<value_count;k++)
 
 2491   KOKKOS_INLINE_FUNCTION 
void join( 
volatile value_type  update ,
 
 2492                                     const volatile value_type  source )
 const 
 2494 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 
 2497 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 
 2498 #pragma vector always 
 2500     for(size_type k=0;k<value_count;k++){
 
 2501       update[k] += source[k];
 
 2506 template<
class rVector, 
class WeightVector, 
class XVector>
 
 2507 rVector MV_DotWeighted(
const rVector &r, 
const WeightVector & w, 
const XVector & x, 
int n = -1)
 
 2510     n = x.dimension_0 ();
 
 2513   typedef MV_DotWeighted_Functor<WeightVector, XVector, WeightVector::Rank> functor_type;
 
 2521 template<
class RVector, 
class aVector, 
class XVector>
 
 2522 struct V_MulScalarFunctor
 
 2524   typedef typename XVector::device_type        device_type;
 
 2525   typedef typename XVector::size_type            size_type;
 
 2528   typename XVector::const_type m_x ;
 
 2529   typename aVector::const_type m_a ;
 
 2532   KOKKOS_INLINE_FUNCTION
 
 2533   void operator()( 
const size_type i)
 const 
 2535     m_r(i) = m_a[0]*m_x(i);
 
 2539 template<
class aVector, 
class XVector>
 
 2540 struct V_MulScalarFunctorSelf
 
 2542   typedef typename XVector::device_type        device_type;
 
 2543   typedef typename XVector::size_type            size_type;
 
 2546   typename aVector::const_type   m_a ;
 
 2549   KOKKOS_INLINE_FUNCTION
 
 2550   void operator()( 
const size_type i)
 const 
 2556 template<
class RVector, 
class DataType,
class Layout,
class Device, 
class MemoryManagement,
class Specialisation, 
class XVector>
 
 2561     V_MulScalarFunctorSelf<aVector,XVector> op ;
 
 2568   V_MulScalarFunctor<RVector,aVector,XVector> op ;
 
 2576 template<
class RVector, 
class XVector>
 
 2577 struct V_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector>
 
 2579   typedef typename XVector::device_type        device_type;
 
 2580   typedef typename XVector::size_type            size_type;
 
 2583   typename XVector::const_type m_x ;
 
 2584   typename XVector::value_type m_a ;
 
 2587   KOKKOS_INLINE_FUNCTION
 
 2588   void operator()( 
const size_type i)
 const 
 2590     m_r(i) = m_a*m_x(i);
 
 2594 template<
class XVector>
 
 2595 struct V_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector>
 
 2597   typedef typename XVector::device_type        device_type;
 
 2598   typedef typename XVector::size_type            size_type;
 
 2601   typename XVector::non_const_value_type m_a ;
 
 2604   KOKKOS_INLINE_FUNCTION
 
 2605   void operator()( 
const size_type i)
 const 
 2612 template<
class RVector, 
class XVector>
 
 2613 RVector V_MulScalar( 
const RVector & r, 
const typename XVector::non_const_value_type &a, 
const XVector & x)
 
 2616     V_MulScalarFunctorSelf<typename RVector::value_type,RVector> op ;
 
 2623   V_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> op ;
 
 2631 template<
class RVector, 
class XVector, 
class YVector, 
int scalar_x, 
int scalar_y>
 
 2632 struct V_AddVectorFunctor
 
 2634   typedef typename RVector::device_type        device_type;
 
 2635   typedef typename RVector::size_type            size_type;
 
 2636   typedef typename XVector::non_const_value_type           value_type;
 
 2638   typename XVector::const_type  m_x ;
 
 2639   typename YVector::const_type   m_y ;
 
 2640   const value_type m_a;
 
 2641   const value_type m_b;
 
 2644   V_AddVectorFunctor(
const RVector& r, 
const value_type& a,
const XVector& x,
const value_type& b,
const YVector& y):
 
 2645           m_r(r),m_x(x),m_y(y),m_a(a),m_b(b)
 
 2648   KOKKOS_INLINE_FUNCTION
 
 2649   void operator()( 
const size_type i )
 const 
 2651         if((scalar_x==1)&&(scalar_y==1))
 
 2652             m_r(i) = m_x(i) + m_y(i);
 
 2653         if((scalar_x==1)&&(scalar_y==-1))
 
 2654             m_r(i) = m_x(i) - m_y(i);
 
 2655         if((scalar_x==-1)&&(scalar_y==-1))
 
 2656             m_r(i) = -m_x(i) - m_y(i);
 
 2657         if((scalar_x==-1)&&(scalar_y==1))
 
 2658             m_r(i) = -m_x(i) + m_y(i);
 
 2659         if((scalar_x==2)&&(scalar_y==1))
 
 2660             m_r(i) = m_a*m_x(i) + m_y(i);
 
 2661         if((scalar_x==2)&&(scalar_y==-1))
 
 2662             m_r(i) = m_a*m_x(i) - m_y(i);
 
 2663         if((scalar_x==1)&&(scalar_y==2))
 
 2664             m_r(i) = m_x(i) + m_b*m_y(i);
 
 2665         if((scalar_x==-1)&&(scalar_y==2))
 
 2666             m_r(i) = -m_x(i) + m_b*m_y(i);
 
 2667         if((scalar_x==2)&&(scalar_y==2))
 
 2668             m_r(i) = m_a*m_x(i) + m_b*m_y(i);
 
 2672 template<
class RVector, 
class XVector, 
int scalar_x>
 
 2673 struct V_AddVectorSelfFunctor
 
 2675   typedef typename RVector::device_type        device_type;
 
 2676   typedef typename RVector::size_type            size_type;
 
 2677   typedef typename XVector::non_const_value_type      value_type;
 
 2679   typename XVector::const_type  m_x ;
 
 2680   const value_type m_a;
 
 2682   V_AddVectorSelfFunctor(
const RVector& r, 
const value_type& a,
const XVector& x):
 
 2683     m_r(r),m_x(x),m_a(a)
 
 2686   KOKKOS_INLINE_FUNCTION
 
 2687   void operator()( 
const size_type i )
 const 
 2694       m_r(i) += m_a*m_x(i);
 
 2697 template<
class RVector, 
class XVector, 
class YVector, 
int doalpha, 
int dobeta>
 
 2698 RVector V_AddVector( 
const RVector & r,
const typename XVector::non_const_value_type &av,
const XVector & x,
 
 2699                 const typename XVector::non_const_value_type &bv, 
const YVector & y,
int n=-1)
 
 2701   if(n == -1) n = x.dimension_0();
 
 2702   if(r.ptr_on_device()==x.ptr_on_device() && doalpha == 1) {
 
 2703     V_AddVectorSelfFunctor<RVector,YVector,dobeta> f(r,bv,y);
 
 2705   } 
else if(r.ptr_on_device()==y.ptr_on_device() && dobeta == 1) {
 
 2706     V_AddVectorSelfFunctor<RVector,XVector,doalpha> f(r,av,x);
 
 2709     V_AddVectorFunctor<RVector,XVector,YVector,doalpha,dobeta> f(r,av,x,bv,y);
 
 2715 template<
class RVector, 
class XVector, 
class YVector>
 
 2716 RVector V_AddVector( 
const RVector & r,
const typename XVector::non_const_value_type &av,
const XVector & x,
 
 2717                 const typename YVector::non_const_value_type &bv, 
const YVector & y, 
int n = -1,
 
 2722                   V_AddVector<RVector,XVector,YVector,-1,-1>(r,av,x,bv,y,n);
 
 2724                   V_AddVector<RVector,XVector,YVector,-1,0>(r,av,x,bv,y,n);
 
 2726               V_AddVector<RVector,XVector,YVector,-1,1>(r,av,x,bv,y,n);
 
 2728               V_AddVector<RVector,XVector,YVector,-1,2>(r,av,x,bv,y,n);
 
 2731                   V_AddVector<RVector,XVector,YVector,0,-1>(r,av,x,bv,y,n);
 
 2733                   V_AddVector<RVector,XVector,YVector,0,0>(r,av,x,bv,y,n);
 
 2735               V_AddVector<RVector,XVector,YVector,0,1>(r,av,x,bv,y,n);
 
 2737               V_AddVector<RVector,XVector,YVector,0,2>(r,av,x,bv,y,n);
 
 2740                   V_AddVector<RVector,XVector,YVector,1,-1>(r,av,x,bv,y,n);
 
 2742                   V_AddVector<RVector,XVector,YVector,1,0>(r,av,x,bv,y,n);
 
 2744               V_AddVector<RVector,XVector,YVector,1,1>(r,av,x,bv,y,n);
 
 2746               V_AddVector<RVector,XVector,YVector,1,2>(r,av,x,bv,y,n);
 
 2749                   V_AddVector<RVector,XVector,YVector,2,-1>(r,av,x,bv,y,n);
 
 2751                   V_AddVector<RVector,XVector,YVector,2,0>(r,av,x,bv,y,n);
 
 2753               V_AddVector<RVector,XVector,YVector,2,1>(r,av,x,bv,y,n);
 
 2755               V_AddVector<RVector,XVector,YVector,2,2>(r,av,x,bv,y,n);
 
 2760 template<
class RVector,
class XVector,
class YVector>
 
 2761 RVector V_Add( 
const RVector & r, 
const XVector & x, 
const YVector & y, 
int n=-1)
 
 2763         return V_AddVector( r,1,x,1,y,n,1,1);
 
 2766 template<
class RVector,
class XVector,
class YVector>
 
 2767 RVector V_Add( 
const RVector & r, 
const XVector & x, 
const typename XVector::non_const_value_type  & bv, 
const YVector & y,
int n=-1 )
 
 2773   return V_AddVector(r,bv,x,bv,y,n,1,b);
 
 2776 template<
class RVector,
class XVector,
class YVector>
 
 2777 RVector V_Add( 
const RVector & r, 
const typename XVector::non_const_value_type  & av, 
const XVector & x, 
const typename XVector::non_const_value_type  & bv, 
const YVector & y,
int n=-1 )
 
 2788   return V_AddVector(r,av,x,bv,y,n,a,b);
 
 2791 template<
class XVector, 
class YVector>
 
 2794   typedef typename XVector::device_type          device_type;
 
 2795   typedef typename XVector::size_type              size_type;
 
 2796   typedef typename XVector::non_const_value_type xvalue_type;
 
 2797   typedef Details::InnerProductSpaceTraits<xvalue_type>  IPT;
 
 2803   V_DotFunctor(
const XVector& x,
const YVector& y):
 
 2807   KOKKOS_INLINE_FUNCTION
 
 2808   void operator()( 
const size_type &i, value_type &sum )
 const 
 2813   KOKKOS_INLINE_FUNCTION
 
 2814   void init (
volatile value_type& update)
 const 
 2819   KOKKOS_INLINE_FUNCTION
 
 2820   void join( 
volatile value_type &update ,
 
 2821              const volatile value_type &source )
 const 
 2827 template<
class XVector, 
class YVector>
 
 2828 typename Details::InnerProductSpaceTraits<typename XVector::non_const_value_type>::dot_type
 
 2829 V_Dot( 
const XVector & x, 
const YVector & y, 
int n = -1)
 
 2831   typedef V_DotFunctor<XVector,YVector> Functor;
 
 2833   if (n<0) n = x.dimension_0();
 
 2834   typename Functor::value_type ret_val;
 
 2839 template<
class WeightVector, 
class XVector>
 
 2840 struct V_DotWeighted_Functor
 
 2842   typedef typename XVector::device_type device_type;
 
 2843   typedef typename XVector::size_type size_type;
 
 2844   typedef typename XVector::non_const_value_type xvalue_type;
 
 2845   typedef typename WeightVector::non_const_value_type wvalue_type;
 
 2846   typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
 
 2847   typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
 
 2850   typename WeightVector::const_type m_w;
 
 2851   typename XVector::const_type m_x;
 
 2853   V_DotWeighted_Functor (WeightVector w, XVector x) :
 
 2857   KOKKOS_INLINE_FUNCTION
 
 2858   void operator() (
const size_type i, value_type& sum)
 const {
 
 2862   KOKKOS_INLINE_FUNCTION 
void init (value_type& update)
 const {
 
 2866   KOKKOS_INLINE_FUNCTION 
void 
 2867   join (
volatile value_type& update,
 
 2868         const volatile value_type& source)
 const 
 2874 template<
class WeightVector, 
class XVector>
 
 2875 typename Details::InnerProductSpaceTraits<typename XVector::non_const_value_type>::dot_type
 
 2876 V_DotWeighted(
const WeightVector & w, 
const XVector & x, 
int n = -1)
 
 2879     n = x.dimension_0 ();
 
 2882   typedef Details::InnerProductSpaceTraits<typename XVector::non_const_value_type> IPT;
 
 2883   typedef typename IPT::dot_type value_type;
 
 2886   typedef V_DotWeighted_Functor<WeightVector,XVector> functor_type;
 
 2894 template<
class XVector>
 
 2895 struct V_Sum_Functor
 
 2897   typedef typename XVector::device_type        device_type;
 
 2898   typedef typename XVector::size_type            size_type;
 
 2899   typedef typename XVector::non_const_value_type          xvalue_type;
 
 2900   typedef xvalue_type                           value_type;
 
 2902   typename XVector::const_type m_x ;
 
 2904   V_Sum_Functor(XVector x):m_x(x) {}
 
 2907   KOKKOS_INLINE_FUNCTION
 
 2908   void operator()( 
const size_type i, value_type& sum )
 const 
 2913   KOKKOS_INLINE_FUNCTION
 
 2914   void init( value_type& update)
 const 
 2919   KOKKOS_INLINE_FUNCTION
 
 2920   void join( 
volatile value_type&  update ,
 
 2921                                     const volatile value_type&  source )
 const 
 2928 template<
class VectorType>
 
 2929 typename VectorType::non_const_value_type
 
 2930 V_Sum (
const VectorType& x, 
int n = -1)
 
 2933     n = x.dimension_0 ();
 
 2936   typedef typename VectorType::non_const_value_type value_type;
 
 2945 template<
class XVector>
 
 2946 struct V_Norm1_Functor
 
 2948   typedef typename XVector::device_type        device_type;
 
 2949   typedef typename XVector::size_type            size_type;
 
 2950   typedef typename XVector::non_const_value_type          xvalue_type;
 
 2951   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
 
 2954   typename XVector::const_type m_x ;
 
 2956   V_Norm1_Functor(XVector x):m_x(x) {}
 
 2958   KOKKOS_INLINE_FUNCTION
 
 2959   void operator()( 
const size_type i, value_type& sum )
 const 
 2963   KOKKOS_INLINE_FUNCTION 
void init (value_type& update)
 const 
 2967   KOKKOS_INLINE_FUNCTION 
void join( 
volatile value_type&  update ,
 
 2968                                     const volatile value_type&  source )
 const 
 2974 template<
class VectorType>
 
 2975 typename Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type>::dot_type
 
 2976 V_Norm1( 
const VectorType & x, 
int n = -1)
 
 2979     n = x.dimension_0 ();
 
 2982   typedef Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type> IPT;
 
 2983   typedef typename IPT::dot_type value_type;
 
 2991 template<
class XVector>
 
 2992 struct V_NormInf_Functor
 
 2994   typedef typename XVector::device_type        device_type;
 
 2995   typedef typename XVector::size_type            size_type;
 
 2996   typedef typename XVector::non_const_value_type          xvalue_type;
 
 2997   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
 
 3000   typename XVector::const_type m_x ;
 
 3002   V_NormInf_Functor(XVector x):m_x(x) {}
 
 3004   KOKKOS_INLINE_FUNCTION
 
 3005   void operator()( 
const size_type i, value_type& sum )
 const 
 3010   KOKKOS_INLINE_FUNCTION 
void init (value_type& update)
 const 
 3014   KOKKOS_INLINE_FUNCTION 
void join( 
volatile value_type&  update ,
 
 3015                                     const volatile value_type&  source )
 const 
 3017     update = MAX(update,source);
 
 3021 template<
class VectorType>
 
 3022 typename Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type>::dot_type
 
 3023 V_NormInf (
const VectorType& x, 
int n = -1)
 
 3026     n = x.dimension_0 ();
 
 3029   typedef Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type> IPT;
 
 3030   typedef typename IPT::dot_type value_type;
 
 3039 template<
class RVector, 
class XVector>
 
 3040 struct V_ReciprocalFunctor
 
 3042   typedef typename XVector::device_type        device_type;
 
 3043   typedef typename XVector::size_type            size_type;
 
 3046   typename XVector::const_type m_x ;
 
 3048   V_ReciprocalFunctor(RVector r, XVector x):m_r(r),m_x(x) {}
 
 3051   KOKKOS_INLINE_FUNCTION
 
 3052   void operator()( 
const size_type i)
 const 
 3058 template<
class XVector>
 
 3059 struct V_ReciprocalSelfFunctor
 
 3061   typedef typename XVector::device_type        device_type;
 
 3062   typedef typename XVector::size_type            size_type;
 
 3066   V_ReciprocalSelfFunctor(XVector x):m_x(x) {}
 
 3069   KOKKOS_INLINE_FUNCTION
 
 3070   void operator()( 
const size_type i)
 const 
 3076 template<
class RVector, 
class XVector>
 
 3077 RVector V_Reciprocal( 
const RVector & r, 
const XVector & x)
 
 3086     V_ReciprocalSelfFunctor<XVector> op(x) ;
 
 3091   V_ReciprocalFunctor<RVector,XVector> op(r,x) ;
 
 3099 template<
class XVector>
 
 3100 struct V_ReciprocalThresholdSelfFunctor
 
 3102   typedef typename XVector::device_type           device_type;
 
 3103   typedef typename XVector::size_type               size_type;
 
 3104   typedef typename XVector::non_const_value_type   value_type;
 
 3109   const value_type m_min_val;
 
 3110   const value_type m_min_val_mag;
 
 3112   V_ReciprocalThresholdSelfFunctor(
const XVector& x, 
const value_type& min_val) :
 
 3113     m_x(x), m_min_val(min_val), m_min_val_mag(KAT::
abs(min_val)) {}
 
 3116   KOKKOS_INLINE_FUNCTION
 
 3117   void operator()( 
const size_type i)
 const 
 3119     if (
KAT::abs(m_x(i)) < m_min_val_mag)
 
 3126 template<
class XVector>
 
 3127 XVector V_ReciprocalThreshold( 
const XVector & x, 
const typename XVector::non_const_value_type& min_val )
 
 3129   V_ReciprocalThresholdSelfFunctor<XVector> op(x,min_val) ;
 
 3137 template<
class RVector, 
class XVector>
 
 3140   typedef typename XVector::device_type        device_type;
 
 3141   typedef typename XVector::size_type            size_type;
 
 3144   typename XVector::const_type m_x ;
 
 3146   V_AbsFunctor(RVector r, XVector x):m_r(r),m_x(x) {}
 
 3149   KOKKOS_INLINE_FUNCTION
 
 3150   void operator()( 
const size_type i)
 const 
 3156 template<
class XVector>
 
 3157 struct V_AbsSelfFunctor
 
 3159   typedef typename XVector::device_type        device_type;
 
 3160   typedef typename XVector::size_type            size_type;
 
 3164   V_AbsSelfFunctor(XVector x):m_x(x) {}
 
 3167   KOKKOS_INLINE_FUNCTION
 
 3168   void operator()( 
const size_type i)
 const 
 3174 template<
class RVector, 
class XVector>
 
 3175 RVector V_Abs( 
const RVector & r, 
const XVector & x)
 
 3184     V_AbsSelfFunctor<XVector> op(x) ;
 
 3189   V_AbsFunctor<RVector,XVector> op(r,x) ;
 
 3197 template<
class CVector, 
class AVector, 
class BVector>
 
 3198 struct V_ElementWiseMultiplyFunctor
 
 3200   typedef typename CVector::device_type        device_type;
 
 3201   typedef typename CVector::size_type            size_type;
 
 3203   typename CVector::const_value_type m_c;
 
 3205   typename AVector::const_value_type m_ab;
 
 3206   typename AVector::const_type m_A ;
 
 3207   typename BVector::const_type m_B ;
 
 3209   V_ElementWiseMultiplyFunctor(
 
 3210       typename CVector::const_value_type c,
 
 3212       typename AVector::const_value_type ab,
 
 3213       typename AVector::const_type A,
 
 3214       typename BVector::const_type B):
 
 3215       m_c(c),m_C(C),m_ab(ab),m_A(A),m_B(B)
 
 3219   KOKKOS_INLINE_FUNCTION
 
 3220   void operator()( 
const size_type i)
 const 
 3222      m_C(i) = m_c*m_C(i) + m_ab*m_A(i)*m_B(i);
 
 3227 template<
class CVector, 
class AVector, 
class BVector>
 
 3228 CVector V_ElementWiseMultiply(
 
 3229       typename CVector::const_value_type c,
 
 3231       typename AVector::const_value_type ab,
 
 3252   V_ElementWiseMultiplyFunctor<CVector,AVector,BVector> op(c,C,ab,A,B) ;
 
static KOKKOS_FORCEINLINE_FUNCTION T sqrt(const T &x)
The square root of x. 
static KOKKOS_FORCEINLINE_FUNCTION mag_type abs(const T &x)
The absolute value (magnitude) of x. 
Memory layout tag indicating left-to-right (Fortran scheme) striding of multi-indices. 
ArithTraits< T >::mag_type mag_type
The type returned by norm(x) for a value x of type T. 
static KOKKOS_FORCEINLINE_FUNCTION T one()
The one value of T; the multiplicative identity. 
T dot_type
The type returned by dot(x,y) for values x and y of type T. 
void parallel_reduce(const ExecPolicy &policy, const FunctorType &functor, typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Parallel reduction. 
static KOKKOS_FORCEINLINE_FUNCTION dot_type dot(const T &x, const T &y)
The "dot product" of two values x and y of type T. 
KOKKOS_INLINE_FUNCTION RealType abs(const complex< RealType > &x)
Absolute value (magnitude) of a complex number. 
View to an array of data. 
static KOKKOS_FORCEINLINE_FUNCTION T zero()
The zero value of T; the arithmetic identity. 
Traits class for inner product space operations on type T. 
Memory layout tag indicating right-to-left (C or lexigraphical scheme) striding of multi-indices...
static KOKKOS_FORCEINLINE_FUNCTION mag_type norm(const T &x)
The "norm" (absolute value or magnitude) of a value x of type T. 
Declaration and definition of Kokkos::Details::InnerProductSpaceTraits. 
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy. 
T mag_type
The type of the magnitude (absolute value) of T. 
Traits class for arithmetic on type T.