1 #ifndef KOKKOS_MULTIVECTOR_H_
2 #define KOKKOS_MULTIVECTOR_H_
4 #include <Kokkos_Core.hpp>
11 #define MAX(a,b) (a<b?b:a)
16 template<
typename Scalar,
class device>
17 struct MultiVectorDynamic{
18 #ifdef KOKKOS_USE_CUSPARSE
24 typedef typename device::array_layout layout;
30 MultiVectorDynamic() {}
31 ~MultiVectorDynamic() {}
34 template<
typename Scalar,
class device,
int n>
35 struct MultiVectorStatic{
36 typedef Scalar scalar;
37 typedef typename device::array_layout layout;
41 MultiVectorStatic() {}
42 ~MultiVectorStatic() {}
50 template<
class RVector,
class aVector,
class XVector>
51 struct MV_MulScalarFunctor
53 typedef typename XVector::device_type device_type;
54 typedef typename XVector::size_type size_type;
57 typename XVector::const_type m_x ;
58 typename aVector::const_type m_a ;
60 MV_MulScalarFunctor() {n=1;}
63 KOKKOS_INLINE_FUNCTION
64 void operator()(
const size_type i)
const
66 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
69 for(size_type k=0;k<n;k++)
70 m_r(i,k) = m_a[k]*m_x(i,k);
74 template<
class aVector,
class XVector>
75 struct MV_MulScalarFunctorSelf
77 typedef typename XVector::device_type device_type;
78 typedef typename XVector::size_type size_type;
81 typename aVector::const_type m_a ;
85 KOKKOS_INLINE_FUNCTION
86 void operator()(
const size_type i)
const
88 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
91 for(size_type k=0;k<n;k++)
96 template<
class RVector,
class DataType,
class Layout,
class Device,
class MemoryManagement,
class Specialisation,
class XVector>
101 MV_MulScalarFunctorSelf<aVector,XVector> op ;
104 op.n = x.dimension(1);
109 MV_MulScalarFunctor<RVector,aVector,XVector> op ;
113 op.n = x.dimension(1);
118 template<
class RVector,
class XVector>
119 struct MV_MulScalarFunctor<RVector,typename RVector::value_type,XVector>
121 typedef typename XVector::device_type device_type;
122 typedef typename XVector::size_type size_type;
125 typename XVector::const_type m_x ;
126 typename RVector::value_type m_a ;
128 MV_MulScalarFunctor() {n=1;}
131 KOKKOS_INLINE_FUNCTION
132 void operator()(
const size_type i)
const
134 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
137 for(size_type k=0;k<n;k++)
138 m_r(i,k) = m_a*m_x(i,k);
142 template<
class XVector>
143 struct MV_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector>
145 typedef typename XVector::device_type device_type;
146 typedef typename XVector::size_type size_type;
149 typename XVector::non_const_value_type m_a ;
153 KOKKOS_INLINE_FUNCTION
154 void operator()(
const size_type i)
const
156 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
159 for(size_type k=0;k<n;k++)
164 template<
class RVector,
class XVector>
165 RVector MV_MulScalar(
const RVector & r,
const typename XVector::non_const_value_type &a,
const XVector & x)
176 MV_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector> op ;
179 op.n = x.dimension(1);
184 MV_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> op ;
188 op.n = x.dimension(1);
196 template<
class RVector,
class XVector>
197 struct MV_ReciprocalFunctor
199 typedef typename XVector::device_type device_type;
200 typedef typename XVector::size_type size_type;
203 typename XVector::const_type m_x ;
206 MV_ReciprocalFunctor(RVector r, XVector x, size_type n):m_r(r),m_x(x),m_n(n) {}
209 KOKKOS_INLINE_FUNCTION
210 void operator()(
const size_type i)
const
212 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
215 for(size_type k=0;k<m_n;k++)
220 template<
class XVector>
221 struct MV_ReciprocalSelfFunctor
223 typedef typename XVector::device_type device_type;
224 typedef typename XVector::size_type size_type;
229 MV_ReciprocalSelfFunctor(XVector x, size_type n):m_x(x),m_n(n) {}
232 KOKKOS_INLINE_FUNCTION
233 void operator()(
const size_type i)
const
235 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
238 for(size_type k=0;k<m_n;k++)
243 template<
class RVector,
class XVector>
244 RVector MV_Reciprocal(
const RVector & r,
const XVector & x)
262 MV_ReciprocalSelfFunctor<XVector> op(x,x.dimension_1()) ;
267 MV_ReciprocalFunctor<RVector,XVector> op(r,x,x.dimension_1()) ;
275 template<
class XVector>
276 struct MV_ReciprocalThresholdSelfFunctor
278 typedef typename XVector::device_type device_type;
279 typedef typename XVector::size_type size_type;
280 typedef typename XVector::non_const_value_type value_type;
285 const value_type m_min_val;
286 const value_type m_min_val_mag;
289 MV_ReciprocalThresholdSelfFunctor(
const XVector& x,
const value_type& min_val,
const size_type n) :
290 m_x(x), m_min_val(min_val), m_min_val_mag(KAT::
abs(min_val)), m_n(n) {}
293 KOKKOS_INLINE_FUNCTION
294 void operator()(
const size_type i)
const
296 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
299 for(size_type k=0;k<m_n;k++) {
300 if (
KAT::abs(m_x(i,k)) < m_min_val_mag)
301 m_x(i,k) = m_min_val;
308 template<
class XVector>
309 XVector MV_ReciprocalThreshold(
const XVector & x,
const typename XVector::non_const_value_type& min_val )
311 MV_ReciprocalThresholdSelfFunctor<XVector> op(x,min_val,x.dimension_1()) ;
319 template<
class RVector,
class XVector>
322 typedef typename XVector::device_type device_type;
323 typedef typename XVector::size_type size_type;
326 typename XVector::const_type m_x ;
329 MV_AbsFunctor(RVector r, XVector x, size_type n):m_r(r),m_x(x),m_n(n) {}
332 KOKKOS_INLINE_FUNCTION
333 void operator()(
const size_type i)
const
335 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
338 for(size_type k=0;k<m_n;k++)
343 template<
class XVector>
344 struct MV_AbsSelfFunctor
346 typedef typename XVector::device_type device_type;
347 typedef typename XVector::size_type size_type;
352 MV_AbsSelfFunctor(XVector x, size_type n):m_x(x),m_n(n) {}
355 KOKKOS_INLINE_FUNCTION
356 void operator()(
const size_type i)
const
358 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
361 for(size_type k=0;k<m_n;k++)
366 template<
class RVector,
class XVector>
367 RVector MV_Abs(
const RVector & r,
const XVector & x)
385 MV_AbsSelfFunctor<XVector> op(x,x.dimension_1()) ;
390 MV_AbsFunctor<RVector,XVector> op(r,x,x.dimension_1()) ;
398 template<
class CVector,
class AVector,
class BVector>
399 struct MV_ElementWiseMultiplyFunctor
401 typedef typename CVector::device_type device_type;
402 typedef typename CVector::size_type size_type;
404 typename CVector::const_value_type m_c;
406 typename AVector::const_value_type m_ab;
407 typename AVector::const_type m_A ;
408 typename BVector::const_type m_B ;
411 MV_ElementWiseMultiplyFunctor(
412 typename CVector::const_value_type c,
414 typename AVector::const_value_type ab,
415 typename AVector::const_type A,
416 typename BVector::const_type B,
418 m_c(c),m_C(C),m_ab(ab),m_A(A),m_B(B),m_n(n)
422 KOKKOS_INLINE_FUNCTION
423 void operator()(
const size_type i)
const
425 typename AVector::const_value_type Ai = m_A(i);
426 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
429 for(size_type k=0;k<m_n;k++)
430 m_C(i,k) = m_c*m_C(i,k) + m_ab*Ai*m_B(i,k);
435 template<
class CVector,
class AVector,
class BVector>
436 CVector MV_ElementWiseMultiply(
437 typename CVector::const_value_type c,
439 typename AVector::const_value_type ab,
460 MV_ElementWiseMultiplyFunctor<CVector,AVector,BVector> op(c,C,ab,A,B,C.dimension_1()) ;
472 template<
class RVector,
class aVector,
class XVector,
class bVector,
class YVector,
int scalar_x,
int scalar_y,
int UNROLL>
473 struct MV_AddUnrollFunctor
475 typedef typename RVector::device_type device_type;
476 typedef typename RVector::size_type size_type;
486 MV_AddUnrollFunctor() {n=UNROLL;}
489 KOKKOS_INLINE_FUNCTION
490 void operator()(
const size_type i )
const
492 if((scalar_x==1)&&(scalar_y==1)){
493 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
496 for(size_type k=0;k<UNROLL;k++)
497 m_r(i,k) = m_x(i,k) + m_y(i,k);
499 if((scalar_x==1)&&(scalar_y==-1)){
500 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
503 for(size_type k=0;k<UNROLL;k++)
504 m_r(i,k) = m_x(i,k) - m_y(i,k);
506 if((scalar_x==-1)&&(scalar_y==-1)){
507 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
510 for(size_type k=0;k<UNROLL;k++)
511 m_r(i,k) = -m_x(i,k) - m_y(i,k);
513 if((scalar_x==-1)&&(scalar_y==1)){
514 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
517 for(size_type k=0;k<UNROLL;k++)
518 m_r(i,k) = -m_x(i,k) + m_y(i,k);
520 if((scalar_x==2)&&(scalar_y==1)){
521 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
524 for(size_type k=0;k<UNROLL;k++)
525 m_r(i,k) = m_a(k)*m_x(i,k) + m_y(i,k);
527 if((scalar_x==2)&&(scalar_y==-1)){
528 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
531 for(size_type k=0;k<UNROLL;k++)
532 m_r(i,k) = m_a(k)*m_x(i,k) - m_y(i,k);
534 if((scalar_x==1)&&(scalar_y==2)){
535 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
538 for(size_type k=0;k<UNROLL;k++)
539 m_r(i,k) = m_x(i,k) + m_b(k)*m_y(i,k);
541 if((scalar_x==-1)&&(scalar_y==2)){
542 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
545 for(size_type k=0;k<UNROLL;k++)
546 m_r(i,k) = -m_x(i,k) + m_b(k)*m_y(i,k);
548 if((scalar_x==2)&&(scalar_y==2)){
549 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
552 for(size_type k=0;k<UNROLL;k++)
553 m_r(i,k) = m_a(k)*m_x(i,k) + m_b(k)*m_y(i,k);
558 template<
class RVector,
class aVector,
class XVector,
class bVector,
class YVector,
int scalar_x,
int scalar_y>
559 struct MV_AddVectorFunctor
561 typedef typename RVector::device_type device_type;
562 typedef typename RVector::size_type size_type;
571 MV_AddVectorFunctor() {n=1;}
574 KOKKOS_INLINE_FUNCTION
575 void operator()(
const size_type i )
const
577 if((scalar_x==1)&&(scalar_y==1))
578 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
581 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
582 #pragma vector always
584 for(size_type k=0;k<n;k++)
585 m_r(i,k) = m_x(i,k) + m_y(i,k);
586 if((scalar_x==1)&&(scalar_y==-1))
587 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
590 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
591 #pragma vector always
593 for(size_type k=0;k<n;k++)
594 m_r(i,k) = m_x(i,k) - m_y(i,k);
595 if((scalar_x==-1)&&(scalar_y==-1))
596 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
599 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
600 #pragma vector always
602 for(size_type k=0;k<n;k++)
603 m_r(i,k) = -m_x(i,k) - m_y(i,k);
604 if((scalar_x==-1)&&(scalar_y==1))
605 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
608 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
609 #pragma vector always
611 for(size_type k=0;k<n;k++)
612 m_r(i,k) = -m_x(i,k) + m_y(i,k);
613 if((scalar_x==2)&&(scalar_y==1))
614 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
617 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
618 #pragma vector always
620 for(size_type k=0;k<n;k++)
621 m_r(i,k) = m_a(k)*m_x(i,k) + m_y(i,k);
622 if((scalar_x==2)&&(scalar_y==-1))
623 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
626 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
627 #pragma vector always
629 for(size_type k=0;k<n;k++)
630 m_r(i,k) = m_a(k)*m_x(i,k) - m_y(i,k);
631 if((scalar_x==1)&&(scalar_y==2))
632 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
635 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
636 #pragma vector always
638 for(size_type k=0;k<n;k++)
639 m_r(i,k) = m_x(i,k) + m_b(k)*m_y(i,k);
640 if((scalar_x==-1)&&(scalar_y==2))
641 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
644 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
645 #pragma vector always
647 for(size_type k=0;k<n;k++)
648 m_r(i,k) = -m_x(i,k) + m_b(k)*m_y(i,k);
649 if((scalar_x==2)&&(scalar_y==2))
650 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
653 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
654 #pragma vector always
656 for(size_type k=0;k<n;k++)
657 m_r(i,k) = m_a(k)*m_x(i,k) + m_b(k)*m_y(i,k);
664 template<
class RVector,
class XVector,
class YVector,
int scalar_x,
int scalar_y,
int UNROLL>
665 struct MV_AddUnrollFunctor<RVector,typename XVector::non_const_value_type, XVector, typename YVector::non_const_value_type,YVector,scalar_x,scalar_y,UNROLL>
667 typedef typename RVector::device_type device_type;
668 typedef typename RVector::size_type size_type;
673 typename XVector::non_const_value_type m_a;
674 typename YVector::non_const_value_type m_b;
678 MV_AddUnrollFunctor() {n=UNROLL;}
681 KOKKOS_INLINE_FUNCTION
682 void operator()(
const size_type i )
const
684 if((scalar_x==1)&&(scalar_y==1)){
685 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
688 for(size_type k=0;k<UNROLL;k++)
689 m_r(i,k) = m_x(i,k) + m_y(i,k);
691 if((scalar_x==1)&&(scalar_y==-1)){
692 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
695 for(size_type k=0;k<UNROLL;k++)
696 m_r(i,k) = m_x(i,k) - m_y(i,k);
698 if((scalar_x==-1)&&(scalar_y==-1)){
699 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
702 for(size_type k=0;k<UNROLL;k++)
703 m_r(i,k) = -m_x(i,k) - m_y(i,k);
705 if((scalar_x==-1)&&(scalar_y==1)){
706 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
709 for(size_type k=0;k<UNROLL;k++)
710 m_r(i,k) = -m_x(i,k) + m_y(i,k);
712 if((scalar_x==2)&&(scalar_y==1)){
713 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
716 for(size_type k=0;k<UNROLL;k++)
717 m_r(i,k) = m_a*m_x(i,k) + m_y(i,k);
719 if((scalar_x==2)&&(scalar_y==-1)){
720 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
723 for(size_type k=0;k<UNROLL;k++)
724 m_r(i,k) = m_a*m_x(i,k) - m_y(i,k);
726 if((scalar_x==1)&&(scalar_y==2)){
727 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
730 for(size_type k=0;k<UNROLL;k++)
731 m_r(i,k) = m_x(i,k) + m_b*m_y(i,k);
733 if((scalar_x==-1)&&(scalar_y==2)){
734 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
737 for(size_type k=0;k<UNROLL;k++)
738 m_r(i,k) = -m_x(i,k) + m_b*m_y(i,k);
740 if((scalar_x==2)&&(scalar_y==2)){
741 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
744 for(size_type k=0;k<UNROLL;k++)
745 m_r(i,k) = m_a*m_x(i,k) + m_b*m_y(i,k);
750 template<
class RVector,
class XVector,
class YVector,
int scalar_x,
int scalar_y>
751 struct MV_AddVectorFunctor<RVector,typename XVector::non_const_value_type, XVector, typename YVector::non_const_value_type,YVector,scalar_x,scalar_y>
753 typedef typename RVector::device_type device_type;
754 typedef typename RVector::size_type size_type;
759 typename XVector::non_const_value_type m_a;
760 typename YVector::non_const_value_type m_b;
763 MV_AddVectorFunctor() {n=1;}
766 KOKKOS_INLINE_FUNCTION
767 void operator()(
const size_type i )
const
769 if((scalar_x==1)&&(scalar_y==1))
770 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
773 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
774 #pragma vector always
776 for(size_type k=0;k<n;k++)
777 m_r(i,k) = m_x(i,k) + m_y(i,k);
778 if((scalar_x==1)&&(scalar_y==-1))
779 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
782 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
783 #pragma vector always
785 for(size_type k=0;k<n;k++)
786 m_r(i,k) = m_x(i,k) - m_y(i,k);
787 if((scalar_x==-1)&&(scalar_y==-1))
788 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
791 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
792 #pragma vector always
794 for(size_type k=0;k<n;k++)
795 m_r(i,k) = -m_x(i,k) - m_y(i,k);
796 if((scalar_x==-1)&&(scalar_y==1))
797 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
800 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
801 #pragma vector always
803 for(size_type k=0;k<n;k++)
804 m_r(i,k) = -m_x(i,k) + m_y(i,k);
805 if((scalar_x==2)&&(scalar_y==1))
806 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
809 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
810 #pragma vector always
812 for(size_type k=0;k<n;k++)
813 m_r(i,k) = m_a*m_x(i,k) + m_y(i,k);
814 if((scalar_x==2)&&(scalar_y==-1))
815 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
818 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
819 #pragma vector always
821 for(size_type k=0;k<n;k++)
822 m_r(i,k) = m_a*m_x(i,k) - m_y(i,k);
823 if((scalar_x==1)&&(scalar_y==2))
824 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
827 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
828 #pragma vector always
830 for(size_type k=0;k<n;k++)
831 m_r(i,k) = m_x(i,k) + m_b*m_y(i,k);
832 if((scalar_x==-1)&&(scalar_y==2))
833 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
836 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
837 #pragma vector always
839 for(size_type k=0;k<n;k++)
840 m_r(i,k) = -m_x(i,k) + m_b*m_y(i,k);
841 if((scalar_x==2)&&(scalar_y==2))
842 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
845 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
846 #pragma vector always
848 for(size_type k=0;k<n;k++)
849 m_r(i,k) = m_a*m_x(i,k) + m_b*m_y(i,k);
854 template<
class RVector,
class aVector,
class XVector,
class bVector,
class YVector,
int UNROLL>
855 RVector MV_AddUnroll(
const RVector & r,
const aVector &av,
const XVector & x,
856 const bVector &bv,
const YVector & y,
int n,
860 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,1,UNROLL> op ;
866 op.n = x.dimension(1);
871 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,-1,UNROLL> op ;
877 op.n = x.dimension(1);
882 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,1,UNROLL> op ;
888 op.n = x.dimension(1);
893 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1,UNROLL> op ;
899 op.n = x.dimension(1);
904 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,1,UNROLL> op ;
910 op.n = x.dimension(1);
915 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,-1,UNROLL> op ;
921 op.n = x.dimension(1);
926 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,2,UNROLL> op ;
932 op.n = x.dimension(1);
937 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,2,UNROLL> op ;
943 op.n = x.dimension(1);
947 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,2,UNROLL> op ;
953 op.n = x.dimension(1);
959 template<
class RVector,
class aVector,
class XVector,
class bVector,
class YVector>
960 RVector MV_AddUnroll(
const RVector & r,
const aVector &av,
const XVector & x,
961 const bVector &bv,
const YVector & y,
int n,
964 switch (x.dimension(1)){
965 case 1: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 1>( r,av,x,bv,y,n,a,b);
967 case 2: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 2>( r,av,x,bv,y,n,a,b);
969 case 3: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 3>( r,av,x,bv,y,n,a,b);
971 case 4: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 4>( r,av,x,bv,y,n,a,b);
973 case 5: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 5>( r,av,x,bv,y,n,a,b);
975 case 6: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 6>( r,av,x,bv,y,n,a,b);
977 case 7: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 7>( r,av,x,bv,y,n,a,b);
979 case 8: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 8>( r,av,x,bv,y,n,a,b);
981 case 9: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 9>( r,av,x,bv,y,n,a,b);
983 case 10: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 10>( r,av,x,bv,y,n,a,b);
985 case 11: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 11>( r,av,x,bv,y,n,a,b);
987 case 12: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 12>( r,av,x,bv,y,n,a,b);
989 case 13: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 13>( r,av,x,bv,y,n,a,b);
991 case 14: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 14>( r,av,x,bv,y,n,a,b);
993 case 15: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 15>( r,av,x,bv,y,n,a,b);
995 case 16: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 16>( r,av,x,bv,y,n,a,b);
1002 template<
class RVector,
class aVector,
class XVector,
class bVector,
class YVector>
1003 RVector MV_AddVector(
const RVector & r,
const aVector &av,
const XVector & x,
1004 const bVector &bv,
const YVector & y,
int n,
1008 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,1> op ;
1014 op.n = x.dimension(1);
1019 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,-1> op ;
1025 op.n = x.dimension(1);
1030 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,1> op ;
1036 op.n = x.dimension(1);
1041 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1> op ;
1047 op.n = x.dimension(1);
1052 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,1> op ;
1058 op.n = x.dimension(1);
1063 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,-1> op ;
1069 op.n = x.dimension(1);
1074 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,2> op ;
1080 op.n = x.dimension(1);
1085 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,2> op ;
1091 op.n = x.dimension(1);
1095 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,2> op ;
1101 op.n = x.dimension(1);
1107 template<
class RVector,
class aVector,
class XVector,
class bVector,
class YVector>
1108 RVector MV_AddSpecialise(
const RVector & r,
const aVector &av,
const XVector & x,
1109 const bVector &bv,
const YVector & y,
unsigned int n,
1113 if(x.dimension(1)>16)
1114 return MV_AddVector( r,av,x,bv,y,a,b);
1116 if(x.dimension_1()==1) {
1117 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
1118 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
1119 typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
1121 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
1122 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
1123 YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
1125 V_Add(r_1d,av,x_1d,bv,y_1d,n);
1128 return MV_AddUnroll( r,av,x,bv,y,a,b);
1131 template<
class RVector,
class aVector,
class XVector,
class bVector,
class YVector>
1132 RVector MV_Add(
const RVector & r,
const aVector &av,
const XVector & x,
1133 const bVector &bv,
const YVector & y,
int n = -1)
1135 if(n==-1) n = x.dimension_0();
1136 if(x.dimension(1)>16)
1137 return MV_AddVector( r,av,x,bv,y,n,2,2);
1139 if(x.dimension_1()==1) {
1140 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
1141 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
1142 typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
1144 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
1145 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
1146 YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
1148 V_Add(r_1d,av,x_1d,bv,y_1d,n);
1151 return MV_AddUnroll( r,av,x,bv,y,n,2,2);
1154 template<
class RVector,
class XVector,
class YVector>
1155 RVector MV_Add(
const RVector & r,
const XVector & x,
const YVector & y,
int n = -1)
1157 if(n==-1) n = x.dimension_0();
1158 if(x.dimension_1()==1) {
1159 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
1160 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
1161 typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
1163 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
1164 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
1165 YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
1167 V_Add(r_1d,x_1d,y_1d,n);
1170 typename XVector::const_value_type a = 1.0;
1171 return MV_AddSpecialise(r,a,x,a,y,n,1,1);
1175 template<
class RVector,
class XVector,
class bVector,
class YVector>
1176 RVector MV_Add(
const RVector & r,
const XVector & x,
const bVector & bv,
const YVector & y,
int n = -1 )
1178 if(n==-1) n = x.dimension_0();
1179 if(x.dimension_1()==1) {
1180 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
1181 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
1182 typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
1184 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
1185 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
1186 YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
1188 V_Add(r_1d,x_1d,bv,y_1d,n);
1191 MV_AddSpecialise(r,bv,x,bv,y,n,1,2);
1196 template<
class XVector,
class YVector>
1197 struct MV_DotProduct_Right_FunctorVector
1199 typedef typename XVector::device_type device_type;
1200 typedef typename XVector::size_type size_type;
1201 typedef typename XVector::non_const_value_type xvalue_type;
1202 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
1204 size_type value_count;
1207 typedef typename XVector::const_type x_const_type;
1208 typedef typename YVector::const_type y_const_type;
1214 KOKKOS_INLINE_FUNCTION
1215 void operator()(
const size_type i, value_type sum )
const
1217 const size_type numVecs=value_count;
1219 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1222 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1223 #pragma vector always
1225 for(size_type k=0;k<numVecs;k++)
1226 sum[k]+=
IPT::dot( m_x(i,k), m_y(i,k) );
1228 KOKKOS_INLINE_FUNCTION
void init( value_type update)
const
1230 const size_type numVecs = value_count;
1231 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1234 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1235 #pragma vector always
1237 for(size_type k=0;k<numVecs;k++)
1240 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type update ,
1241 const volatile value_type source )
const
1243 const size_type numVecs = value_count;
1244 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1247 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1248 #pragma vector always
1250 for(size_type k=0;k<numVecs;k++){
1251 update[k] += source[k];
1261 template<
class MultiVecViewType>
1262 struct MultiVecDotFunctor {
1263 typedef typename MultiVecViewType::device_type device_type;
1264 typedef typename MultiVecViewType::size_type size_type;
1265 typedef typename MultiVecViewType::value_type mv_value_type;
1269 typedef MultiVecViewType mv_view_type;
1270 typedef typename MultiVecViewType::const_type mv_const_view_type;
1273 mv_const_view_type X_, Y_;
1274 dot_view_type dots_;
1276 size_type value_count;
1278 MultiVecDotFunctor (
const mv_const_view_type& X,
1279 const mv_const_view_type& Y,
1280 const dot_view_type& dots) :
1281 X_ (X), Y_ (Y), dots_ (dots), value_count (X.dimension_1 ())
1283 if (value_count != dots.dimension_0 ()) {
1284 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
1285 cuda_abort(
"Kokkos::MultiVecDotFunctor: value_count does not match the length of 'dots'");
1287 std::ostringstream os;
1288 os <<
"Kokkos::MultiVecDotFunctor: value_count does not match the length "
1289 "of 'dots'. X is " << X.dimension_0 () <<
" x " << X.dimension_1 () <<
1290 ", Y is " << Y.dimension_0 () <<
" x " << Y.dimension_1 () <<
", "
1291 "dots has length " << dots.dimension_0 () <<
", and value_count = " <<
1293 throw std::invalid_argument (os.str ());
1299 KOKKOS_INLINE_FUNCTION
void
1300 operator() (
const size_type i, value_type sum)
const
1302 const size_type numVecs = value_count;
1303 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1306 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1307 #pragma vector always
1309 for (size_type k = 0; k < numVecs; ++k) {
1310 sum[k] +=
IPT::dot (X_(i,k), Y_(i,k));
1314 KOKKOS_INLINE_FUNCTION
void
1315 init (value_type update)
const
1317 const size_type numVecs = value_count;
1318 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1321 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1322 #pragma vector always
1324 for (size_type k = 0; k < numVecs; ++k) {
1329 KOKKOS_INLINE_FUNCTION
void
1330 join (
volatile value_type update,
1331 const volatile value_type source)
const
1333 const size_type numVecs = value_count;
1334 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1337 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1338 #pragma vector always
1340 for (size_type k = 0; k < numVecs; ++k) {
1341 update[k] += source[k];
1346 KOKKOS_INLINE_FUNCTION
void
1347 final (
const value_type dst)
const
1349 const size_type numVecs = value_count;
1351 #if !defined(__CUDA_ARCH__)
1354 std::ostringstream os;
1355 os <<
"numVecs: " << numVecs <<
", dst: [";
1356 for (
size_t j = 0; j < numVecs; ++j) {
1358 if (j + 1 < numVecs) {
1362 os <<
"]" << std::endl;
1363 std::cerr << os.str ();
1367 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1370 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1371 #pragma vector always
1373 for (size_type k = 0; k < numVecs; ++k) {
1377 #if !defined(__CUDA_ARCH__)
1380 std::ostringstream os;
1381 os <<
"numVecs: " << numVecs <<
", dots_: [";
1382 for (
size_t j = 0; j < numVecs; ++j) {
1384 if (j + 1 < numVecs) {
1388 os <<
"]" << std::endl;
1389 std::cerr << os.str ();
1399 template<
class MultiVecViewType>
1400 struct MultiVecNorm2SquaredFunctor {
1401 typedef typename MultiVecViewType::device_type device_type;
1402 typedef typename MultiVecViewType::size_type size_type;
1403 typedef typename MultiVecViewType::value_type mv_value_type;
1407 typedef MultiVecViewType mv_view_type;
1408 typedef typename MultiVecViewType::const_type mv_const_view_type;
1411 mv_const_view_type X_;
1412 norms_view_type norms_;
1414 size_type value_count;
1416 MultiVecNorm2SquaredFunctor (
const mv_const_view_type& X,
1417 const norms_view_type& norms) :
1418 X_ (X), norms_ (norms), value_count (X.dimension_1 ())
1420 if (value_count != norms.dimension_0 ()) {
1421 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
1422 cuda_abort(
"Kokkos::MultiVecNorm2SquaredFunctor: value_count does not match the length of 'norms'");
1424 std::ostringstream os;
1425 os <<
"Kokkos::MultiVecNorm2SquaredFunctor: value_count does not match "
1426 "the length of 'norms'. X is " << X.dimension_0 () <<
" x " <<
1427 X.dimension_1 () <<
", norms has length " << norms.dimension_0 () <<
1428 ", and value_count = " << value_count <<
".";
1429 throw std::invalid_argument (os.str ());
1434 KOKKOS_INLINE_FUNCTION
void
1435 operator() (
const size_type i, value_type sum)
const
1437 const size_type numVecs = value_count;
1438 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1441 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1442 #pragma vector always
1444 for (size_type k = 0; k < numVecs; ++k) {
1446 sum[k] += tmp * tmp;
1450 KOKKOS_INLINE_FUNCTION
void
1451 init (value_type update)
const
1453 const size_type numVecs = value_count;
1454 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1457 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1458 #pragma vector always
1460 for (size_type k = 0; k < numVecs; ++k) {
1465 KOKKOS_INLINE_FUNCTION
void
1466 join (
volatile value_type update,
1467 const volatile value_type source)
const
1469 const size_type numVecs = value_count;
1470 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1473 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1474 #pragma vector always
1476 for (size_type k = 0; k < numVecs; ++k) {
1477 update[k] += source[k];
1482 KOKKOS_INLINE_FUNCTION
void
1483 final (
const value_type dst)
const
1485 const size_type numVecs = value_count;
1486 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1489 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1490 #pragma vector always
1492 for (size_type k = 0; k < numVecs; ++k) {
1502 template<
class MultiVecViewType>
1503 struct MultiVecNorm1Functor {
1504 typedef typename MultiVecViewType::device_type device_type;
1505 typedef typename MultiVecViewType::size_type size_type;
1506 typedef typename MultiVecViewType::value_type mv_value_type;
1510 typedef MultiVecViewType mv_view_type;
1511 typedef typename MultiVecViewType::const_type mv_const_view_type;
1514 mv_const_view_type X_;
1515 norms_view_type norms_;
1517 size_type value_count;
1519 MultiVecNorm1Functor (
const mv_const_view_type& X,
1520 const norms_view_type& norms) :
1521 X_ (X), norms_ (norms), value_count (X.dimension_1 ())
1523 if (value_count != norms.dimension_0 ()) {
1524 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
1525 cuda_abort(
"Kokkos::MultiVecNorm1Functor: value_count does not match the length of 'norms'");
1527 std::ostringstream os;
1528 os <<
"Kokkos::MultiVecNorm1Functor: value_count does not match the "
1529 <<
"length of 'norms'. X is " << X.dimension_0 () <<
" x "
1530 << X.dimension_1 () <<
", norms has length " << norms.dimension_0 ()
1531 <<
", and value_count = " << value_count <<
".";
1532 throw std::invalid_argument (os.str ());
1537 KOKKOS_INLINE_FUNCTION
void
1538 operator() (
const size_type i, value_type sum)
const
1540 const size_type numVecs = value_count;
1541 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1544 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1545 #pragma vector always
1547 for (size_type k = 0; k < numVecs; ++k) {
1552 KOKKOS_INLINE_FUNCTION
void
1553 init (value_type update)
const
1555 const size_type numVecs = value_count;
1556 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1559 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1560 #pragma vector always
1562 for (size_type k = 0; k < numVecs; ++k) {
1567 KOKKOS_INLINE_FUNCTION
void
1568 join (
volatile value_type update,
1569 const volatile value_type source)
const
1571 const size_type numVecs = value_count;
1572 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1575 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1576 #pragma vector always
1578 for (size_type k = 0; k < numVecs; ++k) {
1579 update[k] += source[k];
1584 KOKKOS_INLINE_FUNCTION
void
1585 final (
const value_type dst)
const
1587 const size_type numVecs = value_count;
1588 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1591 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1592 #pragma vector always
1594 for (size_type k = 0; k < numVecs; ++k) {
1604 template<
class MultiVecViewType>
1605 struct MultiVecNormInfFunctor {
1606 typedef typename MultiVecViewType::device_type device_type;
1607 typedef typename MultiVecViewType::size_type size_type;
1608 typedef typename MultiVecViewType::value_type mv_value_type;
1612 typedef MultiVecViewType mv_view_type;
1613 typedef typename MultiVecViewType::const_type mv_const_view_type;
1616 mv_const_view_type X_;
1617 norms_view_type norms_;
1619 size_type value_count;
1621 MultiVecNormInfFunctor (
const mv_const_view_type& X,
1622 const norms_view_type& norms) :
1623 X_ (X), norms_ (norms), value_count (X.dimension_1 ())
1625 if (value_count != norms.dimension_0 ()) {
1626 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
1627 cuda_abort(
"Kokkos::MultiVecNormInfFunctor: value_count does not match the length of 'norms'");
1629 std::ostringstream os;
1630 os <<
"Kokkos::MultiVecNormInfFunctor: value_count does not match the "
1631 <<
"length of 'norms'. X is " << X.dimension_0 () <<
" x "
1632 << X.dimension_1 () <<
", norms has length " << norms.dimension_0 ()
1633 <<
", and value_count = " << value_count <<
".";
1634 throw std::invalid_argument (os.str ());
1639 KOKKOS_INLINE_FUNCTION
void
1640 operator() (
const size_type i, value_type maxes)
const
1642 const size_type numVecs = value_count;
1643 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1646 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1647 #pragma vector always
1649 for (size_type k = 0; k < numVecs; ++k) {
1658 if (curVal < newVal) {
1664 KOKKOS_INLINE_FUNCTION
void
1665 init (value_type update)
const
1667 const size_type numVecs = value_count;
1668 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1671 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1672 #pragma vector always
1674 for (size_type k = 0; k < numVecs; ++k) {
1682 KOKKOS_INLINE_FUNCTION
void
1683 join (
volatile value_type update,
1684 const volatile value_type source)
const
1686 const size_type numVecs = value_count;
1687 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1690 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1691 #pragma vector always
1693 for (size_type k = 0; k < numVecs; ++k) {
1702 if (curVal < newVal) {
1709 KOKKOS_INLINE_FUNCTION
void
1710 final (
const value_type dst)
const
1712 const size_type numVecs = value_count;
1713 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1716 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1717 #pragma vector always
1719 for (size_type k = 0; k < numVecs; ++k) {
1728 template<
class VecViewType>
1729 struct VecDotFunctor {
1730 typedef typename VecViewType::device_type device_type;
1731 typedef typename VecViewType::size_type size_type;
1732 typedef typename VecViewType::value_type mv_value_type;
1735 typedef typename VecViewType::const_type vec_const_view_type;
1740 vec_const_view_type x_, y_;
1743 VecDotFunctor (
const vec_const_view_type& x,
1744 const vec_const_view_type& y,
1745 const dot_view_type& dot) :
1746 x_ (x), y_ (y), dot_ (dot)
1748 if (x.dimension_0 () != y.dimension_0 ()) {
1749 std::ostringstream os;
1750 os <<
"Kokkos::VecDotFunctor: The dimensions of x and y do not match. "
1751 "x.dimension_0() = " << x.dimension_0 ()
1752 <<
" != y.dimension_0() = " << y.dimension_0 () <<
".";
1753 throw std::invalid_argument (os.str ());
1757 KOKKOS_INLINE_FUNCTION
void
1758 operator() (
const size_type i, value_type& sum)
const {
1762 KOKKOS_INLINE_FUNCTION
void
1763 init (value_type& update)
const {
1767 KOKKOS_INLINE_FUNCTION
void
1768 join (
volatile value_type& update,
1769 const volatile value_type& source)
const {
1774 KOKKOS_INLINE_FUNCTION
void final (value_type& dst)
const {
1782 template<
class VecViewType>
1783 struct VecNorm2SquaredFunctor {
1784 typedef typename VecViewType::device_type device_type;
1785 typedef typename VecViewType::size_type size_type;
1786 typedef typename VecViewType::value_type mv_value_type;
1789 typedef typename VecViewType::const_type vec_const_view_type;
1793 vec_const_view_type x_;
1794 norm_view_type norm_;
1800 VecNorm2SquaredFunctor (
const vec_const_view_type& x,
1801 const norm_view_type& norm) :
1802 x_ (x), norm_ (norm)
1805 KOKKOS_INLINE_FUNCTION
void
1806 operator() (
const size_type i, value_type& sum)
const {
1811 KOKKOS_INLINE_FUNCTION
void
1812 init (value_type& update)
const {
1816 KOKKOS_INLINE_FUNCTION
void
1817 join (
volatile value_type& update,
1818 const volatile value_type& source)
const {
1823 KOKKOS_INLINE_FUNCTION
void final (value_type& dst)
const {
1830 template<
class VecViewType>
1831 struct VecNorm1Functor {
1832 typedef typename VecViewType::device_type device_type;
1833 typedef typename VecViewType::size_type size_type;
1834 typedef typename VecViewType::value_type mv_value_type;
1837 typedef typename VecViewType::const_type vec_const_view_type;
1841 vec_const_view_type x_;
1842 norm_view_type norm_;
1848 VecNorm1Functor (
const vec_const_view_type& x,
1849 const norm_view_type& norm) :
1850 x_ (x), norm_ (norm)
1853 KOKKOS_INLINE_FUNCTION
void
1854 operator() (
const size_type i, value_type& sum)
const {
1858 KOKKOS_INLINE_FUNCTION
void
1859 init (value_type& update)
const {
1863 KOKKOS_INLINE_FUNCTION
void
1864 join (
volatile value_type& update,
1865 const volatile value_type& source)
const {
1870 KOKKOS_INLINE_FUNCTION
void final (value_type& dst)
const {
1877 template<
class VecViewType>
1878 struct VecNormInfFunctor {
1879 typedef typename VecViewType::device_type device_type;
1880 typedef typename VecViewType::size_type size_type;
1881 typedef typename VecViewType::value_type mv_value_type;
1884 typedef typename VecViewType::const_type vec_const_view_type;
1888 vec_const_view_type x_;
1889 norm_view_type norm_;
1895 VecNormInfFunctor (
const vec_const_view_type& x,
1896 const norm_view_type& norm) :
1897 x_ (x), norm_ (norm)
1900 KOKKOS_INLINE_FUNCTION
void
1901 operator() (
const size_type i, value_type& curVal)
const {
1909 if (curVal < newVal) {
1914 KOKKOS_INLINE_FUNCTION
void
1915 init (value_type& update)
const {
1922 KOKKOS_INLINE_FUNCTION
void
1923 join (
volatile value_type& update,
1924 const volatile value_type& source)
const {
1931 if (update < source) {
1937 KOKKOS_INLINE_FUNCTION
void final (value_type& dst)
const {
1955 template<
class ViewType>
1956 class SquareRootFunctor {
1958 typedef typename ViewType::device_type device_type;
1959 typedef typename ViewType::size_type size_type;
1961 SquareRootFunctor (
const ViewType& theView) : theView_ (theView) {}
1963 KOKKOS_INLINE_FUNCTION
void operator() (
const size_type i)
const {
1964 typedef typename ViewType::value_type value_type;
1973 template<
class XVector,
class YVector,
int UNROLL>
1974 struct MV_DotProduct_Right_FunctorUnroll
1976 typedef typename XVector::device_type device_type;
1977 typedef typename XVector::size_type size_type;
1978 typedef typename XVector::non_const_value_type xvalue_type;
1979 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
1981 size_type value_count;
1983 typedef typename XVector::const_type x_const_type;
1984 typedef typename YVector::const_type y_const_type;
1991 KOKKOS_INLINE_FUNCTION
1992 void operator()(
const size_type i, value_type sum )
const
1994 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
1997 for(size_type k=0;k<UNROLL;k++)
1998 sum[k]+=
IPT::dot( m_x(i,k), m_y(i,k) );
2000 KOKKOS_INLINE_FUNCTION
void init(
volatile value_type update)
const
2002 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
2005 for(size_type k=0;k<UNROLL;k++)
2008 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type update ,
2009 const volatile value_type source)
const
2011 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
2014 for(size_type k=0;k<UNROLL;k++)
2015 update[k] += source[k] ;
2019 template<
class rVector,
class XVector,
class YVector>
2020 rVector MV_Dot(
const rVector &r,
const XVector & x,
const YVector & y,
int n = -1)
2022 typedef typename XVector::size_type size_type;
2023 const size_type numVecs = x.dimension(1);
2025 if(n<0) n = x.dimension_0();
2028 MV_DotProduct_Right_FunctorVector<XVector,YVector> op;
2031 op.value_count = numVecs;
2039 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,16> op;
2042 op.value_count = numVecs;
2047 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,15> op;
2050 op.value_count = numVecs;
2055 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,14> op;
2058 op.value_count = numVecs;
2063 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,13> op;
2066 op.value_count = numVecs;
2071 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,12> op;
2074 op.value_count = numVecs;
2079 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,11> op;
2082 op.value_count = numVecs;
2087 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,10> op;
2090 op.value_count = numVecs;
2095 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,9> op;
2098 op.value_count = numVecs;
2103 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,8> op;
2106 op.value_count = numVecs;
2111 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,7> op;
2114 op.value_count = numVecs;
2119 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,6> op;
2122 op.value_count = numVecs;
2127 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,5> op;
2130 op.value_count = numVecs;
2135 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,4> op;
2138 op.value_count = numVecs;
2144 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,3> op;
2147 op.value_count = numVecs;
2152 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,2> op;
2155 op.value_count = numVecs;
2160 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
2161 typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
2163 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
2164 YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
2165 r[0] = V_Dot(x_1d,y_1d,n);
2176 template<
class XVector>
2177 struct MV_Sum_Functor
2179 typedef typename XVector::device_type device_type;
2180 typedef typename XVector::size_type size_type;
2181 typedef typename XVector::non_const_value_type xvalue_type;
2182 typedef xvalue_type value_type[];
2184 typename XVector::const_type m_x ;
2185 size_type value_count;
2187 MV_Sum_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
2190 KOKKOS_INLINE_FUNCTION
2191 void operator()(
const size_type i, value_type sum )
const
2193 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2196 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2197 #pragma vector always
2199 for(size_type k=0;k<value_count;k++){
2204 KOKKOS_INLINE_FUNCTION
2205 void init( value_type update)
const
2207 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2210 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2211 #pragma vector always
2213 for(size_type k=0;k<value_count;k++)
2217 KOKKOS_INLINE_FUNCTION
2218 void join(
volatile value_type update ,
2219 const volatile value_type source )
const
2221 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2224 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2225 #pragma vector always
2227 for(size_type k=0;k<value_count;k++){
2228 update[k] += source[k];
2234 template<
class normVector,
class VectorType>
2235 normVector MV_Sum(
const normVector &r,
const VectorType & x,
int n = -1)
2238 n = x.dimension_0 ();
2248 template<
class XVector>
2249 struct MV_Norm1_Functor
2251 typedef typename XVector::device_type device_type;
2252 typedef typename XVector::size_type size_type;
2253 typedef typename XVector::non_const_value_type xvalue_type;
2254 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
2257 typename XVector::const_type m_x ;
2258 size_type value_count;
2260 MV_Norm1_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
2263 KOKKOS_INLINE_FUNCTION
2264 void operator()(
const size_type i, value_type sum )
const
2266 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2269 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2270 #pragma vector always
2272 for(size_type k=0;k<value_count;k++){
2277 KOKKOS_INLINE_FUNCTION
2278 void init( value_type update)
const
2280 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2283 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2284 #pragma vector always
2286 for(size_type k=0;k<value_count;k++)
2290 KOKKOS_INLINE_FUNCTION
2291 void join(
volatile value_type update ,
2292 const volatile value_type source )
const
2294 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2297 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2298 #pragma vector always
2300 for(size_type k=0;k<value_count;k++){
2301 update[k] += source[k];
2306 template<
class normVector,
class VectorType>
2307 normVector MV_Norm1(
const normVector &r,
const VectorType & x,
int n = -1)
2310 n = x.dimension_0 ();
2320 template<
class XVector>
2321 struct MV_NormInf_Functor
2323 typedef typename XVector::device_type device_type;
2324 typedef typename XVector::size_type size_type;
2325 typedef typename XVector::non_const_value_type xvalue_type;
2326 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
2329 typename XVector::const_type m_x ;
2330 size_type value_count;
2332 MV_NormInf_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
2334 KOKKOS_INLINE_FUNCTION
2335 void operator()(
const size_type i, value_type sum )
const
2337 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2340 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2341 #pragma vector always
2343 for(size_type k=0;k<value_count;k++){
2348 KOKKOS_INLINE_FUNCTION
void init( value_type update)
const
2350 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2353 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2354 #pragma vector always
2356 for(size_type k=0;k<value_count;k++)
2359 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type update ,
2360 const volatile value_type source )
const
2362 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2365 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2366 #pragma vector always
2368 for(size_type k=0;k<value_count;k++){
2369 update[k] = MAX(update[k],source[k]);
2375 template<
class normVector,
class VectorType>
2376 normVector MV_NormInf(
const normVector &r,
const VectorType & x,
int n = -1)
2379 n = x.dimension_0 ();
2389 template<
class WeightVector,
class XVector,
int WeightsRanks>
2390 struct MV_DotWeighted_Functor{};
2392 template<
class WeightVector,
class XVector>
2393 struct MV_DotWeighted_Functor<WeightVector,XVector,1>
2395 typedef typename XVector::device_type device_type;
2396 typedef typename XVector::size_type size_type;
2397 typedef typename XVector::non_const_value_type xvalue_type;
2398 typedef typename WeightVector::non_const_value_type wvalue_type;
2399 typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
2400 typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
2401 typedef typename XIPT::dot_type value_type[];
2403 typename WeightVector::const_type m_w ;
2404 typename XVector::const_type m_x ;
2405 size_type value_count;
2407 MV_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x),value_count(x.dimension_1()) {}
2409 KOKKOS_INLINE_FUNCTION
2410 void operator()(
const size_type i, value_type sum )
const
2412 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2415 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2416 #pragma vector always
2418 for(size_type k=0;k<value_count;k++){
2419 sum[k] += XIPT::dot( m_x(i,k), m_x(i,k) ) / WIPT::dot( m_w(i), m_w(i) );
2423 KOKKOS_INLINE_FUNCTION
void init( value_type update)
const
2425 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2428 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2429 #pragma vector always
2431 for(size_type k=0;k<value_count;k++)
2434 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type update ,
2435 const volatile value_type source )
const
2437 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2440 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2441 #pragma vector always
2443 for(size_type k=0;k<value_count;k++){
2444 update[k] += source[k];
2449 template<
class WeightVector,
class XVector>
2450 struct MV_DotWeighted_Functor<WeightVector,XVector,2>
2452 typedef typename XVector::device_type device_type;
2453 typedef typename XVector::size_type size_type;
2454 typedef typename XVector::non_const_value_type xvalue_type;
2455 typedef typename WeightVector::non_const_value_type wvalue_type;
2456 typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
2457 typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
2458 typedef typename XIPT::dot_type value_type[];
2460 typename WeightVector::const_type m_w ;
2461 typename XVector::const_type m_x ;
2462 size_type value_count;
2464 MV_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x),value_count(x.dimension_1()) {}
2466 KOKKOS_INLINE_FUNCTION
2467 void operator()(
const size_type i, value_type sum )
const
2469 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2472 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2473 #pragma vector always
2475 for(size_type k=0;k<value_count;k++){
2476 sum[k] += XIPT::dot( m_x(i,k), m_x(i,k) ) / WIPT::dot( m_w(i,k), m_w(i,k) );
2480 KOKKOS_INLINE_FUNCTION
void init( value_type update)
const
2482 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2485 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2486 #pragma vector always
2488 for(size_type k=0;k<value_count;k++)
2491 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type update ,
2492 const volatile value_type source )
const
2494 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2497 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2498 #pragma vector always
2500 for(size_type k=0;k<value_count;k++){
2501 update[k] += source[k];
2506 template<
class rVector,
class WeightVector,
class XVector>
2507 rVector MV_DotWeighted(
const rVector &r,
const WeightVector & w,
const XVector & x,
int n = -1)
2510 n = x.dimension_0 ();
2513 typedef MV_DotWeighted_Functor<WeightVector, XVector, WeightVector::Rank> functor_type;
2521 template<
class RVector,
class aVector,
class XVector>
2522 struct V_MulScalarFunctor
2524 typedef typename XVector::device_type device_type;
2525 typedef typename XVector::size_type size_type;
2528 typename XVector::const_type m_x ;
2529 typename aVector::const_type m_a ;
2532 KOKKOS_INLINE_FUNCTION
2533 void operator()(
const size_type i)
const
2535 m_r(i) = m_a[0]*m_x(i);
2539 template<
class aVector,
class XVector>
2540 struct V_MulScalarFunctorSelf
2542 typedef typename XVector::device_type device_type;
2543 typedef typename XVector::size_type size_type;
2546 typename aVector::const_type m_a ;
2549 KOKKOS_INLINE_FUNCTION
2550 void operator()(
const size_type i)
const
2556 template<
class RVector,
class DataType,
class Layout,
class Device,
class MemoryManagement,
class Specialisation,
class XVector>
2561 V_MulScalarFunctorSelf<aVector,XVector> op ;
2568 V_MulScalarFunctor<RVector,aVector,XVector> op ;
2576 template<
class RVector,
class XVector>
2577 struct V_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector>
2579 typedef typename XVector::device_type device_type;
2580 typedef typename XVector::size_type size_type;
2583 typename XVector::const_type m_x ;
2584 typename XVector::value_type m_a ;
2587 KOKKOS_INLINE_FUNCTION
2588 void operator()(
const size_type i)
const
2590 m_r(i) = m_a*m_x(i);
2594 template<
class XVector>
2595 struct V_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector>
2597 typedef typename XVector::device_type device_type;
2598 typedef typename XVector::size_type size_type;
2601 typename XVector::non_const_value_type m_a ;
2604 KOKKOS_INLINE_FUNCTION
2605 void operator()(
const size_type i)
const
2612 template<
class RVector,
class XVector>
2613 RVector V_MulScalar(
const RVector & r,
const typename XVector::non_const_value_type &a,
const XVector & x)
2616 V_MulScalarFunctorSelf<typename RVector::value_type,RVector> op ;
2623 V_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> op ;
2631 template<
class RVector,
class XVector,
class YVector,
int scalar_x,
int scalar_y>
2632 struct V_AddVectorFunctor
2634 typedef typename RVector::device_type device_type;
2635 typedef typename RVector::size_type size_type;
2636 typedef typename XVector::non_const_value_type value_type;
2638 typename XVector::const_type m_x ;
2639 typename YVector::const_type m_y ;
2640 const value_type m_a;
2641 const value_type m_b;
2644 V_AddVectorFunctor(
const RVector& r,
const value_type& a,
const XVector& x,
const value_type& b,
const YVector& y):
2645 m_r(r),m_x(x),m_y(y),m_a(a),m_b(b)
2648 KOKKOS_INLINE_FUNCTION
2649 void operator()(
const size_type i )
const
2651 if((scalar_x==1)&&(scalar_y==1))
2652 m_r(i) = m_x(i) + m_y(i);
2653 if((scalar_x==1)&&(scalar_y==-1))
2654 m_r(i) = m_x(i) - m_y(i);
2655 if((scalar_x==-1)&&(scalar_y==-1))
2656 m_r(i) = -m_x(i) - m_y(i);
2657 if((scalar_x==-1)&&(scalar_y==1))
2658 m_r(i) = -m_x(i) + m_y(i);
2659 if((scalar_x==2)&&(scalar_y==1))
2660 m_r(i) = m_a*m_x(i) + m_y(i);
2661 if((scalar_x==2)&&(scalar_y==-1))
2662 m_r(i) = m_a*m_x(i) - m_y(i);
2663 if((scalar_x==1)&&(scalar_y==2))
2664 m_r(i) = m_x(i) + m_b*m_y(i);
2665 if((scalar_x==-1)&&(scalar_y==2))
2666 m_r(i) = -m_x(i) + m_b*m_y(i);
2667 if((scalar_x==2)&&(scalar_y==2))
2668 m_r(i) = m_a*m_x(i) + m_b*m_y(i);
2672 template<
class RVector,
class XVector,
int scalar_x>
2673 struct V_AddVectorSelfFunctor
2675 typedef typename RVector::device_type device_type;
2676 typedef typename RVector::size_type size_type;
2677 typedef typename XVector::non_const_value_type value_type;
2679 typename XVector::const_type m_x ;
2680 const value_type m_a;
2682 V_AddVectorSelfFunctor(
const RVector& r,
const value_type& a,
const XVector& x):
2683 m_r(r),m_x(x),m_a(a)
2686 KOKKOS_INLINE_FUNCTION
2687 void operator()(
const size_type i )
const
2694 m_r(i) += m_a*m_x(i);
2697 template<
class RVector,
class XVector,
class YVector,
int doalpha,
int dobeta>
2698 RVector V_AddVector(
const RVector & r,
const typename XVector::non_const_value_type &av,
const XVector & x,
2699 const typename XVector::non_const_value_type &bv,
const YVector & y,
int n=-1)
2701 if(n == -1) n = x.dimension_0();
2702 if(r.ptr_on_device()==x.ptr_on_device() && doalpha == 1) {
2703 V_AddVectorSelfFunctor<RVector,YVector,dobeta> f(r,bv,y);
2705 }
else if(r.ptr_on_device()==y.ptr_on_device() && dobeta == 1) {
2706 V_AddVectorSelfFunctor<RVector,XVector,doalpha> f(r,av,x);
2709 V_AddVectorFunctor<RVector,XVector,YVector,doalpha,dobeta> f(r,av,x,bv,y);
2715 template<
class RVector,
class XVector,
class YVector>
2716 RVector V_AddVector(
const RVector & r,
const typename XVector::non_const_value_type &av,
const XVector & x,
2717 const typename YVector::non_const_value_type &bv,
const YVector & y,
int n = -1,
2722 V_AddVector<RVector,XVector,YVector,-1,-1>(r,av,x,bv,y,n);
2724 V_AddVector<RVector,XVector,YVector,-1,0>(r,av,x,bv,y,n);
2726 V_AddVector<RVector,XVector,YVector,-1,1>(r,av,x,bv,y,n);
2728 V_AddVector<RVector,XVector,YVector,-1,2>(r,av,x,bv,y,n);
2731 V_AddVector<RVector,XVector,YVector,0,-1>(r,av,x,bv,y,n);
2733 V_AddVector<RVector,XVector,YVector,0,0>(r,av,x,bv,y,n);
2735 V_AddVector<RVector,XVector,YVector,0,1>(r,av,x,bv,y,n);
2737 V_AddVector<RVector,XVector,YVector,0,2>(r,av,x,bv,y,n);
2740 V_AddVector<RVector,XVector,YVector,1,-1>(r,av,x,bv,y,n);
2742 V_AddVector<RVector,XVector,YVector,1,0>(r,av,x,bv,y,n);
2744 V_AddVector<RVector,XVector,YVector,1,1>(r,av,x,bv,y,n);
2746 V_AddVector<RVector,XVector,YVector,1,2>(r,av,x,bv,y,n);
2749 V_AddVector<RVector,XVector,YVector,2,-1>(r,av,x,bv,y,n);
2751 V_AddVector<RVector,XVector,YVector,2,0>(r,av,x,bv,y,n);
2753 V_AddVector<RVector,XVector,YVector,2,1>(r,av,x,bv,y,n);
2755 V_AddVector<RVector,XVector,YVector,2,2>(r,av,x,bv,y,n);
2760 template<
class RVector,
class XVector,
class YVector>
2761 RVector V_Add(
const RVector & r,
const XVector & x,
const YVector & y,
int n=-1)
2763 return V_AddVector( r,1,x,1,y,n,1,1);
2766 template<
class RVector,
class XVector,
class YVector>
2767 RVector V_Add(
const RVector & r,
const XVector & x,
const typename XVector::non_const_value_type & bv,
const YVector & y,
int n=-1 )
2773 return V_AddVector(r,bv,x,bv,y,n,1,b);
2776 template<
class RVector,
class XVector,
class YVector>
2777 RVector V_Add(
const RVector & r,
const typename XVector::non_const_value_type & av,
const XVector & x,
const typename XVector::non_const_value_type & bv,
const YVector & y,
int n=-1 )
2788 return V_AddVector(r,av,x,bv,y,n,a,b);
2791 template<
class XVector,
class YVector>
2794 typedef typename XVector::device_type device_type;
2795 typedef typename XVector::size_type size_type;
2796 typedef typename XVector::non_const_value_type xvalue_type;
2797 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
2803 V_DotFunctor(
const XVector& x,
const YVector& y):
2807 KOKKOS_INLINE_FUNCTION
2808 void operator()(
const size_type &i, value_type &sum )
const
2813 KOKKOS_INLINE_FUNCTION
2814 void init (
volatile value_type& update)
const
2819 KOKKOS_INLINE_FUNCTION
2820 void join(
volatile value_type &update ,
2821 const volatile value_type &source )
const
2827 template<
class XVector,
class YVector>
2828 typename Details::InnerProductSpaceTraits<typename XVector::non_const_value_type>::dot_type
2829 V_Dot(
const XVector & x,
const YVector & y,
int n = -1)
2831 typedef V_DotFunctor<XVector,YVector> Functor;
2833 if (n<0) n = x.dimension_0();
2834 typename Functor::value_type ret_val;
2839 template<
class WeightVector,
class XVector>
2840 struct V_DotWeighted_Functor
2842 typedef typename XVector::device_type device_type;
2843 typedef typename XVector::size_type size_type;
2844 typedef typename XVector::non_const_value_type xvalue_type;
2845 typedef typename WeightVector::non_const_value_type wvalue_type;
2846 typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
2847 typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
2850 typename WeightVector::const_type m_w;
2851 typename XVector::const_type m_x;
2853 V_DotWeighted_Functor (WeightVector w, XVector x) :
2857 KOKKOS_INLINE_FUNCTION
2858 void operator() (
const size_type i, value_type& sum)
const {
2862 KOKKOS_INLINE_FUNCTION
void init (value_type& update)
const {
2866 KOKKOS_INLINE_FUNCTION
void
2867 join (
volatile value_type& update,
2868 const volatile value_type& source)
const
2874 template<
class WeightVector,
class XVector>
2875 typename Details::InnerProductSpaceTraits<typename XVector::non_const_value_type>::dot_type
2876 V_DotWeighted(
const WeightVector & w,
const XVector & x,
int n = -1)
2879 n = x.dimension_0 ();
2882 typedef Details::InnerProductSpaceTraits<typename XVector::non_const_value_type> IPT;
2883 typedef typename IPT::dot_type value_type;
2886 typedef V_DotWeighted_Functor<WeightVector,XVector> functor_type;
2894 template<
class XVector>
2895 struct V_Sum_Functor
2897 typedef typename XVector::device_type device_type;
2898 typedef typename XVector::size_type size_type;
2899 typedef typename XVector::non_const_value_type xvalue_type;
2900 typedef xvalue_type value_type;
2902 typename XVector::const_type m_x ;
2904 V_Sum_Functor(XVector x):m_x(x) {}
2907 KOKKOS_INLINE_FUNCTION
2908 void operator()(
const size_type i, value_type& sum )
const
2913 KOKKOS_INLINE_FUNCTION
2914 void init( value_type& update)
const
2919 KOKKOS_INLINE_FUNCTION
2920 void join(
volatile value_type& update ,
2921 const volatile value_type& source )
const
2928 template<
class VectorType>
2929 typename VectorType::non_const_value_type
2930 V_Sum (
const VectorType& x,
int n = -1)
2933 n = x.dimension_0 ();
2936 typedef typename VectorType::non_const_value_type value_type;
2945 template<
class XVector>
2946 struct V_Norm1_Functor
2948 typedef typename XVector::device_type device_type;
2949 typedef typename XVector::size_type size_type;
2950 typedef typename XVector::non_const_value_type xvalue_type;
2951 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
2954 typename XVector::const_type m_x ;
2956 V_Norm1_Functor(XVector x):m_x(x) {}
2958 KOKKOS_INLINE_FUNCTION
2959 void operator()(
const size_type i, value_type& sum )
const
2963 KOKKOS_INLINE_FUNCTION
void init (value_type& update)
const
2967 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type& update ,
2968 const volatile value_type& source )
const
2974 template<
class VectorType>
2975 typename Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type>::dot_type
2976 V_Norm1(
const VectorType & x,
int n = -1)
2979 n = x.dimension_0 ();
2982 typedef Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type> IPT;
2983 typedef typename IPT::dot_type value_type;
2991 template<
class XVector>
2992 struct V_NormInf_Functor
2994 typedef typename XVector::device_type device_type;
2995 typedef typename XVector::size_type size_type;
2996 typedef typename XVector::non_const_value_type xvalue_type;
2997 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
3000 typename XVector::const_type m_x ;
3002 V_NormInf_Functor(XVector x):m_x(x) {}
3004 KOKKOS_INLINE_FUNCTION
3005 void operator()(
const size_type i, value_type& sum )
const
3010 KOKKOS_INLINE_FUNCTION
void init (value_type& update)
const
3014 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type& update ,
3015 const volatile value_type& source )
const
3017 update = MAX(update,source);
3021 template<
class VectorType>
3022 typename Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type>::dot_type
3023 V_NormInf (
const VectorType& x,
int n = -1)
3026 n = x.dimension_0 ();
3029 typedef Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type> IPT;
3030 typedef typename IPT::dot_type value_type;
3039 template<
class RVector,
class XVector>
3040 struct V_ReciprocalFunctor
3042 typedef typename XVector::device_type device_type;
3043 typedef typename XVector::size_type size_type;
3046 typename XVector::const_type m_x ;
3048 V_ReciprocalFunctor(RVector r, XVector x):m_r(r),m_x(x) {}
3051 KOKKOS_INLINE_FUNCTION
3052 void operator()(
const size_type i)
const
3058 template<
class XVector>
3059 struct V_ReciprocalSelfFunctor
3061 typedef typename XVector::device_type device_type;
3062 typedef typename XVector::size_type size_type;
3066 V_ReciprocalSelfFunctor(XVector x):m_x(x) {}
3069 KOKKOS_INLINE_FUNCTION
3070 void operator()(
const size_type i)
const
3076 template<
class RVector,
class XVector>
3077 RVector V_Reciprocal(
const RVector & r,
const XVector & x)
3086 V_ReciprocalSelfFunctor<XVector> op(x) ;
3091 V_ReciprocalFunctor<RVector,XVector> op(r,x) ;
3099 template<
class XVector>
3100 struct V_ReciprocalThresholdSelfFunctor
3102 typedef typename XVector::device_type device_type;
3103 typedef typename XVector::size_type size_type;
3104 typedef typename XVector::non_const_value_type value_type;
3109 const value_type m_min_val;
3110 const value_type m_min_val_mag;
3112 V_ReciprocalThresholdSelfFunctor(
const XVector& x,
const value_type& min_val) :
3113 m_x(x), m_min_val(min_val), m_min_val_mag(KAT::
abs(min_val)) {}
3116 KOKKOS_INLINE_FUNCTION
3117 void operator()(
const size_type i)
const
3119 if (
KAT::abs(m_x(i)) < m_min_val_mag)
3126 template<
class XVector>
3127 XVector V_ReciprocalThreshold(
const XVector & x,
const typename XVector::non_const_value_type& min_val )
3129 V_ReciprocalThresholdSelfFunctor<XVector> op(x,min_val) ;
3137 template<
class RVector,
class XVector>
3140 typedef typename XVector::device_type device_type;
3141 typedef typename XVector::size_type size_type;
3144 typename XVector::const_type m_x ;
3146 V_AbsFunctor(RVector r, XVector x):m_r(r),m_x(x) {}
3149 KOKKOS_INLINE_FUNCTION
3150 void operator()(
const size_type i)
const
3156 template<
class XVector>
3157 struct V_AbsSelfFunctor
3159 typedef typename XVector::device_type device_type;
3160 typedef typename XVector::size_type size_type;
3164 V_AbsSelfFunctor(XVector x):m_x(x) {}
3167 KOKKOS_INLINE_FUNCTION
3168 void operator()(
const size_type i)
const
3174 template<
class RVector,
class XVector>
3175 RVector V_Abs(
const RVector & r,
const XVector & x)
3184 V_AbsSelfFunctor<XVector> op(x) ;
3189 V_AbsFunctor<RVector,XVector> op(r,x) ;
3197 template<
class CVector,
class AVector,
class BVector>
3198 struct V_ElementWiseMultiplyFunctor
3200 typedef typename CVector::device_type device_type;
3201 typedef typename CVector::size_type size_type;
3203 typename CVector::const_value_type m_c;
3205 typename AVector::const_value_type m_ab;
3206 typename AVector::const_type m_A ;
3207 typename BVector::const_type m_B ;
3209 V_ElementWiseMultiplyFunctor(
3210 typename CVector::const_value_type c,
3212 typename AVector::const_value_type ab,
3213 typename AVector::const_type A,
3214 typename BVector::const_type B):
3215 m_c(c),m_C(C),m_ab(ab),m_A(A),m_B(B)
3219 KOKKOS_INLINE_FUNCTION
3220 void operator()(
const size_type i)
const
3222 m_C(i) = m_c*m_C(i) + m_ab*m_A(i)*m_B(i);
3227 template<
class CVector,
class AVector,
class BVector>
3228 CVector V_ElementWiseMultiply(
3229 typename CVector::const_value_type c,
3231 typename AVector::const_value_type ab,
3252 V_ElementWiseMultiplyFunctor<CVector,AVector,BVector> op(c,C,ab,A,B) ;
static KOKKOS_FORCEINLINE_FUNCTION T sqrt(const T &x)
The square root of x.
static KOKKOS_FORCEINLINE_FUNCTION mag_type abs(const T &x)
The absolute value (magnitude) of x.
Memory layout tag indicating left-to-right (Fortran scheme) striding of multi-indices.
ArithTraits< T >::mag_type mag_type
The type returned by norm(x) for a value x of type T.
static KOKKOS_FORCEINLINE_FUNCTION T one()
The one value of T; the multiplicative identity.
T dot_type
The type returned by dot(x,y) for values x and y of type T.
void parallel_reduce(const ExecPolicy &policy, const FunctorType &functor, typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Parallel reduction.
static KOKKOS_FORCEINLINE_FUNCTION dot_type dot(const T &x, const T &y)
The "dot product" of two values x and y of type T.
KOKKOS_INLINE_FUNCTION RealType abs(const complex< RealType > &x)
Absolute value (magnitude) of a complex number.
View to an array of data.
static KOKKOS_FORCEINLINE_FUNCTION T zero()
The zero value of T; the arithmetic identity.
Traits class for inner product space operations on type T.
Memory layout tag indicating right-to-left (C or lexigraphical scheme) striding of multi-indices...
static KOKKOS_FORCEINLINE_FUNCTION mag_type norm(const T &x)
The "norm" (absolute value or magnitude) of a value x of type T.
Declaration and definition of Kokkos::Details::InnerProductSpaceTraits.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
T mag_type
The type of the magnitude (absolute value) of T.
Traits class for arithmetic on type T.