Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_MV.hpp
1 #ifndef KOKKOS_MULTIVECTOR_H_
2 #define KOKKOS_MULTIVECTOR_H_
3 
4 #include <Kokkos_Core.hpp>
6 #include <ctime>
7 
8 #include <iostream>
9 #include <stdexcept> // For some recently added error handling
10 
11 #define MAX(a,b) (a<b?b:a)
12 
13 namespace Kokkos {
14 
15 
16 template<typename Scalar, class device>
17 struct MultiVectorDynamic{
18 #ifdef KOKKOS_USE_CUSPARSE
19  typedef typename Kokkos::LayoutLeft layout;
20 #else
21 #ifdef KOKKOS_USE_MKL
22  typedef typename Kokkos::LayoutRight layout;
23 #else
24  typedef typename device::array_layout layout;
25 #endif
26 #endif
27  typedef typename Kokkos::View<Scalar** , layout, device> type ;
28  typedef typename Kokkos::View<const Scalar** , layout, device> const_type ;
30  MultiVectorDynamic() {}
31  ~MultiVectorDynamic() {}
32 };
33 
34 template<typename Scalar, class device, int n>
35 struct MultiVectorStatic{
36  typedef Scalar scalar;
37  typedef typename device::array_layout layout;
38  typedef typename Kokkos::View<Scalar*[n] , layout, device> type ;
39  typedef typename Kokkos::View<const Scalar*[n] , layout, device> const_type ;
41  MultiVectorStatic() {}
42  ~MultiVectorStatic() {}
43 };
44 
45 
46 
47 /*------------------------------------------------------------------------------------------
48  *-------------------------- Multiply with scalar: y = a * x -------------------------------
49  *------------------------------------------------------------------------------------------*/
50 template<class RVector, class aVector, class XVector>
51 struct MV_MulScalarFunctor
52 {
53  typedef typename XVector::device_type device_type;
54  typedef typename XVector::size_type size_type;
55 
56  RVector m_r;
57  typename XVector::const_type m_x ;
58  typename aVector::const_type m_a ;
59  size_type n;
60  MV_MulScalarFunctor() {n=1;}
61  //--------------------------------------------------------------------------
62 
63  KOKKOS_INLINE_FUNCTION
64  void operator()( const size_type i) const
65  {
66 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
67 #pragma ivdep
68 #endif
69  for(size_type k=0;k<n;k++)
70  m_r(i,k) = m_a[k]*m_x(i,k);
71  }
72 };
73 
74 template<class aVector, class XVector>
75 struct MV_MulScalarFunctorSelf
76 {
77  typedef typename XVector::device_type device_type;
78  typedef typename XVector::size_type size_type;
79 
80  XVector m_x;
81  typename aVector::const_type m_a ;
82  size_type n;
83  //--------------------------------------------------------------------------
84 
85  KOKKOS_INLINE_FUNCTION
86  void operator()( const size_type i) const
87  {
88 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
89 #pragma ivdep
90 #endif
91  for(size_type k=0;k<n;k++)
92  m_x(i,k) *= m_a[k];
93  }
94 };
95 
96 template<class RVector, class DataType,class Layout,class Device, class MemoryManagement,class Specialisation, class XVector>
97 RVector MV_MulScalar (const RVector& r, const typename Kokkos::View<DataType,Layout,Device,MemoryManagement,Specialisation>& a, const XVector& x)
98 {
100  if (r == x) {
101  MV_MulScalarFunctorSelf<aVector,XVector> op ;
102  op.m_x = x ;
103  op.m_a = a ;
104  op.n = x.dimension(1);
105  Kokkos::parallel_for (x.dimension (0), op);
106  return r;
107  }
108 
109  MV_MulScalarFunctor<RVector,aVector,XVector> op ;
110  op.m_r = r ;
111  op.m_x = x ;
112  op.m_a = a ;
113  op.n = x.dimension(1);
114  Kokkos::parallel_for( x.dimension(0) , op );
115  return r;
116 }
117 
118 template<class RVector, class XVector>
119 struct MV_MulScalarFunctor<RVector,typename RVector::value_type,XVector>
120 {
121  typedef typename XVector::device_type device_type;
122  typedef typename XVector::size_type size_type;
123 
124  RVector m_r;
125  typename XVector::const_type m_x ;
126  typename RVector::value_type m_a ;
127  size_type n;
128  MV_MulScalarFunctor() {n=1;}
129  //--------------------------------------------------------------------------
130 
131  KOKKOS_INLINE_FUNCTION
132  void operator()( const size_type i) const
133  {
134 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
135 #pragma ivdep
136 #endif
137  for(size_type k=0;k<n;k++)
138  m_r(i,k) = m_a*m_x(i,k);
139  }
140 };
141 
142 template<class XVector>
143 struct MV_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector>
144 {
145  typedef typename XVector::device_type device_type;
146  typedef typename XVector::size_type size_type;
147 
148  XVector m_x;
149  typename XVector::non_const_value_type m_a ;
150  size_type n;
151  //--------------------------------------------------------------------------
152 
153  KOKKOS_INLINE_FUNCTION
154  void operator()( const size_type i) const
155  {
156 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
157 #pragma ivdep
158 #endif
159  for(size_type k=0;k<n;k++)
160  m_x(i,k) *= m_a;
161  }
162 };
163 
164 template<class RVector, class XVector>
165 RVector MV_MulScalar( const RVector & r, const typename XVector::non_const_value_type &a, const XVector & x)
166 {
167  /*if(r.dimension_1()==1) {
168  typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
169  typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
170 
171  RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
172  XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
173  return V_MulScalar(r_1d,a,x_1d);
174  }*/
175  if (r == x) {
176  MV_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector> op ;
177  op.m_x = x ;
178  op.m_a = a ;
179  op.n = x.dimension(1);
180  Kokkos::parallel_for (x.dimension (0), op);
181  return r;
182  }
183 
184  MV_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> op ;
185  op.m_r = r ;
186  op.m_x = x ;
187  op.m_a = a ;
188  op.n = x.dimension(1);
189  Kokkos::parallel_for( x.dimension(0) , op );
190  return r;
191 }
192 
193 /*------------------------------------------------------------------------------------------
194  *-------------------------- Reciprocal element wise: y[i] = 1/x[i] ------------------------
195  *------------------------------------------------------------------------------------------*/
196 template<class RVector, class XVector>
197 struct MV_ReciprocalFunctor
198 {
199  typedef typename XVector::device_type device_type;
200  typedef typename XVector::size_type size_type;
201 
202  RVector m_r;
203  typename XVector::const_type m_x ;
204 
205  const size_type m_n;
206  MV_ReciprocalFunctor(RVector r, XVector x, size_type n):m_r(r),m_x(x),m_n(n) {}
207  //--------------------------------------------------------------------------
208 
209  KOKKOS_INLINE_FUNCTION
210  void operator()( const size_type i) const
211  {
212 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
213 #pragma ivdep
214 #endif
215  for(size_type k=0;k<m_n;k++)
217  }
218 };
219 
220 template<class XVector>
221 struct MV_ReciprocalSelfFunctor
222 {
223  typedef typename XVector::device_type device_type;
224  typedef typename XVector::size_type size_type;
225 
226  XVector m_x ;
227 
228  const size_type m_n;
229  MV_ReciprocalSelfFunctor(XVector x, size_type n):m_x(x),m_n(n) {}
230  //--------------------------------------------------------------------------
231 
232  KOKKOS_INLINE_FUNCTION
233  void operator()( const size_type i) const
234  {
235 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
236 #pragma ivdep
237 #endif
238  for(size_type k=0;k<m_n;k++)
240  }
241 };
242 
243 template<class RVector, class XVector>
244 RVector MV_Reciprocal( const RVector & r, const XVector & x)
245 {
246  // TODO: Add error check (didn't link for some reason?)
247  /*if(r.dimension_0() != x.dimension_0())
248  Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Reciprocal -- dimension(0) of r and x don't match");
249  if(r.dimension_1() != x.dimension_1())
250  Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Reciprocal -- dimension(1) of r and x don't match");*/
251 
252  //TODO: Get 1D version done
253  /*if(r.dimension_1()==1) {
254  typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
255  typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
256 
257  RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
258  XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
259  return V_MulScalar(r_1d,a,x_1d);
260  }*/
261  if(r==x) {
262  MV_ReciprocalSelfFunctor<XVector> op(x,x.dimension_1()) ;
263  Kokkos::parallel_for( x.dimension_0() , op );
264  return r;
265  }
266 
267  MV_ReciprocalFunctor<RVector,XVector> op(r,x,x.dimension_1()) ;
268  Kokkos::parallel_for( x.dimension_0() , op );
269  return r;
270 }
271 
272 /*------------------------------------------------------------------------------------------
273  *------------------- Reciprocal element wise with threshold: x[i] = 1/x[i] ----------------
274  *------------------------------------------------------------------------------------------*/
275 template<class XVector>
276 struct MV_ReciprocalThresholdSelfFunctor
277 {
278  typedef typename XVector::device_type device_type;
279  typedef typename XVector::size_type size_type;
280  typedef typename XVector::non_const_value_type value_type;
282  typedef typename KAT::mag_type mag_type;
283 
284  const XVector m_x;
285  const value_type m_min_val;
286  const value_type m_min_val_mag;
287  const size_type m_n;
288 
289  MV_ReciprocalThresholdSelfFunctor(const XVector& x, const value_type& min_val, const size_type n) :
290  m_x(x), m_min_val(min_val), m_min_val_mag(KAT::abs(min_val)), m_n(n) {}
291  //--------------------------------------------------------------------------
292 
293  KOKKOS_INLINE_FUNCTION
294  void operator()( const size_type i) const
295  {
296 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
297 #pragma ivdep
298 #endif
299  for(size_type k=0;k<m_n;k++) {
300  if (KAT::abs(m_x(i,k)) < m_min_val_mag)
301  m_x(i,k) = m_min_val;
302  else
303  m_x(i,k) = KAT::one() / m_x(i,k);
304  }
305  }
306 };
307 
308 template<class XVector>
309 XVector MV_ReciprocalThreshold( const XVector & x, const typename XVector::non_const_value_type& min_val )
310 {
311  MV_ReciprocalThresholdSelfFunctor<XVector> op(x,min_val,x.dimension_1()) ;
312  Kokkos::parallel_for( x.dimension_0() , op );
313  return x;
314 }
315 
316 /*------------------------------------------------------------------------------------------
317  *-------------------------- Abs element wise: y[i] = abs(x[i]) ------------------------
318  *------------------------------------------------------------------------------------------*/
319 template<class RVector, class XVector>
320 struct MV_AbsFunctor
321 {
322  typedef typename XVector::device_type device_type;
323  typedef typename XVector::size_type size_type;
324 
325  RVector m_r;
326  typename XVector::const_type m_x ;
327 
328  const size_type m_n;
329  MV_AbsFunctor(RVector r, XVector x, size_type n):m_r(r),m_x(x),m_n(n) {}
330  //--------------------------------------------------------------------------
331 
332  KOKKOS_INLINE_FUNCTION
333  void operator()( const size_type i) const
334  {
335 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
336 #pragma ivdep
337 #endif
338  for(size_type k=0;k<m_n;k++)
340  }
341 };
342 
343 template<class XVector>
344 struct MV_AbsSelfFunctor
345 {
346  typedef typename XVector::device_type device_type;
347  typedef typename XVector::size_type size_type;
348 
349  XVector m_x ;
350 
351  const size_type m_n;
352  MV_AbsSelfFunctor(XVector x, size_type n):m_x(x),m_n(n) {}
353  //--------------------------------------------------------------------------
354 
355  KOKKOS_INLINE_FUNCTION
356  void operator()( const size_type i) const
357  {
358 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
359 #pragma ivdep
360 #endif
361  for(size_type k=0;k<m_n;k++)
363  }
364 };
365 
366 template<class RVector, class XVector>
367 RVector MV_Abs( const RVector & r, const XVector & x)
368 {
369  // TODO: Add error check (didn't link for some reason?)
370  /*if(r.dimension_0() != x.dimension_0())
371  Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Abs -- dimension(0) of r and x don't match");
372  if(r.dimension_1() != x.dimension_1())
373  Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Abs -- dimension(1) of r and x don't match");*/
374 
375  //TODO: Get 1D version done
376  /*if(r.dimension_1()==1) {
377  typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
378  typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
379 
380  RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
381  XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
382  return V_Abs(r_1d,x_1d);
383  }*/
384  if(r==x) {
385  MV_AbsSelfFunctor<XVector> op(x,x.dimension_1()) ;
386  Kokkos::parallel_for( x.dimension_0() , op );
387  return r;
388  }
389 
390  MV_AbsFunctor<RVector,XVector> op(r,x,x.dimension_1()) ;
391  Kokkos::parallel_for( x.dimension_0() , op );
392  return r;
393 }
394 
395 /*------------------------------------------------------------------------------------------
396  *------ ElementWiseMultiply element wise: C(i,j) = c*C(i,j) + ab*A(i)*B(i,j) --------------
397  *------------------------------------------------------------------------------------------*/
398 template<class CVector, class AVector, class BVector>
399 struct MV_ElementWiseMultiplyFunctor
400 {
401  typedef typename CVector::device_type device_type;
402  typedef typename CVector::size_type size_type;
403 
404  typename CVector::const_value_type m_c;
405  CVector m_C;
406  typename AVector::const_value_type m_ab;
407  typename AVector::const_type m_A ;
408  typename BVector::const_type m_B ;
409 
410  const size_type m_n;
411  MV_ElementWiseMultiplyFunctor(
412  typename CVector::const_value_type c,
413  CVector C,
414  typename AVector::const_value_type ab,
415  typename AVector::const_type A,
416  typename BVector::const_type B,
417  const size_type n):
418  m_c(c),m_C(C),m_ab(ab),m_A(A),m_B(B),m_n(n)
419  {}
420  //--------------------------------------------------------------------------
421 
422  KOKKOS_INLINE_FUNCTION
423  void operator()( const size_type i) const
424  {
425  typename AVector::const_value_type Ai = m_A(i);
426 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
427 #pragma ivdep
428 #endif
429  for(size_type k=0;k<m_n;k++)
430  m_C(i,k) = m_c*m_C(i,k) + m_ab*Ai*m_B(i,k);
431  }
432 };
433 
434 
435 template<class CVector, class AVector, class BVector>
436 CVector MV_ElementWiseMultiply(
437  typename CVector::const_value_type c,
438  CVector C,
439  typename AVector::const_value_type ab,
440  AVector A,
441  BVector B
442  )
443 {
444  // TODO: Add error check (didn't link for some reason?)
445  /*if(r.dimension_0() != x.dimension_0())
446  Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(0) of r and x don't match");
447  if(r.dimension_1() != x.dimension_1())
448  Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(1) of r and x don't match");*/
449 
450  //TODO: Get 1D version done
451  /*if(r.dimension_1()==1) {
452  typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
453  typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
454 
455  RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
456  XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
457  return V_ElementWiseMultiply(r_1d,x_1d);
458  }*/
459 
460  MV_ElementWiseMultiplyFunctor<CVector,AVector,BVector> op(c,C,ab,A,B,C.dimension_1()) ;
461  Kokkos::parallel_for( C.dimension_0() , op );
462  return C;
463 }
464 
465 /*------------------------------------------------------------------------------------------
466  *-------------------------- Vector Add: r = a*x + b*y -------------------------------------
467  *------------------------------------------------------------------------------------------*/
468 
469 /* Variants of Functors with a and b being vectors. */
470 
471 //Unroll for n<=16
472 template<class RVector,class aVector, class XVector, class bVector, class YVector, int scalar_x, int scalar_y,int UNROLL>
473 struct MV_AddUnrollFunctor
474 {
475  typedef typename RVector::device_type device_type;
476  typedef typename RVector::size_type size_type;
477 
478  RVector m_r ;
479  XVector m_x ;
480  YVector m_y ;
481  aVector m_a;
482  bVector m_b;
483  size_type n;
484  size_type start;
485 
486  MV_AddUnrollFunctor() {n=UNROLL;}
487  //--------------------------------------------------------------------------
488 
489  KOKKOS_INLINE_FUNCTION
490  void operator()( const size_type i ) const
491  {
492  if((scalar_x==1)&&(scalar_y==1)){
493 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
494 #pragma unroll
495 #endif
496  for(size_type k=0;k<UNROLL;k++)
497  m_r(i,k) = m_x(i,k) + m_y(i,k);
498  }
499  if((scalar_x==1)&&(scalar_y==-1)){
500 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
501 #pragma unroll
502 #endif
503  for(size_type k=0;k<UNROLL;k++)
504  m_r(i,k) = m_x(i,k) - m_y(i,k);
505  }
506  if((scalar_x==-1)&&(scalar_y==-1)){
507 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
508 #pragma unroll
509 #endif
510 for(size_type k=0;k<UNROLL;k++)
511  m_r(i,k) = -m_x(i,k) - m_y(i,k);
512  }
513  if((scalar_x==-1)&&(scalar_y==1)){
514 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
515 #pragma unroll
516 #endif
517 for(size_type k=0;k<UNROLL;k++)
518  m_r(i,k) = -m_x(i,k) + m_y(i,k);
519  }
520  if((scalar_x==2)&&(scalar_y==1)){
521 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
522 #pragma unroll
523 #endif
524 for(size_type k=0;k<UNROLL;k++)
525  m_r(i,k) = m_a(k)*m_x(i,k) + m_y(i,k);
526  }
527  if((scalar_x==2)&&(scalar_y==-1)){
528 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
529 #pragma unroll
530 #endif
531 for(size_type k=0;k<UNROLL;k++)
532  m_r(i,k) = m_a(k)*m_x(i,k) - m_y(i,k);
533  }
534  if((scalar_x==1)&&(scalar_y==2)){
535 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
536 #pragma unroll
537 #endif
538 for(size_type k=0;k<UNROLL;k++)
539  m_r(i,k) = m_x(i,k) + m_b(k)*m_y(i,k);
540  }
541  if((scalar_x==-1)&&(scalar_y==2)){
542 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
543 #pragma unroll
544 #endif
545 for(size_type k=0;k<UNROLL;k++)
546  m_r(i,k) = -m_x(i,k) + m_b(k)*m_y(i,k);
547  }
548  if((scalar_x==2)&&(scalar_y==2)){
549 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
550 #pragma unroll
551 #endif
552 for(size_type k=0;k<UNROLL;k++)
553  m_r(i,k) = m_a(k)*m_x(i,k) + m_b(k)*m_y(i,k);
554  }
555  }
556 };
557 
558 template<class RVector,class aVector, class XVector, class bVector, class YVector, int scalar_x, int scalar_y>
559 struct MV_AddVectorFunctor
560 {
561  typedef typename RVector::device_type device_type;
562  typedef typename RVector::size_type size_type;
563 
564  RVector m_r ;
565  XVector m_x ;
566  YVector m_y ;
567  aVector m_a;
568  bVector m_b;
569  size_type n;
570 
571  MV_AddVectorFunctor() {n=1;}
572  //--------------------------------------------------------------------------
573 
574  KOKKOS_INLINE_FUNCTION
575  void operator()( const size_type i ) const
576  {
577  if((scalar_x==1)&&(scalar_y==1))
578 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
579 #pragma ivdep
580 #endif
581 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
582 #pragma vector always
583 #endif
584  for(size_type k=0;k<n;k++)
585  m_r(i,k) = m_x(i,k) + m_y(i,k);
586  if((scalar_x==1)&&(scalar_y==-1))
587 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
588 #pragma ivdep
589 #endif
590 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
591 #pragma vector always
592 #endif
593  for(size_type k=0;k<n;k++)
594  m_r(i,k) = m_x(i,k) - m_y(i,k);
595  if((scalar_x==-1)&&(scalar_y==-1))
596 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
597 #pragma ivdep
598 #endif
599 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
600 #pragma vector always
601 #endif
602  for(size_type k=0;k<n;k++)
603  m_r(i,k) = -m_x(i,k) - m_y(i,k);
604  if((scalar_x==-1)&&(scalar_y==1))
605 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
606 #pragma ivdep
607 #endif
608 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
609 #pragma vector always
610 #endif
611  for(size_type k=0;k<n;k++)
612  m_r(i,k) = -m_x(i,k) + m_y(i,k);
613  if((scalar_x==2)&&(scalar_y==1))
614 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
615 #pragma ivdep
616 #endif
617 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
618 #pragma vector always
619 #endif
620  for(size_type k=0;k<n;k++)
621  m_r(i,k) = m_a(k)*m_x(i,k) + m_y(i,k);
622  if((scalar_x==2)&&(scalar_y==-1))
623 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
624 #pragma ivdep
625 #endif
626 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
627 #pragma vector always
628 #endif
629  for(size_type k=0;k<n;k++)
630  m_r(i,k) = m_a(k)*m_x(i,k) - m_y(i,k);
631  if((scalar_x==1)&&(scalar_y==2))
632 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
633 #pragma ivdep
634 #endif
635 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
636 #pragma vector always
637 #endif
638  for(size_type k=0;k<n;k++)
639  m_r(i,k) = m_x(i,k) + m_b(k)*m_y(i,k);
640  if((scalar_x==-1)&&(scalar_y==2))
641 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
642 #pragma ivdep
643 #endif
644 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
645 #pragma vector always
646 #endif
647  for(size_type k=0;k<n;k++)
648  m_r(i,k) = -m_x(i,k) + m_b(k)*m_y(i,k);
649  if((scalar_x==2)&&(scalar_y==2))
650 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
651 #pragma ivdep
652 #endif
653 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
654 #pragma vector always
655 #endif
656  for(size_type k=0;k<n;k++)
657  m_r(i,k) = m_a(k)*m_x(i,k) + m_b(k)*m_y(i,k);
658 
659  }
660 };
661 
662 /* Variants of Functors with a and b being scalars. */
663 
664 template<class RVector, class XVector, class YVector, int scalar_x, int scalar_y,int UNROLL>
665 struct MV_AddUnrollFunctor<RVector,typename XVector::non_const_value_type, XVector, typename YVector::non_const_value_type,YVector,scalar_x,scalar_y,UNROLL>
666 {
667  typedef typename RVector::device_type device_type;
668  typedef typename RVector::size_type size_type;
669 
670  RVector m_r ;
671  XVector m_x ;
672  YVector m_y ;
673  typename XVector::non_const_value_type m_a;
674  typename YVector::non_const_value_type m_b;
675  size_type n;
676  size_type start;
677 
678  MV_AddUnrollFunctor() {n=UNROLL;}
679  //--------------------------------------------------------------------------
680 
681  KOKKOS_INLINE_FUNCTION
682  void operator()( const size_type i ) const
683  {
684  if((scalar_x==1)&&(scalar_y==1)){
685 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
686 #pragma unroll
687 #endif
688  for(size_type k=0;k<UNROLL;k++)
689  m_r(i,k) = m_x(i,k) + m_y(i,k);
690  }
691  if((scalar_x==1)&&(scalar_y==-1)){
692 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
693 #pragma unroll
694 #endif
695  for(size_type k=0;k<UNROLL;k++)
696  m_r(i,k) = m_x(i,k) - m_y(i,k);
697  }
698  if((scalar_x==-1)&&(scalar_y==-1)){
699 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
700 #pragma unroll
701 #endif
702 for(size_type k=0;k<UNROLL;k++)
703  m_r(i,k) = -m_x(i,k) - m_y(i,k);
704  }
705  if((scalar_x==-1)&&(scalar_y==1)){
706 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
707 #pragma unroll
708 #endif
709 for(size_type k=0;k<UNROLL;k++)
710  m_r(i,k) = -m_x(i,k) + m_y(i,k);
711  }
712  if((scalar_x==2)&&(scalar_y==1)){
713 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
714 #pragma unroll
715 #endif
716 for(size_type k=0;k<UNROLL;k++)
717  m_r(i,k) = m_a*m_x(i,k) + m_y(i,k);
718  }
719  if((scalar_x==2)&&(scalar_y==-1)){
720 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
721 #pragma unroll
722 #endif
723 for(size_type k=0;k<UNROLL;k++)
724  m_r(i,k) = m_a*m_x(i,k) - m_y(i,k);
725  }
726  if((scalar_x==1)&&(scalar_y==2)){
727 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
728 #pragma unroll
729 #endif
730 for(size_type k=0;k<UNROLL;k++)
731  m_r(i,k) = m_x(i,k) + m_b*m_y(i,k);
732  }
733  if((scalar_x==-1)&&(scalar_y==2)){
734 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
735 #pragma unroll
736 #endif
737 for(size_type k=0;k<UNROLL;k++)
738  m_r(i,k) = -m_x(i,k) + m_b*m_y(i,k);
739  }
740  if((scalar_x==2)&&(scalar_y==2)){
741 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
742 #pragma unroll
743 #endif
744 for(size_type k=0;k<UNROLL;k++)
745  m_r(i,k) = m_a*m_x(i,k) + m_b*m_y(i,k);
746  }
747  }
748 };
749 
750 template<class RVector, class XVector, class YVector, int scalar_x, int scalar_y>
751 struct MV_AddVectorFunctor<RVector,typename XVector::non_const_value_type, XVector, typename YVector::non_const_value_type,YVector,scalar_x,scalar_y>
752 {
753  typedef typename RVector::device_type device_type;
754  typedef typename RVector::size_type size_type;
755 
756  RVector m_r ;
757  XVector m_x ;
758  YVector m_y ;
759  typename XVector::non_const_value_type m_a;
760  typename YVector::non_const_value_type m_b;
761  size_type n;
762 
763  MV_AddVectorFunctor() {n=1;}
764  //--------------------------------------------------------------------------
765 
766  KOKKOS_INLINE_FUNCTION
767  void operator()( const size_type i ) const
768  {
769  if((scalar_x==1)&&(scalar_y==1))
770 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
771 #pragma ivdep
772 #endif
773 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
774 #pragma vector always
775 #endif
776  for(size_type k=0;k<n;k++)
777  m_r(i,k) = m_x(i,k) + m_y(i,k);
778  if((scalar_x==1)&&(scalar_y==-1))
779 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
780 #pragma ivdep
781 #endif
782 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
783 #pragma vector always
784 #endif
785  for(size_type k=0;k<n;k++)
786  m_r(i,k) = m_x(i,k) - m_y(i,k);
787  if((scalar_x==-1)&&(scalar_y==-1))
788 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
789 #pragma ivdep
790 #endif
791 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
792 #pragma vector always
793 #endif
794  for(size_type k=0;k<n;k++)
795  m_r(i,k) = -m_x(i,k) - m_y(i,k);
796  if((scalar_x==-1)&&(scalar_y==1))
797 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
798 #pragma ivdep
799 #endif
800 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
801 #pragma vector always
802 #endif
803  for(size_type k=0;k<n;k++)
804  m_r(i,k) = -m_x(i,k) + m_y(i,k);
805  if((scalar_x==2)&&(scalar_y==1))
806 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
807 #pragma ivdep
808 #endif
809 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
810 #pragma vector always
811 #endif
812  for(size_type k=0;k<n;k++)
813  m_r(i,k) = m_a*m_x(i,k) + m_y(i,k);
814  if((scalar_x==2)&&(scalar_y==-1))
815 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
816 #pragma ivdep
817 #endif
818 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
819 #pragma vector always
820 #endif
821  for(size_type k=0;k<n;k++)
822  m_r(i,k) = m_a*m_x(i,k) - m_y(i,k);
823  if((scalar_x==1)&&(scalar_y==2))
824 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
825 #pragma ivdep
826 #endif
827 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
828 #pragma vector always
829 #endif
830  for(size_type k=0;k<n;k++)
831  m_r(i,k) = m_x(i,k) + m_b*m_y(i,k);
832  if((scalar_x==-1)&&(scalar_y==2))
833 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
834 #pragma ivdep
835 #endif
836 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
837 #pragma vector always
838 #endif
839  for(size_type k=0;k<n;k++)
840  m_r(i,k) = -m_x(i,k) + m_b*m_y(i,k);
841  if((scalar_x==2)&&(scalar_y==2))
842 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
843 #pragma ivdep
844 #endif
845 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
846 #pragma vector always
847 #endif
848  for(size_type k=0;k<n;k++)
849  m_r(i,k) = m_a*m_x(i,k) + m_b*m_y(i,k);
850 
851  }
852 };
853 
854 template<class RVector,class aVector, class XVector, class bVector, class YVector,int UNROLL>
855 RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x,
856  const bVector &bv, const YVector & y, int n,
857  int a=2,int b=2)
858 {
859  if(a==1&&b==1) {
860  MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,1,UNROLL> op ;
861  op.m_r = r ;
862  op.m_x = x ;
863  op.m_y = y ;
864  op.m_a = av ;
865  op.m_b = bv ;
866  op.n = x.dimension(1);
867  Kokkos::parallel_for( n , op );
868  return r;
869  }
870  if(a==1&&b==-1) {
871  MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,-1,UNROLL> op ;
872  op.m_r = r ;
873  op.m_x = x ;
874  op.m_y = y ;
875  op.m_a = av ;
876  op.m_b = bv ;
877  op.n = x.dimension(1);
878  Kokkos::parallel_for( n , op );
879  return r;
880  }
881  if(a==-1&&b==1) {
882  MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,1,UNROLL> op ;
883  op.m_r = r ;
884  op.m_x = x ;
885  op.m_y = y ;
886  op.m_a = av ;
887  op.m_b = bv ;
888  op.n = x.dimension(1);
889  Kokkos::parallel_for( n , op );
890  return r;
891  }
892  if(a==-1&&b==-1) {
893  MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1,UNROLL> op ;
894  op.m_r = r ;
895  op.m_x = x ;
896  op.m_y = y ;
897  op.m_a = av ;
898  op.m_b = bv ;
899  op.n = x.dimension(1);
900  Kokkos::parallel_for( n , op );
901  return r;
902  }
903  if(a*a!=1&&b==1) {
904  MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,1,UNROLL> op ;
905  op.m_r = r ;
906  op.m_x = x ;
907  op.m_y = y ;
908  op.m_a = av ;
909  op.m_b = bv ;
910  op.n = x.dimension(1);
911  Kokkos::parallel_for( n , op );
912  return r;
913  }
914  if(a*a!=1&&b==-1) {
915  MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,-1,UNROLL> op ;
916  op.m_r = r ;
917  op.m_x = x ;
918  op.m_y = y ;
919  op.m_a = av ;
920  op.m_b = bv ;
921  op.n = x.dimension(1);
922  Kokkos::parallel_for( n , op );
923  return r;
924  }
925  if(a==1&&b*b!=1) {
926  MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,2,UNROLL> op ;
927  op.m_r = r ;
928  op.m_x = x ;
929  op.m_y = y ;
930  op.m_a = av ;
931  op.m_b = bv ;
932  op.n = x.dimension(1);
933  Kokkos::parallel_for( n , op );
934  return r;
935  }
936  if(a==-1&&b*b!=1) {
937  MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,2,UNROLL> op ;
938  op.m_r = r ;
939  op.m_x = x ;
940  op.m_y = y ;
941  op.m_a = av ;
942  op.m_b = bv ;
943  op.n = x.dimension(1);
944  Kokkos::parallel_for( n , op );
945  return r;
946  }
947  MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,2,UNROLL> op ;
948  op.m_r = r ;
949  op.m_x = x ;
950  op.m_y = y ;
951  op.m_a = av ;
952  op.m_b = bv ;
953  op.n = x.dimension(1);
954  Kokkos::parallel_for( n , op );
955 
956  return r;
957 }
958 
959 template<class RVector,class aVector, class XVector, class bVector, class YVector>
960 RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x,
961  const bVector &bv, const YVector & y, int n,
962  int a=2,int b=2)
963 {
964  switch (x.dimension(1)){
965  case 1: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 1>( r,av,x,bv,y,n,a,b);
966  break;
967  case 2: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 2>( r,av,x,bv,y,n,a,b);
968  break;
969  case 3: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 3>( r,av,x,bv,y,n,a,b);
970  break;
971  case 4: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 4>( r,av,x,bv,y,n,a,b);
972  break;
973  case 5: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 5>( r,av,x,bv,y,n,a,b);
974  break;
975  case 6: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 6>( r,av,x,bv,y,n,a,b);
976  break;
977  case 7: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 7>( r,av,x,bv,y,n,a,b);
978  break;
979  case 8: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 8>( r,av,x,bv,y,n,a,b);
980  break;
981  case 9: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 9>( r,av,x,bv,y,n,a,b);
982  break;
983  case 10: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 10>( r,av,x,bv,y,n,a,b);
984  break;
985  case 11: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 11>( r,av,x,bv,y,n,a,b);
986  break;
987  case 12: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 12>( r,av,x,bv,y,n,a,b);
988  break;
989  case 13: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 13>( r,av,x,bv,y,n,a,b);
990  break;
991  case 14: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 14>( r,av,x,bv,y,n,a,b);
992  break;
993  case 15: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 15>( r,av,x,bv,y,n,a,b);
994  break;
995  case 16: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 16>( r,av,x,bv,y,n,a,b);
996  break;
997  }
998  return r;
999 }
1000 
1001 
1002 template<class RVector,class aVector, class XVector, class bVector, class YVector>
1003 RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x,
1004  const bVector &bv, const YVector & y, int n,
1005  int a=2,int b=2)
1006 {
1007  if(a==1&&b==1) {
1008  MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,1> op ;
1009  op.m_r = r ;
1010  op.m_x = x ;
1011  op.m_y = y ;
1012  op.m_a = av ;
1013  op.m_b = bv ;
1014  op.n = x.dimension(1);
1015  Kokkos::parallel_for( n , op );
1016  return r;
1017  }
1018  if(a==1&&b==-1) {
1019  MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,-1> op ;
1020  op.m_r = r ;
1021  op.m_x = x ;
1022  op.m_y = y ;
1023  op.m_a = av ;
1024  op.m_b = bv ;
1025  op.n = x.dimension(1);
1026  Kokkos::parallel_for( n , op );
1027  return r;
1028  }
1029  if(a==-1&&b==1) {
1030  MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,1> op ;
1031  op.m_r = r ;
1032  op.m_x = x ;
1033  op.m_y = y ;
1034  op.m_a = av ;
1035  op.m_b = bv ;
1036  op.n = x.dimension(1);
1037  Kokkos::parallel_for( n , op );
1038  return r;
1039  }
1040  if(a==-1&&b==-1) {
1041  MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1> op ;
1042  op.m_r = r ;
1043  op.m_x = x ;
1044  op.m_y = y ;
1045  op.m_a = av ;
1046  op.m_b = bv ;
1047  op.n = x.dimension(1);
1048  Kokkos::parallel_for( n , op );
1049  return r;
1050  }
1051  if(a*a!=1&&b==1) {
1052  MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,1> op ;
1053  op.m_r = r ;
1054  op.m_x = x ;
1055  op.m_y = y ;
1056  op.m_a = av ;
1057  op.m_b = bv ;
1058  op.n = x.dimension(1);
1059  Kokkos::parallel_for( n , op );
1060  return r;
1061  }
1062  if(a*a!=1&&b==-1) {
1063  MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,-1> op ;
1064  op.m_r = r ;
1065  op.m_x = x ;
1066  op.m_y = y ;
1067  op.m_a = av ;
1068  op.m_b = bv ;
1069  op.n = x.dimension(1);
1070  Kokkos::parallel_for( n , op );
1071  return r;
1072  }
1073  if(a==1&&b*b!=1) {
1074  MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,2> op ;
1075  op.m_r = r ;
1076  op.m_x = x ;
1077  op.m_y = y ;
1078  op.m_a = av ;
1079  op.m_b = bv ;
1080  op.n = x.dimension(1);
1081  Kokkos::parallel_for( n , op );
1082  return r;
1083  }
1084  if(a==-1&&b*b!=1) {
1085  MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,2> op ;
1086  op.m_r = r ;
1087  op.m_x = x ;
1088  op.m_y = y ;
1089  op.m_a = av ;
1090  op.m_b = bv ;
1091  op.n = x.dimension(1);
1092  Kokkos::parallel_for( n , op );
1093  return r;
1094  }
1095  MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,2> op ;
1096  op.m_r = r ;
1097  op.m_x = x ;
1098  op.m_y = y ;
1099  op.m_a = av ;
1100  op.m_b = bv ;
1101  op.n = x.dimension(1);
1102  Kokkos::parallel_for( n , op );
1103 
1104  return r;
1105 }
1106 
1107 template<class RVector,class aVector, class XVector, class bVector, class YVector>
1108 RVector MV_AddSpecialise( const RVector & r,const aVector &av,const XVector & x,
1109  const bVector &bv, const YVector & y, unsigned int n,
1110  int a=2,int b=2)
1111 {
1112 
1113  if(x.dimension(1)>16)
1114  return MV_AddVector( r,av,x,bv,y,a,b);
1115 
1116  if(x.dimension_1()==1) {
1117  typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
1118  typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
1119  typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
1120 
1121  RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
1122  XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
1123  YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
1124 
1125  V_Add(r_1d,av,x_1d,bv,y_1d,n);
1126  return r;
1127  } else
1128  return MV_AddUnroll( r,av,x,bv,y,a,b);
1129 }
1130 
1131 template<class RVector,class aVector, class XVector, class bVector, class YVector>
1132 RVector MV_Add( const RVector & r,const aVector &av,const XVector & x,
1133  const bVector &bv, const YVector & y, int n = -1)
1134 {
1135  if(n==-1) n = x.dimension_0();
1136  if(x.dimension(1)>16)
1137  return MV_AddVector( r,av,x,bv,y,n,2,2);
1138 
1139  if(x.dimension_1()==1) {
1140  typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
1141  typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
1142  typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
1143 
1144  RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
1145  XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
1146  YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
1147 
1148  V_Add(r_1d,av,x_1d,bv,y_1d,n);
1149  return r;
1150  } else
1151  return MV_AddUnroll( r,av,x,bv,y,n,2,2);
1152 }
1153 
1154 template<class RVector,class XVector,class YVector>
1155 RVector MV_Add( const RVector & r, const XVector & x, const YVector & y, int n = -1)
1156 {
1157  if(n==-1) n = x.dimension_0();
1158  if(x.dimension_1()==1) {
1159  typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
1160  typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
1161  typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
1162 
1163  RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
1164  XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
1165  YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
1166 
1167  V_Add(r_1d,x_1d,y_1d,n);
1168  return r;
1169  } else {
1170  typename XVector::const_value_type a = 1.0;
1171  return MV_AddSpecialise(r,a,x,a,y,n,1,1);
1172  }
1173 }
1174 
1175 template<class RVector,class XVector,class bVector, class YVector>
1176 RVector MV_Add( const RVector & r, const XVector & x, const bVector & bv, const YVector & y, int n = -1 )
1177 {
1178  if(n==-1) n = x.dimension_0();
1179  if(x.dimension_1()==1) {
1180  typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
1181  typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
1182  typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
1183 
1184  RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
1185  XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
1186  YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
1187 
1188  V_Add(r_1d,x_1d,bv,y_1d,n);
1189  return r;
1190  } else {
1191  MV_AddSpecialise(r,bv,x,bv,y,n,1,2);
1192  }
1193 }
1194 
1195 
1196 template<class XVector,class YVector>
1197 struct MV_DotProduct_Right_FunctorVector
1198 {
1199  typedef typename XVector::device_type device_type;
1200  typedef typename XVector::size_type size_type;
1201  typedef typename XVector::non_const_value_type xvalue_type;
1202  typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
1203  typedef typename IPT::dot_type value_type[];
1204  size_type value_count;
1205 
1206 
1207  typedef typename XVector::const_type x_const_type;
1208  typedef typename YVector::const_type y_const_type;
1209  x_const_type m_x ;
1210  y_const_type m_y ;
1211 
1212  //--------------------------------------------------------------------------
1213 
1214  KOKKOS_INLINE_FUNCTION
1215  void operator()( const size_type i, value_type sum ) const
1216  {
1217  const size_type numVecs=value_count;
1218 
1219 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1220 #pragma ivdep
1221 #endif
1222 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1223 #pragma vector always
1224 #endif
1225  for(size_type k=0;k<numVecs;k++)
1226  sum[k]+=IPT::dot( m_x(i,k), m_y(i,k) ); // m_x(i,k) * m_y(i,k)
1227  }
1228  KOKKOS_INLINE_FUNCTION void init( value_type update) const
1229  {
1230  const size_type numVecs = value_count;
1231 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1232 #pragma ivdep
1233 #endif
1234 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1235 #pragma vector always
1236 #endif
1237  for(size_type k=0;k<numVecs;k++)
1238  update[k] = 0;
1239  }
1240  KOKKOS_INLINE_FUNCTION void join( volatile value_type update ,
1241  const volatile value_type source ) const
1242  {
1243  const size_type numVecs = value_count;
1244 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1245 #pragma ivdep
1246 #endif
1247 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1248 #pragma vector always
1249 #endif
1250  for(size_type k=0;k<numVecs;k++){
1251  update[k] += source[k];
1252  }
1253  }
1254 };
1255 
1256 
1257 // Implementation detail of Tpetra::MultiVector::dot, when both
1258 // MultiVectors in the dot product have constant stride. Compute the
1259 // dot product of the local part of each corresponding vector (column)
1260 // in two MultiVectors.
1261 template<class MultiVecViewType>
1262 struct MultiVecDotFunctor {
1263  typedef typename MultiVecViewType::device_type device_type;
1264  typedef typename MultiVecViewType::size_type size_type;
1265  typedef typename MultiVecViewType::value_type mv_value_type;
1267  typedef typename IPT::dot_type value_type[];
1268 
1269  typedef MultiVecViewType mv_view_type;
1270  typedef typename MultiVecViewType::const_type mv_const_view_type;
1272 
1273  mv_const_view_type X_, Y_;
1274  dot_view_type dots_;
1275  // Kokkos::parallel_reduce wants this, so it needs to be public.
1276  size_type value_count;
1277 
1278  MultiVecDotFunctor (const mv_const_view_type& X,
1279  const mv_const_view_type& Y,
1280  const dot_view_type& dots) :
1281  X_ (X), Y_ (Y), dots_ (dots), value_count (X.dimension_1 ())
1282  {
1283  if (value_count != dots.dimension_0 ()) {
1284 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
1285  cuda_abort("Kokkos::MultiVecDotFunctor: value_count does not match the length of 'dots'");
1286 #else
1287  std::ostringstream os;
1288  os << "Kokkos::MultiVecDotFunctor: value_count does not match the length "
1289  "of 'dots'. X is " << X.dimension_0 () << " x " << X.dimension_1 () <<
1290  ", Y is " << Y.dimension_0 () << " x " << Y.dimension_1 () << ", "
1291  "dots has length " << dots.dimension_0 () << ", and value_count = " <<
1292  value_count << ".";
1293  throw std::invalid_argument (os.str ());
1294 #endif
1295  }
1296 
1297  }
1298 
1299  KOKKOS_INLINE_FUNCTION void
1300  operator() (const size_type i, value_type sum) const
1301  {
1302  const size_type numVecs = value_count;
1303 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1304 #pragma ivdep
1305 #endif
1306 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1307 #pragma vector always
1308 #endif
1309  for (size_type k = 0; k < numVecs; ++k) {
1310  sum[k] += IPT::dot (X_(i,k), Y_(i,k));
1311  }
1312  }
1313 
1314  KOKKOS_INLINE_FUNCTION void
1315  init (value_type update) const
1316  {
1317  const size_type numVecs = value_count;
1318 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1319 #pragma ivdep
1320 #endif
1321 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1322 #pragma vector always
1323 #endif
1324  for (size_type k = 0; k < numVecs; ++k) {
1326  }
1327  }
1328 
1329  KOKKOS_INLINE_FUNCTION void
1330  join (volatile value_type update,
1331  const volatile value_type source) const
1332  {
1333  const size_type numVecs = value_count;
1334 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1335 #pragma ivdep
1336 #endif
1337 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1338 #pragma vector always
1339 #endif
1340  for (size_type k = 0; k < numVecs; ++k) {
1341  update[k] += source[k];
1342  }
1343  }
1344 
1345  // On device, write the reduction result to the output View.
1346  KOKKOS_INLINE_FUNCTION void
1347  final (const value_type dst) const
1348  {
1349  const size_type numVecs = value_count;
1350 
1351 #if !defined(__CUDA_ARCH__)
1352  // DEBUGGING ONLY
1353  {
1354  std::ostringstream os;
1355  os << "numVecs: " << numVecs << ", dst: [";
1356  for (size_t j = 0; j < numVecs; ++j) {
1357  os << dst[j];
1358  if (j + 1 < numVecs) {
1359  os << ", ";
1360  }
1361  }
1362  os << "]" << std::endl;
1363  std::cerr << os.str ();
1364  }
1365 #endif
1366 
1367 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1368 #pragma ivdep
1369 #endif
1370 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1371 #pragma vector always
1372 #endif
1373  for (size_type k = 0; k < numVecs; ++k) {
1374  dots_(k) = dst[k];
1375  }
1376 
1377 #if !defined(__CUDA_ARCH__)
1378  // DEBUGGING ONLY
1379  {
1380  std::ostringstream os;
1381  os << "numVecs: " << numVecs << ", dots_: [";
1382  for (size_t j = 0; j < numVecs; ++j) {
1383  os << dots_(j);
1384  if (j + 1 < numVecs) {
1385  os << ", ";
1386  }
1387  }
1388  os << "]" << std::endl;
1389  std::cerr << os.str ();
1390  }
1391 #endif
1392  }
1393 };
1394 
1395 
1396 // Implementation detail of Tpetra::MultiVector::norm2, when the
1397 // MultiVector has constant stride. Compute the square of the
1398 // two-norm of each column of a multivector.
1399 template<class MultiVecViewType>
1400 struct MultiVecNorm2SquaredFunctor {
1401  typedef typename MultiVecViewType::device_type device_type;
1402  typedef typename MultiVecViewType::size_type size_type;
1403  typedef typename MultiVecViewType::value_type mv_value_type;
1405  typedef typename IPT::mag_type value_type[];
1406 
1407  typedef MultiVecViewType mv_view_type;
1408  typedef typename MultiVecViewType::const_type mv_const_view_type;
1410 
1411  mv_const_view_type X_;
1412  norms_view_type norms_;
1413  // Kokkos::parallel_reduce wants this, so it needs to be public.
1414  size_type value_count;
1415 
1416  MultiVecNorm2SquaredFunctor (const mv_const_view_type& X,
1417  const norms_view_type& norms) :
1418  X_ (X), norms_ (norms), value_count (X.dimension_1 ())
1419  {
1420  if (value_count != norms.dimension_0 ()) {
1421 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
1422  cuda_abort("Kokkos::MultiVecNorm2SquaredFunctor: value_count does not match the length of 'norms'");
1423 #else
1424  std::ostringstream os;
1425  os << "Kokkos::MultiVecNorm2SquaredFunctor: value_count does not match "
1426  "the length of 'norms'. X is " << X.dimension_0 () << " x " <<
1427  X.dimension_1 () << ", norms has length " << norms.dimension_0 () <<
1428  ", and value_count = " << value_count << ".";
1429  throw std::invalid_argument (os.str ());
1430 #endif
1431  }
1432  }
1433 
1434  KOKKOS_INLINE_FUNCTION void
1435  operator() (const size_type i, value_type sum) const
1436  {
1437  const size_type numVecs = value_count;
1438 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1439 #pragma ivdep
1440 #endif
1441 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1442 #pragma vector always
1443 #endif
1444  for (size_type k = 0; k < numVecs; ++k) {
1445  const typename IPT::mag_type tmp = IPT::norm (X_(i,k));
1446  sum[k] += tmp * tmp;
1447  }
1448  }
1449 
1450  KOKKOS_INLINE_FUNCTION void
1451  init (value_type update) const
1452  {
1453  const size_type numVecs = value_count;
1454 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1455 #pragma ivdep
1456 #endif
1457 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1458 #pragma vector always
1459 #endif
1460  for (size_type k = 0; k < numVecs; ++k) {
1462  }
1463  }
1464 
1465  KOKKOS_INLINE_FUNCTION void
1466  join (volatile value_type update,
1467  const volatile value_type source) const
1468  {
1469  const size_type numVecs = value_count;
1470 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1471 #pragma ivdep
1472 #endif
1473 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1474 #pragma vector always
1475 #endif
1476  for (size_type k = 0; k < numVecs; ++k) {
1477  update[k] += source[k];
1478  }
1479  }
1480 
1481  // On device, write the reduction result to the output View.
1482  KOKKOS_INLINE_FUNCTION void
1483  final (const value_type dst) const
1484  {
1485  const size_type numVecs = value_count;
1486 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1487 #pragma ivdep
1488 #endif
1489 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1490 #pragma vector always
1491 #endif
1492  for (size_type k = 0; k < numVecs; ++k) {
1493  norms_(k) = dst[k];
1494  }
1495  }
1496 };
1497 
1498 
1499 // Implementation detail of Tpetra::MultiVector::norm1, when the
1500 // MultiVector has constant stride. Compute the one-norm of each
1501 // column of a multivector.
1502 template<class MultiVecViewType>
1503 struct MultiVecNorm1Functor {
1504  typedef typename MultiVecViewType::device_type device_type;
1505  typedef typename MultiVecViewType::size_type size_type;
1506  typedef typename MultiVecViewType::value_type mv_value_type;
1508  typedef typename IPT::mag_type value_type[];
1509 
1510  typedef MultiVecViewType mv_view_type;
1511  typedef typename MultiVecViewType::const_type mv_const_view_type;
1513 
1514  mv_const_view_type X_;
1515  norms_view_type norms_;
1516  // Kokkos::parallel_reduce wants this, so it needs to be public.
1517  size_type value_count;
1518 
1519  MultiVecNorm1Functor (const mv_const_view_type& X,
1520  const norms_view_type& norms) :
1521  X_ (X), norms_ (norms), value_count (X.dimension_1 ())
1522  {
1523  if (value_count != norms.dimension_0 ()) {
1524 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
1525  cuda_abort("Kokkos::MultiVecNorm1Functor: value_count does not match the length of 'norms'");
1526 #else
1527  std::ostringstream os;
1528  os << "Kokkos::MultiVecNorm1Functor: value_count does not match the "
1529  << "length of 'norms'. X is " << X.dimension_0 () << " x "
1530  << X.dimension_1 () << ", norms has length " << norms.dimension_0 ()
1531  << ", and value_count = " << value_count << ".";
1532  throw std::invalid_argument (os.str ());
1533 #endif
1534  }
1535  }
1536 
1537  KOKKOS_INLINE_FUNCTION void
1538  operator() (const size_type i, value_type sum) const
1539  {
1540  const size_type numVecs = value_count;
1541 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1542 #pragma ivdep
1543 #endif
1544 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1545 #pragma vector always
1546 #endif
1547  for (size_type k = 0; k < numVecs; ++k) {
1548  sum[k] += IPT::norm (X_(i,k)); // absolute value
1549  }
1550  }
1551 
1552  KOKKOS_INLINE_FUNCTION void
1553  init (value_type update) const
1554  {
1555  const size_type numVecs = value_count;
1556 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1557 #pragma ivdep
1558 #endif
1559 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1560 #pragma vector always
1561 #endif
1562  for (size_type k = 0; k < numVecs; ++k) {
1564  }
1565  }
1566 
1567  KOKKOS_INLINE_FUNCTION void
1568  join (volatile value_type update,
1569  const volatile value_type source) const
1570  {
1571  const size_type numVecs = value_count;
1572 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1573 #pragma ivdep
1574 #endif
1575 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1576 #pragma vector always
1577 #endif
1578  for (size_type k = 0; k < numVecs; ++k) {
1579  update[k] += source[k];
1580  }
1581  }
1582 
1583  // On device, write the reduction result to the output View.
1584  KOKKOS_INLINE_FUNCTION void
1585  final (const value_type dst) const
1586  {
1587  const size_type numVecs = value_count;
1588 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1589 #pragma ivdep
1590 #endif
1591 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1592 #pragma vector always
1593 #endif
1594  for (size_type k = 0; k < numVecs; ++k) {
1595  norms_(k) = dst[k];
1596  }
1597  }
1598 };
1599 
1600 
1601 // Implementation detail of Tpetra::MultiVector::normInf, when the
1602 // MultiVector has constant stride. Compute the infinity-norm of each
1603 // column of a multivector.
1604 template<class MultiVecViewType>
1605 struct MultiVecNormInfFunctor {
1606  typedef typename MultiVecViewType::device_type device_type;
1607  typedef typename MultiVecViewType::size_type size_type;
1608  typedef typename MultiVecViewType::value_type mv_value_type;
1610  typedef typename IPT::mag_type value_type[];
1611 
1612  typedef MultiVecViewType mv_view_type;
1613  typedef typename MultiVecViewType::const_type mv_const_view_type;
1615 
1616  mv_const_view_type X_;
1617  norms_view_type norms_;
1618  // Kokkos::parallel_reduce wants this, so it needs to be public.
1619  size_type value_count;
1620 
1621  MultiVecNormInfFunctor (const mv_const_view_type& X,
1622  const norms_view_type& norms) :
1623  X_ (X), norms_ (norms), value_count (X.dimension_1 ())
1624  {
1625  if (value_count != norms.dimension_0 ()) {
1626 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
1627  cuda_abort("Kokkos::MultiVecNormInfFunctor: value_count does not match the length of 'norms'");
1628 #else
1629  std::ostringstream os;
1630  os << "Kokkos::MultiVecNormInfFunctor: value_count does not match the "
1631  << "length of 'norms'. X is " << X.dimension_0 () << " x "
1632  << X.dimension_1 () << ", norms has length " << norms.dimension_0 ()
1633  << ", and value_count = " << value_count << ".";
1634  throw std::invalid_argument (os.str ());
1635 #endif
1636  }
1637  }
1638 
1639  KOKKOS_INLINE_FUNCTION void
1640  operator() (const size_type i, value_type maxes) const
1641  {
1642  const size_type numVecs = value_count;
1643 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1644 #pragma ivdep
1645 #endif
1646 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1647 #pragma vector always
1648 #endif
1649  for (size_type k = 0; k < numVecs; ++k) {
1650  const typename IPT::mag_type curVal = maxes[k];
1651  const typename IPT::mag_type newVal = IPT::norm (X_(i,k));
1652  // max(curVal, newVal). Any comparison predicate involving NaN
1653  // evaluates to false. Thus, this will never assign NaN to
1654  // update[k], unless it contains NaN already. The initial value
1655  // is zero, so NaNs won't propagate. (This definition makes NaN
1656  // into an "invalid value," which is useful for statistics and
1657  // other applications that use NaN to indicate a "hole.")
1658  if (curVal < newVal) {
1659  maxes[k] = newVal;
1660  }
1661  }
1662  }
1663 
1664  KOKKOS_INLINE_FUNCTION void
1665  init (value_type update) const
1666  {
1667  const size_type numVecs = value_count;
1668 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1669 #pragma ivdep
1670 #endif
1671 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1672 #pragma vector always
1673 #endif
1674  for (size_type k = 0; k < numVecs; ++k) {
1675  // Zero is a good default value for magnitudes (which are
1676  // nonnegative by definition). That way, MPI processes with
1677  // zero rows won't affect the global maximum.
1679  }
1680  }
1681 
1682  KOKKOS_INLINE_FUNCTION void
1683  join (volatile value_type update,
1684  const volatile value_type source) const
1685  {
1686  const size_type numVecs = value_count;
1687 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1688 #pragma ivdep
1689 #endif
1690 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1691 #pragma vector always
1692 #endif
1693  for (size_type k = 0; k < numVecs; ++k) {
1694  const typename IPT::mag_type curVal = update[k];
1695  const typename IPT::mag_type newVal = source[k];
1696  // max(curVal, newVal). Any comparison predicate involving NaN
1697  // evaluates to false. Thus, this will never assign NaN to
1698  // update[k], unless it contains NaN already. The initial value
1699  // is zero, so NaNs won't propagate. (This definition makes NaN
1700  // into an "invalid value," which is useful for statistics and
1701  // other applications that use NaN to indicate a "hole.")
1702  if (curVal < newVal) {
1703  update[k] = newVal;
1704  }
1705  }
1706  }
1707 
1708  // On device, write the reduction result to the output View.
1709  KOKKOS_INLINE_FUNCTION void
1710  final (const value_type dst) const
1711  {
1712  const size_type numVecs = value_count;
1713 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
1714 #pragma ivdep
1715 #endif
1716 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
1717 #pragma vector always
1718 #endif
1719  for (size_type k = 0; k < numVecs; ++k) {
1720  norms_(k) = dst[k];
1721  }
1722  }
1723 };
1724 
1725 
1726 // Implementation detail of Tpetra::MultiVector::dot, for single
1727 // vectors (columns).
1728 template<class VecViewType>
1729 struct VecDotFunctor {
1730  typedef typename VecViewType::device_type device_type;
1731  typedef typename VecViewType::size_type size_type;
1732  typedef typename VecViewType::value_type mv_value_type;
1734  typedef typename IPT::dot_type value_type;
1735  typedef typename VecViewType::const_type vec_const_view_type;
1736  // This is a nonconst scalar view. It holds one dot_type instance.
1739 
1740  vec_const_view_type x_, y_;
1741  dot_view_type dot_;
1742 
1743  VecDotFunctor (const vec_const_view_type& x,
1744  const vec_const_view_type& y,
1745  const dot_view_type& dot) :
1746  x_ (x), y_ (y), dot_ (dot)
1747  {
1748  if (x.dimension_0 () != y.dimension_0 ()) {
1749  std::ostringstream os;
1750  os << "Kokkos::VecDotFunctor: The dimensions of x and y do not match. "
1751  "x.dimension_0() = " << x.dimension_0 ()
1752  << " != y.dimension_0() = " << y.dimension_0 () << ".";
1753  throw std::invalid_argument (os.str ());
1754  }
1755  }
1756 
1757  KOKKOS_INLINE_FUNCTION void
1758  operator() (const size_type i, value_type& sum) const {
1759  sum += IPT::dot (x_(i), y_(i));
1760  }
1761 
1762  KOKKOS_INLINE_FUNCTION void
1763  init (value_type& update) const {
1765  }
1766 
1767  KOKKOS_INLINE_FUNCTION void
1768  join (volatile value_type& update,
1769  const volatile value_type& source) const {
1770  update += source;
1771  }
1772 
1773  // On device, write the reduction result to the output View.
1774  KOKKOS_INLINE_FUNCTION void final (value_type& dst) const {
1775  // BADNESS HERE
1776  dot_() = dst;
1777  }
1778 };
1779 
1780 
1781 // Compute the square of the two-norm of a single vector.
1782 template<class VecViewType>
1783 struct VecNorm2SquaredFunctor {
1784  typedef typename VecViewType::device_type device_type;
1785  typedef typename VecViewType::size_type size_type;
1786  typedef typename VecViewType::value_type mv_value_type;
1788  typedef typename IPT::mag_type value_type;
1789  typedef typename VecViewType::const_type vec_const_view_type;
1790  // This is a nonconst scalar view. It holds one mag_type instance.
1792 
1793  vec_const_view_type x_;
1794  norm_view_type norm_;
1795 
1796  // Constructor
1797  //
1798  // x: the vector for which to compute the square of the two-norm.
1799  // norm: scalar View into which to put the result.
1800  VecNorm2SquaredFunctor (const vec_const_view_type& x,
1801  const norm_view_type& norm) :
1802  x_ (x), norm_ (norm)
1803  {}
1804 
1805  KOKKOS_INLINE_FUNCTION void
1806  operator() (const size_type i, value_type& sum) const {
1807  const typename IPT::mag_type tmp = IPT::norm (x_(i));
1808  sum += tmp * tmp;
1809  }
1810 
1811  KOKKOS_INLINE_FUNCTION void
1812  init (value_type& update) const {
1814  }
1815 
1816  KOKKOS_INLINE_FUNCTION void
1817  join (volatile value_type& update,
1818  const volatile value_type& source) const {
1819  update += source;
1820  }
1821 
1822  // On device, write the reduction result to the output View.
1823  KOKKOS_INLINE_FUNCTION void final (value_type& dst) const {
1824  norm_ () = dst;
1825  }
1826 };
1827 
1828 
1829 // Compute the square of the one-norm of a single vector.
1830 template<class VecViewType>
1831 struct VecNorm1Functor {
1832  typedef typename VecViewType::device_type device_type;
1833  typedef typename VecViewType::size_type size_type;
1834  typedef typename VecViewType::value_type mv_value_type;
1836  typedef typename IPT::mag_type value_type;
1837  typedef typename VecViewType::const_type vec_const_view_type;
1838  // This is a nonconst scalar view. It holds one mag_type instance.
1840 
1841  vec_const_view_type x_;
1842  norm_view_type norm_;
1843 
1844  // Constructor
1845  //
1846  // x: the vector for which to compute the one-norm.
1847  // norm: scalar View into which to put the result.
1848  VecNorm1Functor (const vec_const_view_type& x,
1849  const norm_view_type& norm) :
1850  x_ (x), norm_ (norm)
1851  {}
1852 
1853  KOKKOS_INLINE_FUNCTION void
1854  operator() (const size_type i, value_type& sum) const {
1855  sum += IPT::norm (x_(i)); // absolute value
1856  }
1857 
1858  KOKKOS_INLINE_FUNCTION void
1859  init (value_type& update) const {
1861  }
1862 
1863  KOKKOS_INLINE_FUNCTION void
1864  join (volatile value_type& update,
1865  const volatile value_type& source) const {
1866  update += source;
1867  }
1868 
1869  // On device, write the reduction result to the output View.
1870  KOKKOS_INLINE_FUNCTION void final (value_type& dst) const {
1871  norm_ () = dst;
1872  }
1873 };
1874 
1875 
1876 // Compute the square of the infinity-norm of a single vector.
1877 template<class VecViewType>
1878 struct VecNormInfFunctor {
1879  typedef typename VecViewType::device_type device_type;
1880  typedef typename VecViewType::size_type size_type;
1881  typedef typename VecViewType::value_type mv_value_type;
1883  typedef typename IPT::mag_type value_type;
1884  typedef typename VecViewType::const_type vec_const_view_type;
1885  // This is a nonconst scalar view. It holds one mag_type instance.
1887 
1888  vec_const_view_type x_;
1889  norm_view_type norm_;
1890 
1891  // Constructor
1892  //
1893  // x: the vector for which to compute the infinity-norm.
1894  // norm: scalar View into which to put the result.
1895  VecNormInfFunctor (const vec_const_view_type& x,
1896  const norm_view_type& norm) :
1897  x_ (x), norm_ (norm)
1898  {}
1899 
1900  KOKKOS_INLINE_FUNCTION void
1901  operator() (const size_type i, value_type& curVal) const {
1902  const typename IPT::mag_type newVal = IPT::norm (x_(i));
1903  // max(curVal, newVal). Any comparison predicate involving NaN
1904  // evaluates to false. Thus, this will never assign NaN to
1905  // update[k], unless it contains NaN already. The initial value
1906  // is zero, so NaNs won't propagate. (This definition makes NaN
1907  // into an "invalid value," which is useful for statistics and
1908  // other applications that use NaN to indicate a "hole.")
1909  if (curVal < newVal) {
1910  curVal = newVal;
1911  }
1912  }
1913 
1914  KOKKOS_INLINE_FUNCTION void
1915  init (value_type& update) const {
1916  // Zero is a good default value for magnitudes (which are
1917  // nonnegative by definition). That way, MPI processes with
1918  // zero rows won't affect the global maximum.
1920  }
1921 
1922  KOKKOS_INLINE_FUNCTION void
1923  join (volatile value_type& update,
1924  const volatile value_type& source) const {
1925  // max(update, source). Any comparison predicate involving NaN
1926  // evaluates to false. Thus, this will never assign NaN to
1927  // update, unless it contains NaN already. The initial value is
1928  // zero, so NaNs won't propagate. (This definition makes NaN into
1929  // an "invalid value," which is useful for statistics and other
1930  // applications that use NaN to indicate a "hole.")
1931  if (update < source) {
1932  update = source;
1933  }
1934  }
1935 
1936  // On device, write the reduction result to the output View.
1937  KOKKOS_INLINE_FUNCTION void final (value_type& dst) const {
1938  norm_ () = dst;
1939  }
1940 };
1941 
1942 
1943 
1944 // parallel_for functor for computing the square root, in place, of a
1945 // one-dimensional View. This is useful for following the MPI
1946 // all-reduce that computes the square of the two-norms of the local
1947 // columns of a Tpetra::MultiVector.
1948 //
1949 // mfh 14 Jul 2014: Carter says that, for now, the standard idiom for
1950 // operating on a single scalar value on the device, is to run in a
1951 // parallel_for with N = 1.
1952 //
1953 // FIXME (mfh 14 Jul 2014): If we could assume C++11, this functor
1954 // would go away.
1955 template<class ViewType>
1956 class SquareRootFunctor {
1957 public:
1958  typedef typename ViewType::device_type device_type;
1959  typedef typename ViewType::size_type size_type;
1960 
1961  SquareRootFunctor (const ViewType& theView) : theView_ (theView) {}
1962 
1963  KOKKOS_INLINE_FUNCTION void operator() (const size_type i) const {
1964  typedef typename ViewType::value_type value_type;
1965  theView_(i) = Kokkos::Details::ArithTraits<value_type>::sqrt (theView_(i));
1966  }
1967 
1968 private:
1969  ViewType theView_;
1970 };
1971 
1972 
1973 template<class XVector,class YVector,int UNROLL>
1974 struct MV_DotProduct_Right_FunctorUnroll
1975 {
1976  typedef typename XVector::device_type device_type;
1977  typedef typename XVector::size_type size_type;
1978  typedef typename XVector::non_const_value_type xvalue_type;
1979  typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
1980  typedef typename IPT::dot_type value_type[];
1981  size_type value_count;
1982 
1983  typedef typename XVector::const_type x_const_type;
1984  typedef typename YVector::const_type y_const_type;
1985 
1986  x_const_type m_x ;
1987  y_const_type m_y ;
1988 
1989  //--------------------------------------------------------------------------
1990 
1991  KOKKOS_INLINE_FUNCTION
1992  void operator()( const size_type i, value_type sum ) const
1993  {
1994 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
1995 #pragma unroll
1996 #endif
1997  for(size_type k=0;k<UNROLL;k++)
1998  sum[k]+= IPT::dot( m_x(i,k), m_y(i,k) ); // m_x(i,k) * m_y(i,k)
1999  }
2000  KOKKOS_INLINE_FUNCTION void init( volatile value_type update) const
2001  {
2002 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
2003 #pragma unroll
2004 #endif
2005  for(size_type k=0;k<UNROLL;k++)
2006  update[k] = 0;
2007  }
2008  KOKKOS_INLINE_FUNCTION void join( volatile value_type update ,
2009  const volatile value_type source) const
2010  {
2011 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
2012 #pragma unroll
2013 #endif
2014  for(size_type k=0;k<UNROLL;k++)
2015  update[k] += source[k] ;
2016  }
2017 };
2018 
2019 template<class rVector, class XVector, class YVector>
2020 rVector MV_Dot(const rVector &r, const XVector & x, const YVector & y, int n = -1)
2021 {
2022  typedef typename XVector::size_type size_type;
2023  const size_type numVecs = x.dimension(1);
2024 
2025  if(n<0) n = x.dimension_0();
2026  if(numVecs>16){
2027 
2028  MV_DotProduct_Right_FunctorVector<XVector,YVector> op;
2029  op.m_x = x;
2030  op.m_y = y;
2031  op.value_count = numVecs;
2032 
2033  Kokkos::parallel_reduce( n , op, r );
2034  return r;
2035  }
2036  else
2037  switch(numVecs) {
2038  case 16: {
2039  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,16> op;
2040  op.m_x = x;
2041  op.m_y = y;
2042  op.value_count = numVecs;
2043  Kokkos::parallel_reduce( n , op, r );
2044  break;
2045  }
2046  case 15: {
2047  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,15> op;
2048  op.m_x = x;
2049  op.m_y = y;
2050  op.value_count = numVecs;
2051  Kokkos::parallel_reduce( n , op, r );
2052  break;
2053  }
2054  case 14: {
2055  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,14> op;
2056  op.m_x = x;
2057  op.m_y = y;
2058  op.value_count = numVecs;
2059  Kokkos::parallel_reduce( n , op, r );
2060  break;
2061  }
2062  case 13: {
2063  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,13> op;
2064  op.m_x = x;
2065  op.m_y = y;
2066  op.value_count = numVecs;
2067  Kokkos::parallel_reduce( n , op, r );
2068  break;
2069  }
2070  case 12: {
2071  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,12> op;
2072  op.m_x = x;
2073  op.m_y = y;
2074  op.value_count = numVecs;
2075  Kokkos::parallel_reduce( n , op, r );
2076  break;
2077  }
2078  case 11: {
2079  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,11> op;
2080  op.m_x = x;
2081  op.m_y = y;
2082  op.value_count = numVecs;
2083  Kokkos::parallel_reduce( n , op, r );
2084  break;
2085  }
2086  case 10: {
2087  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,10> op;
2088  op.m_x = x;
2089  op.m_y = y;
2090  op.value_count = numVecs;
2091  Kokkos::parallel_reduce( n , op, r );
2092  break;
2093  }
2094  case 9: {
2095  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,9> op;
2096  op.m_x = x;
2097  op.m_y = y;
2098  op.value_count = numVecs;
2099  Kokkos::parallel_reduce( n , op, r );
2100  break;
2101  }
2102  case 8: {
2103  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,8> op;
2104  op.m_x = x;
2105  op.m_y = y;
2106  op.value_count = numVecs;
2107  Kokkos::parallel_reduce( n , op, r );
2108  break;
2109  }
2110  case 7: {
2111  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,7> op;
2112  op.m_x = x;
2113  op.m_y = y;
2114  op.value_count = numVecs;
2115  Kokkos::parallel_reduce( n , op, r );
2116  break;
2117  }
2118  case 6: {
2119  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,6> op;
2120  op.m_x = x;
2121  op.m_y = y;
2122  op.value_count = numVecs;
2123  Kokkos::parallel_reduce( n , op, r );
2124  break;
2125  }
2126  case 5: {
2127  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,5> op;
2128  op.m_x = x;
2129  op.m_y = y;
2130  op.value_count = numVecs;
2131  Kokkos::parallel_reduce( n , op, r );
2132  break;
2133  }
2134  case 4: {
2135  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,4> op;
2136  op.m_x = x;
2137  op.m_y = y;
2138  op.value_count = numVecs;
2139  Kokkos::parallel_reduce( n , op, r );
2140 
2141  break;
2142  }
2143  case 3: {
2144  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,3> op;
2145  op.m_x = x;
2146  op.m_y = y;
2147  op.value_count = numVecs;
2148  Kokkos::parallel_reduce( n , op, r );
2149  break;
2150  }
2151  case 2: {
2152  MV_DotProduct_Right_FunctorUnroll<XVector,YVector,2> op;
2153  op.m_x = x;
2154  op.m_y = y;
2155  op.value_count = numVecs;
2156  Kokkos::parallel_reduce( n , op, r );
2157  break;
2158  }
2159  case 1: {
2160  typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
2161  typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
2162 
2163  XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
2164  YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
2165  r[0] = V_Dot(x_1d,y_1d,n);
2166  break;
2167  }
2168  }
2169 
2170  return r;
2171 }
2172 
2173 /*------------------------------------------------------------------------------------------
2174  *-------------------------- Compute Sum -------------------------------------------------
2175  *------------------------------------------------------------------------------------------*/
2176 template<class XVector>
2177 struct MV_Sum_Functor
2178 {
2179  typedef typename XVector::device_type device_type;
2180  typedef typename XVector::size_type size_type;
2181  typedef typename XVector::non_const_value_type xvalue_type;
2182  typedef xvalue_type value_type[];
2183 
2184  typename XVector::const_type m_x ;
2185  size_type value_count;
2186 
2187  MV_Sum_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
2188  //--------------------------------------------------------------------------
2189 
2190  KOKKOS_INLINE_FUNCTION
2191  void operator()( const size_type i, value_type sum ) const
2192  {
2193 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2194 #pragma ivdep
2195 #endif
2196 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2197 #pragma vector always
2198 #endif
2199  for(size_type k=0;k<value_count;k++){
2200  sum[k] += m_x(i,k);
2201  }
2202  }
2203 
2204  KOKKOS_INLINE_FUNCTION
2205  void init( value_type update) const
2206  {
2207 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2208 #pragma ivdep
2209 #endif
2210 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2211 #pragma vector always
2212 #endif
2213  for(size_type k=0;k<value_count;k++)
2214  update[k] = 0;
2215  }
2216 
2217  KOKKOS_INLINE_FUNCTION
2218  void join( volatile value_type update ,
2219  const volatile value_type source ) const
2220  {
2221 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2222 #pragma ivdep
2223 #endif
2224 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2225 #pragma vector always
2226 #endif
2227  for(size_type k=0;k<value_count;k++){
2228  update[k] += source[k];
2229  }
2230  }
2231 };
2232 
2233 
2234 template<class normVector, class VectorType>
2235 normVector MV_Sum(const normVector &r, const VectorType & x, int n = -1)
2236 {
2237  if (n < 0) {
2238  n = x.dimension_0 ();
2239  }
2240 
2241  Kokkos::parallel_reduce (n , MV_Sum_Functor<VectorType> (x), r);
2242  return r;
2243 }
2244 
2245 /*------------------------------------------------------------------------------------------
2246  *-------------------------- Compute Norm1--------------------------------------------------
2247  *------------------------------------------------------------------------------------------*/
2248 template<class XVector>
2249 struct MV_Norm1_Functor
2250 {
2251  typedef typename XVector::device_type device_type;
2252  typedef typename XVector::size_type size_type;
2253  typedef typename XVector::non_const_value_type xvalue_type;
2254  typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
2255  typedef typename IPT::dot_type value_type[];
2256 
2257  typename XVector::const_type m_x ;
2258  size_type value_count;
2259 
2260  MV_Norm1_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
2261  //--------------------------------------------------------------------------
2262 
2263  KOKKOS_INLINE_FUNCTION
2264  void operator()( const size_type i, value_type sum ) const
2265  {
2266 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2267 #pragma ivdep
2268 #endif
2269 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2270 #pragma vector always
2271 #endif
2272  for(size_type k=0;k<value_count;k++){
2274  }
2275  }
2276 
2277  KOKKOS_INLINE_FUNCTION
2278  void init( value_type update) const
2279  {
2280 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2281 #pragma ivdep
2282 #endif
2283 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2284 #pragma vector always
2285 #endif
2286  for(size_type k=0;k<value_count;k++)
2287  update[k] = 0;
2288  }
2289 
2290  KOKKOS_INLINE_FUNCTION
2291  void join( volatile value_type update ,
2292  const volatile value_type source ) const
2293  {
2294 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2295 #pragma ivdep
2296 #endif
2297 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2298 #pragma vector always
2299 #endif
2300  for(size_type k=0;k<value_count;k++){
2301  update[k] += source[k];
2302  }
2303  }
2304 };
2305 
2306 template<class normVector, class VectorType>
2307 normVector MV_Norm1(const normVector &r, const VectorType & x, int n = -1)
2308 {
2309  if (n < 0) {
2310  n = x.dimension_0 ();
2311  }
2312 
2313  Kokkos::parallel_reduce (n , MV_Norm1_Functor<VectorType> (x), r);
2314  return r;
2315 }
2316 
2317 /*------------------------------------------------------------------------------------------
2318  *-------------------------- Compute NormInf--------------------------------------------------
2319  *------------------------------------------------------------------------------------------*/
2320 template<class XVector>
2321 struct MV_NormInf_Functor
2322 {
2323  typedef typename XVector::device_type device_type;
2324  typedef typename XVector::size_type size_type;
2325  typedef typename XVector::non_const_value_type xvalue_type;
2326  typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
2327  typedef typename IPT::dot_type value_type[];
2328 
2329  typename XVector::const_type m_x ;
2330  size_type value_count;
2331 
2332  MV_NormInf_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
2333  //--------------------------------------------------------------------------
2334  KOKKOS_INLINE_FUNCTION
2335  void operator()( const size_type i, value_type sum ) const
2336  {
2337 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2338 #pragma ivdep
2339 #endif
2340 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2341 #pragma vector always
2342 #endif
2343  for(size_type k=0;k<value_count;k++){
2345  }
2346  }
2347 
2348  KOKKOS_INLINE_FUNCTION void init( value_type update) const
2349  {
2350 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2351 #pragma ivdep
2352 #endif
2353 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2354 #pragma vector always
2355 #endif
2356  for(size_type k=0;k<value_count;k++)
2357  update[k] = 0;
2358  }
2359  KOKKOS_INLINE_FUNCTION void join( volatile value_type update ,
2360  const volatile value_type source ) const
2361  {
2362 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2363 #pragma ivdep
2364 #endif
2365 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2366 #pragma vector always
2367 #endif
2368  for(size_type k=0;k<value_count;k++){
2369  update[k] = MAX(update[k],source[k]);
2370  }
2371  }
2372 };
2373 
2374 
2375 template<class normVector, class VectorType>
2376 normVector MV_NormInf(const normVector &r, const VectorType & x, int n = -1)
2377 {
2378  if (n < 0) {
2379  n = x.dimension_0 ();
2380  }
2381 
2382  Kokkos::parallel_reduce (n , MV_NormInf_Functor<VectorType> (x), r);
2383  return r;
2384 }
2385 
2386 /*------------------------------------------------------------------------------------------
2387  *-------------------------- Compute Weighted Dot-product (sum(x_i/w_i)^2)----------------------------------
2388  *------------------------------------------------------------------------------------------*/
2389 template<class WeightVector, class XVector,int WeightsRanks>
2390 struct MV_DotWeighted_Functor{};
2391 
2392 template<class WeightVector, class XVector>
2393 struct MV_DotWeighted_Functor<WeightVector,XVector,1>
2394 {
2395  typedef typename XVector::device_type device_type;
2396  typedef typename XVector::size_type size_type;
2397  typedef typename XVector::non_const_value_type xvalue_type;
2398  typedef typename WeightVector::non_const_value_type wvalue_type;
2399  typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
2400  typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
2401  typedef typename XIPT::dot_type value_type[];
2402 
2403  typename WeightVector::const_type m_w ;
2404  typename XVector::const_type m_x ;
2405  size_type value_count;
2406 
2407  MV_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x),value_count(x.dimension_1()) {}
2408  //--------------------------------------------------------------------------
2409  KOKKOS_INLINE_FUNCTION
2410  void operator()( const size_type i, value_type sum ) const
2411  {
2412 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2413 #pragma ivdep
2414 #endif
2415 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2416 #pragma vector always
2417 #endif
2418  for(size_type k=0;k<value_count;k++){
2419  sum[k] += XIPT::dot( m_x(i,k), m_x(i,k) ) / WIPT::dot( m_w(i), m_w(i) );
2420  }
2421  }
2422 
2423  KOKKOS_INLINE_FUNCTION void init( value_type update) const
2424  {
2425 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2426 #pragma ivdep
2427 #endif
2428 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2429 #pragma vector always
2430 #endif
2431  for(size_type k=0;k<value_count;k++)
2432  update[k] = 0;
2433  }
2434  KOKKOS_INLINE_FUNCTION void join( volatile value_type update ,
2435  const volatile value_type source ) const
2436  {
2437 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2438 #pragma ivdep
2439 #endif
2440 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2441 #pragma vector always
2442 #endif
2443  for(size_type k=0;k<value_count;k++){
2444  update[k] += source[k];
2445  }
2446  }
2447 };
2448 
2449 template<class WeightVector, class XVector>
2450 struct MV_DotWeighted_Functor<WeightVector,XVector,2>
2451 {
2452  typedef typename XVector::device_type device_type;
2453  typedef typename XVector::size_type size_type;
2454  typedef typename XVector::non_const_value_type xvalue_type;
2455  typedef typename WeightVector::non_const_value_type wvalue_type;
2456  typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
2457  typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
2458  typedef typename XIPT::dot_type value_type[];
2459 
2460  typename WeightVector::const_type m_w ;
2461  typename XVector::const_type m_x ;
2462  size_type value_count;
2463 
2464  MV_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x),value_count(x.dimension_1()) {}
2465  //--------------------------------------------------------------------------
2466  KOKKOS_INLINE_FUNCTION
2467  void operator()( const size_type i, value_type sum ) const
2468  {
2469 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2470 #pragma ivdep
2471 #endif
2472 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2473 #pragma vector always
2474 #endif
2475  for(size_type k=0;k<value_count;k++){
2476  sum[k] += XIPT::dot( m_x(i,k), m_x(i,k) ) / WIPT::dot( m_w(i,k), m_w(i,k) );
2477  }
2478  }
2479 
2480  KOKKOS_INLINE_FUNCTION void init( value_type update) const
2481  {
2482 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2483 #pragma ivdep
2484 #endif
2485 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2486 #pragma vector always
2487 #endif
2488  for(size_type k=0;k<value_count;k++)
2489  update[k] = 0;
2490  }
2491  KOKKOS_INLINE_FUNCTION void join( volatile value_type update ,
2492  const volatile value_type source ) const
2493  {
2494 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
2495 #pragma ivdep
2496 #endif
2497 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
2498 #pragma vector always
2499 #endif
2500  for(size_type k=0;k<value_count;k++){
2501  update[k] += source[k];
2502  }
2503  }
2504 };
2505 
2506 template<class rVector, class WeightVector, class XVector>
2507 rVector MV_DotWeighted(const rVector &r, const WeightVector & w, const XVector & x, int n = -1)
2508 {
2509  if (n < 0) {
2510  n = x.dimension_0 ();
2511  }
2512 
2513  typedef MV_DotWeighted_Functor<WeightVector, XVector, WeightVector::Rank> functor_type;
2514  Kokkos::parallel_reduce (n , functor_type (w, x), r);
2515  return r;
2516 }
2517 
2518 /*------------------------------------------------------------------------------------------
2519  *-------------------------- Multiply with scalar: y = a * x -------------------------------
2520  *------------------------------------------------------------------------------------------*/
2521 template<class RVector, class aVector, class XVector>
2522 struct V_MulScalarFunctor
2523 {
2524  typedef typename XVector::device_type device_type;
2525  typedef typename XVector::size_type size_type;
2526 
2527  RVector m_r;
2528  typename XVector::const_type m_x ;
2529  typename aVector::const_type m_a ;
2530  //--------------------------------------------------------------------------
2531 
2532  KOKKOS_INLINE_FUNCTION
2533  void operator()( const size_type i) const
2534  {
2535  m_r(i) = m_a[0]*m_x(i);
2536  }
2537 };
2538 
2539 template<class aVector, class XVector>
2540 struct V_MulScalarFunctorSelf
2541 {
2542  typedef typename XVector::device_type device_type;
2543  typedef typename XVector::size_type size_type;
2544 
2545  XVector m_x;
2546  typename aVector::const_type m_a ;
2547  //--------------------------------------------------------------------------
2548 
2549  KOKKOS_INLINE_FUNCTION
2550  void operator()( const size_type i) const
2551  {
2552  m_x(i) *= m_a(0);
2553  }
2554 };
2555 
2556 template<class RVector, class DataType,class Layout,class Device, class MemoryManagement,class Specialisation, class XVector>
2557 RVector V_MulScalar( const RVector & r, const typename Kokkos::View<DataType,Layout,Device,MemoryManagement,Specialisation> & a, const XVector & x)
2558 {
2560  if(r==x) {
2561  V_MulScalarFunctorSelf<aVector,XVector> op ;
2562  op.m_x = x ;
2563  op.m_a = a ;
2564  Kokkos::parallel_for( x.dimension(0) , op );
2565  return r;
2566  }
2567 
2568  V_MulScalarFunctor<RVector,aVector,XVector> op ;
2569  op.m_r = r ;
2570  op.m_x = x ;
2571  op.m_a = a ;
2572  Kokkos::parallel_for( x.dimension(0) , op );
2573  return r;
2574 }
2575 
2576 template<class RVector, class XVector>
2577 struct V_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector>
2578 {
2579  typedef typename XVector::device_type device_type;
2580  typedef typename XVector::size_type size_type;
2581 
2582  RVector m_r;
2583  typename XVector::const_type m_x ;
2584  typename XVector::value_type m_a ;
2585  //--------------------------------------------------------------------------
2586 
2587  KOKKOS_INLINE_FUNCTION
2588  void operator()( const size_type i) const
2589  {
2590  m_r(i) = m_a*m_x(i);
2591  }
2592 };
2593 
2594 template<class XVector>
2595 struct V_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector>
2596 {
2597  typedef typename XVector::device_type device_type;
2598  typedef typename XVector::size_type size_type;
2599 
2600  XVector m_x;
2601  typename XVector::non_const_value_type m_a ;
2602  //--------------------------------------------------------------------------
2603 
2604  KOKKOS_INLINE_FUNCTION
2605  void operator()( const size_type i) const
2606  {
2607  m_x(i) *= m_a;
2608  }
2609 };
2610 
2611 
2612 template<class RVector, class XVector>
2613 RVector V_MulScalar( const RVector & r, const typename XVector::non_const_value_type &a, const XVector & x)
2614 {
2615  if(r==x) {
2616  V_MulScalarFunctorSelf<typename RVector::value_type,RVector> op ;
2617  op.m_x = r ;
2618  op.m_a = a ;
2619  Kokkos::parallel_for( x.dimension(0) , op );
2620  return r;
2621  }
2622 
2623  V_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> op ;
2624  op.m_r = r ;
2625  op.m_x = x ;
2626  op.m_a = a ;
2627  Kokkos::parallel_for( x.dimension(0) , op );
2628  return r;
2629 }
2630 
2631 template<class RVector, class XVector, class YVector, int scalar_x, int scalar_y>
2632 struct V_AddVectorFunctor
2633 {
2634  typedef typename RVector::device_type device_type;
2635  typedef typename RVector::size_type size_type;
2636  typedef typename XVector::non_const_value_type value_type;
2637  RVector m_r ;
2638  typename XVector::const_type m_x ;
2639  typename YVector::const_type m_y ;
2640  const value_type m_a;
2641  const value_type m_b;
2642 
2643  //--------------------------------------------------------------------------
2644  V_AddVectorFunctor(const RVector& r, const value_type& a,const XVector& x,const value_type& b,const YVector& y):
2645  m_r(r),m_x(x),m_y(y),m_a(a),m_b(b)
2646  { }
2647 
2648  KOKKOS_INLINE_FUNCTION
2649  void operator()( const size_type i ) const
2650  {
2651  if((scalar_x==1)&&(scalar_y==1))
2652  m_r(i) = m_x(i) + m_y(i);
2653  if((scalar_x==1)&&(scalar_y==-1))
2654  m_r(i) = m_x(i) - m_y(i);
2655  if((scalar_x==-1)&&(scalar_y==-1))
2656  m_r(i) = -m_x(i) - m_y(i);
2657  if((scalar_x==-1)&&(scalar_y==1))
2658  m_r(i) = -m_x(i) + m_y(i);
2659  if((scalar_x==2)&&(scalar_y==1))
2660  m_r(i) = m_a*m_x(i) + m_y(i);
2661  if((scalar_x==2)&&(scalar_y==-1))
2662  m_r(i) = m_a*m_x(i) - m_y(i);
2663  if((scalar_x==1)&&(scalar_y==2))
2664  m_r(i) = m_x(i) + m_b*m_y(i);
2665  if((scalar_x==-1)&&(scalar_y==2))
2666  m_r(i) = -m_x(i) + m_b*m_y(i);
2667  if((scalar_x==2)&&(scalar_y==2))
2668  m_r(i) = m_a*m_x(i) + m_b*m_y(i);
2669  }
2670 };
2671 
2672 template<class RVector, class XVector, int scalar_x>
2673 struct V_AddVectorSelfFunctor
2674 {
2675  typedef typename RVector::device_type device_type;
2676  typedef typename RVector::size_type size_type;
2677  typedef typename XVector::non_const_value_type value_type;
2678  RVector m_r ;
2679  typename XVector::const_type m_x ;
2680  const value_type m_a;
2681 
2682  V_AddVectorSelfFunctor(const RVector& r, const value_type& a,const XVector& x):
2683  m_r(r),m_x(x),m_a(a)
2684  { }
2685 
2686  KOKKOS_INLINE_FUNCTION
2687  void operator()( const size_type i ) const
2688  {
2689  if((scalar_x==1))
2690  m_r(i) += m_x(i);
2691  if((scalar_x==-1))
2692  m_r(i) -= m_x(i);
2693  if((scalar_x==2))
2694  m_r(i) += m_a*m_x(i);
2695  }
2696 };
2697 template<class RVector, class XVector, class YVector, int doalpha, int dobeta>
2698 RVector V_AddVector( const RVector & r,const typename XVector::non_const_value_type &av,const XVector & x,
2699  const typename XVector::non_const_value_type &bv, const YVector & y,int n=-1)
2700 {
2701  if(n == -1) n = x.dimension_0();
2702  if(r.ptr_on_device()==x.ptr_on_device() && doalpha == 1) {
2703  V_AddVectorSelfFunctor<RVector,YVector,dobeta> f(r,bv,y);
2704  parallel_for(n,f);
2705  } else if(r.ptr_on_device()==y.ptr_on_device() && dobeta == 1) {
2706  V_AddVectorSelfFunctor<RVector,XVector,doalpha> f(r,av,x);
2707  parallel_for(n,f);
2708  } else {
2709  V_AddVectorFunctor<RVector,XVector,YVector,doalpha,dobeta> f(r,av,x,bv,y);
2710  parallel_for(n,f);
2711  }
2712  return r;
2713 }
2714 
2715 template<class RVector, class XVector, class YVector>
2716 RVector V_AddVector( const RVector & r,const typename XVector::non_const_value_type &av,const XVector & x,
2717  const typename YVector::non_const_value_type &bv, const YVector & y, int n = -1,
2718  int a=2,int b=2)
2719 {
2720  if(a==-1) {
2721  if(b==-1)
2722  V_AddVector<RVector,XVector,YVector,-1,-1>(r,av,x,bv,y,n);
2723  else if(b==0)
2724  V_AddVector<RVector,XVector,YVector,-1,0>(r,av,x,bv,y,n);
2725  else if(b==1)
2726  V_AddVector<RVector,XVector,YVector,-1,1>(r,av,x,bv,y,n);
2727  else
2728  V_AddVector<RVector,XVector,YVector,-1,2>(r,av,x,bv,y,n);
2729  } else if (a==0) {
2730  if(b==-1)
2731  V_AddVector<RVector,XVector,YVector,0,-1>(r,av,x,bv,y,n);
2732  else if(b==0)
2733  V_AddVector<RVector,XVector,YVector,0,0>(r,av,x,bv,y,n);
2734  else if(b==1)
2735  V_AddVector<RVector,XVector,YVector,0,1>(r,av,x,bv,y,n);
2736  else
2737  V_AddVector<RVector,XVector,YVector,0,2>(r,av,x,bv,y,n);
2738  } else if (a==1) {
2739  if(b==-1)
2740  V_AddVector<RVector,XVector,YVector,1,-1>(r,av,x,bv,y,n);
2741  else if(b==0)
2742  V_AddVector<RVector,XVector,YVector,1,0>(r,av,x,bv,y,n);
2743  else if(b==1)
2744  V_AddVector<RVector,XVector,YVector,1,1>(r,av,x,bv,y,n);
2745  else
2746  V_AddVector<RVector,XVector,YVector,1,2>(r,av,x,bv,y,n);
2747  } else if (a==2) {
2748  if(b==-1)
2749  V_AddVector<RVector,XVector,YVector,2,-1>(r,av,x,bv,y,n);
2750  else if(b==0)
2751  V_AddVector<RVector,XVector,YVector,2,0>(r,av,x,bv,y,n);
2752  else if(b==1)
2753  V_AddVector<RVector,XVector,YVector,2,1>(r,av,x,bv,y,n);
2754  else
2755  V_AddVector<RVector,XVector,YVector,2,2>(r,av,x,bv,y,n);
2756  }
2757  return r;
2758 }
2759 
2760 template<class RVector,class XVector,class YVector>
2761 RVector V_Add( const RVector & r, const XVector & x, const YVector & y, int n=-1)
2762 {
2763  return V_AddVector( r,1,x,1,y,n,1,1);
2764 }
2765 
2766 template<class RVector,class XVector,class YVector>
2767 RVector V_Add( const RVector & r, const XVector & x, const typename XVector::non_const_value_type & bv, const YVector & y,int n=-1 )
2768 {
2769  int b = 2;
2770  //if(bv == 0) b = 0;
2771  //if(bv == 1) b = 1;
2772  //if(bv == -1) b = -1;
2773  return V_AddVector(r,bv,x,bv,y,n,1,b);
2774 }
2775 
2776 template<class RVector,class XVector,class YVector>
2777 RVector V_Add( const RVector & r, const typename XVector::non_const_value_type & av, const XVector & x, const typename XVector::non_const_value_type & bv, const YVector & y,int n=-1 )
2778 {
2779  int a = 2;
2780  int b = 2;
2781  //if(av == 0) a = 0;
2782  //if(av == 1) a = 1;
2783  //if(av == -1) a = -1;
2784  //if(bv == 0) b = 0;
2785  //if(bv == 1) b = 1;
2786  //if(bv == -1) b = -1;
2787 
2788  return V_AddVector(r,av,x,bv,y,n,a,b);
2789 }
2790 
2791 template<class XVector, class YVector>
2792 struct V_DotFunctor
2793 {
2794  typedef typename XVector::device_type device_type;
2795  typedef typename XVector::size_type size_type;
2796  typedef typename XVector::non_const_value_type xvalue_type;
2797  typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
2798  typedef typename IPT::dot_type value_type;
2799  XVector m_x ;
2800  YVector m_y ;
2801 
2802  //--------------------------------------------------------------------------
2803  V_DotFunctor(const XVector& x,const YVector& y):
2804  m_x(x),m_y(y)
2805  { }
2806 
2807  KOKKOS_INLINE_FUNCTION
2808  void operator()( const size_type &i, value_type &sum ) const
2809  {
2810  sum += IPT::dot( m_x(i), m_y(i) ); // m_x(i) * m_y(i)
2811  }
2812 
2813  KOKKOS_INLINE_FUNCTION
2814  void init (volatile value_type& update) const
2815  {
2817  }
2818 
2819  KOKKOS_INLINE_FUNCTION
2820  void join( volatile value_type &update ,
2821  const volatile value_type &source ) const
2822  {
2823  update += source ;
2824  }
2825 };
2826 
2827 template<class XVector, class YVector>
2828 typename Details::InnerProductSpaceTraits<typename XVector::non_const_value_type>::dot_type
2829 V_Dot( const XVector & x, const YVector & y, int n = -1)
2830 {
2831  typedef V_DotFunctor<XVector,YVector> Functor;
2832  Functor f(x,y);
2833  if (n<0) n = x.dimension_0();
2834  typename Functor::value_type ret_val;
2835  parallel_reduce(n,f,ret_val);
2836  return ret_val;
2837 }
2838 
2839 template<class WeightVector, class XVector>
2840 struct V_DotWeighted_Functor
2841 {
2842  typedef typename XVector::device_type device_type;
2843  typedef typename XVector::size_type size_type;
2844  typedef typename XVector::non_const_value_type xvalue_type;
2845  typedef typename WeightVector::non_const_value_type wvalue_type;
2846  typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
2847  typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
2848  typedef typename XIPT::dot_type value_type;
2849 
2850  typename WeightVector::const_type m_w;
2851  typename XVector::const_type m_x;
2852 
2853  V_DotWeighted_Functor (WeightVector w, XVector x) :
2854  m_w (w), m_x (x)
2855  {}
2856 
2857  KOKKOS_INLINE_FUNCTION
2858  void operator() (const size_type i, value_type& sum) const {
2859  sum += XIPT::dot (m_x(i), m_x(i)) / WIPT::dot (m_w(i), m_w(i));
2860  }
2861 
2862  KOKKOS_INLINE_FUNCTION void init (value_type& update) const {
2864  }
2865 
2866  KOKKOS_INLINE_FUNCTION void
2867  join (volatile value_type& update,
2868  const volatile value_type& source) const
2869  {
2870  update += source;
2871  }
2872 };
2873 
2874 template<class WeightVector, class XVector>
2875 typename Details::InnerProductSpaceTraits<typename XVector::non_const_value_type>::dot_type
2876 V_DotWeighted(const WeightVector & w, const XVector & x, int n = -1)
2877 {
2878  if (n < 0) {
2879  n = x.dimension_0 ();
2880  }
2881 
2882  typedef Details::InnerProductSpaceTraits<typename XVector::non_const_value_type> IPT;
2883  typedef typename IPT::dot_type value_type;
2884  value_type ret_val;
2885 
2886  typedef V_DotWeighted_Functor<WeightVector,XVector> functor_type;
2887  Kokkos::parallel_reduce (n , functor_type (w, x), ret_val);
2888  return ret_val;
2889 }
2890 
2891 /*------------------------------------------------------------------------------------------
2892  *-------------------------- Compute Sum -------------------------------------------------
2893  *------------------------------------------------------------------------------------------*/
2894 template<class XVector>
2895 struct V_Sum_Functor
2896 {
2897  typedef typename XVector::device_type device_type;
2898  typedef typename XVector::size_type size_type;
2899  typedef typename XVector::non_const_value_type xvalue_type;
2900  typedef xvalue_type value_type;
2901 
2902  typename XVector::const_type m_x ;
2903 
2904  V_Sum_Functor(XVector x):m_x(x) {}
2905  //--------------------------------------------------------------------------
2906 
2907  KOKKOS_INLINE_FUNCTION
2908  void operator()( const size_type i, value_type& sum ) const
2909  {
2910  sum += m_x(i);
2911  }
2912 
2913  KOKKOS_INLINE_FUNCTION
2914  void init( value_type& update) const
2915  {
2917  }
2918 
2919  KOKKOS_INLINE_FUNCTION
2920  void join( volatile value_type& update ,
2921  const volatile value_type& source ) const
2922  {
2923  update += source;
2924  }
2925 };
2926 
2927 
2928 template<class VectorType>
2929 typename VectorType::non_const_value_type
2930 V_Sum (const VectorType& x, int n = -1)
2931 {
2932  if (n < 0) {
2933  n = x.dimension_0 ();
2934  }
2935 
2936  typedef typename VectorType::non_const_value_type value_type;
2937  value_type ret_val;
2938  Kokkos::parallel_reduce (n, V_Sum_Functor<VectorType> (x), ret_val);
2939  return ret_val;
2940 }
2941 
2942 /*------------------------------------------------------------------------------------------
2943  *-------------------------- Compute Norm1--------------------------------------------------
2944  *------------------------------------------------------------------------------------------*/
2945 template<class XVector>
2946 struct V_Norm1_Functor
2947 {
2948  typedef typename XVector::device_type device_type;
2949  typedef typename XVector::size_type size_type;
2950  typedef typename XVector::non_const_value_type xvalue_type;
2951  typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
2952  typedef typename IPT::dot_type value_type;
2953 
2954  typename XVector::const_type m_x ;
2955 
2956  V_Norm1_Functor(XVector x):m_x(x) {}
2957  //--------------------------------------------------------------------------
2958  KOKKOS_INLINE_FUNCTION
2959  void operator()( const size_type i, value_type& sum ) const
2960  {
2962  }
2963  KOKKOS_INLINE_FUNCTION void init (value_type& update) const
2964  {
2966  }
2967  KOKKOS_INLINE_FUNCTION void join( volatile value_type& update ,
2968  const volatile value_type& source ) const
2969  {
2970  update += source;
2971  }
2972 };
2973 
2974 template<class VectorType>
2975 typename Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type>::dot_type
2976 V_Norm1( const VectorType & x, int n = -1)
2977 {
2978  if (n < 0) {
2979  n = x.dimension_0 ();
2980  }
2981 
2982  typedef Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type> IPT;
2983  typedef typename IPT::dot_type value_type;
2984  value_type ret_val;
2985  Kokkos::parallel_reduce (n, V_Norm1_Functor<VectorType> (x), ret_val);
2986  return ret_val;
2987 }
2988 /*------------------------------------------------------------------------------------------
2989  *-------------------------- Compute NormInf--------------------------------------------------
2990  *------------------------------------------------------------------------------------------*/
2991 template<class XVector>
2992 struct V_NormInf_Functor
2993 {
2994  typedef typename XVector::device_type device_type;
2995  typedef typename XVector::size_type size_type;
2996  typedef typename XVector::non_const_value_type xvalue_type;
2997  typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
2998  typedef typename IPT::dot_type value_type;
2999 
3000  typename XVector::const_type m_x ;
3001 
3002  V_NormInf_Functor(XVector x):m_x(x) {}
3003  //--------------------------------------------------------------------------
3004  KOKKOS_INLINE_FUNCTION
3005  void operator()( const size_type i, value_type& sum ) const
3006  {
3008  }
3009 
3010  KOKKOS_INLINE_FUNCTION void init (value_type& update) const
3011  {
3013  }
3014  KOKKOS_INLINE_FUNCTION void join( volatile value_type& update ,
3015  const volatile value_type& source ) const
3016  {
3017  update = MAX(update,source);
3018  }
3019 };
3020 
3021 template<class VectorType>
3022 typename Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type>::dot_type
3023 V_NormInf (const VectorType& x, int n = -1)
3024 {
3025  if (n < 0) {
3026  n = x.dimension_0 ();
3027  }
3028 
3029  typedef Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type> IPT;
3030  typedef typename IPT::dot_type value_type;
3031  value_type ret_val;
3032  Kokkos::parallel_reduce (n, V_NormInf_Functor<VectorType> (x), ret_val);
3033  return ret_val;
3034 }
3035 
3036 /*------------------------------------------------------------------------------------------
3037  *-------------------------- Reciprocal element wise: y[i] = 1/x[i] ------------------------
3038  *------------------------------------------------------------------------------------------*/
3039 template<class RVector, class XVector>
3040 struct V_ReciprocalFunctor
3041 {
3042  typedef typename XVector::device_type device_type;
3043  typedef typename XVector::size_type size_type;
3044 
3045  RVector m_r;
3046  typename XVector::const_type m_x ;
3047 
3048  V_ReciprocalFunctor(RVector r, XVector x):m_r(r),m_x(x) {}
3049  //--------------------------------------------------------------------------
3050 
3051  KOKKOS_INLINE_FUNCTION
3052  void operator()( const size_type i) const
3053  {
3055  }
3056 };
3057 
3058 template<class XVector>
3059 struct V_ReciprocalSelfFunctor
3060 {
3061  typedef typename XVector::device_type device_type;
3062  typedef typename XVector::size_type size_type;
3063 
3064  XVector m_x ;
3065 
3066  V_ReciprocalSelfFunctor(XVector x):m_x(x) {}
3067  //--------------------------------------------------------------------------
3068 
3069  KOKKOS_INLINE_FUNCTION
3070  void operator()( const size_type i) const
3071  {
3073  }
3074 };
3075 
3076 template<class RVector, class XVector>
3077 RVector V_Reciprocal( const RVector & r, const XVector & x)
3078 {
3079  // TODO: Add error check (didn't link for some reason?)
3080  /*if(r.dimension_0() != x.dimension_0())
3081  Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Reciprocal -- dimension(0) of r and x don't match");
3082  */
3083 
3084 
3085  if(r==x) {
3086  V_ReciprocalSelfFunctor<XVector> op(x) ;
3087  Kokkos::parallel_for( x.dimension_0() , op );
3088  return r;
3089  }
3090 
3091  V_ReciprocalFunctor<RVector,XVector> op(r,x) ;
3092  Kokkos::parallel_for( x.dimension_0() , op );
3093  return r;
3094 }
3095 
3096 /*------------------------------------------------------------------------------------------
3097  *------------------- Reciprocal element wise with threshold: x[i] = 1/x[i] ----------------
3098  *------------------------------------------------------------------------------------------*/
3099 template<class XVector>
3100 struct V_ReciprocalThresholdSelfFunctor
3101 {
3102  typedef typename XVector::device_type device_type;
3103  typedef typename XVector::size_type size_type;
3104  typedef typename XVector::non_const_value_type value_type;
3106  typedef typename KAT::mag_type mag_type;
3107 
3108  const XVector m_x;
3109  const value_type m_min_val;
3110  const value_type m_min_val_mag;
3111 
3112  V_ReciprocalThresholdSelfFunctor(const XVector& x, const value_type& min_val) :
3113  m_x(x), m_min_val(min_val), m_min_val_mag(KAT::abs(min_val)) {}
3114  //--------------------------------------------------------------------------
3115 
3116  KOKKOS_INLINE_FUNCTION
3117  void operator()( const size_type i) const
3118  {
3119  if (KAT::abs(m_x(i)) < m_min_val_mag)
3120  m_x(i) = m_min_val;
3121  else
3122  m_x(i) = KAT::one() / m_x(i);
3123  }
3124 };
3125 
3126 template<class XVector>
3127 XVector V_ReciprocalThreshold( const XVector & x, const typename XVector::non_const_value_type& min_val )
3128 {
3129  V_ReciprocalThresholdSelfFunctor<XVector> op(x,min_val) ;
3130  Kokkos::parallel_for( x.dimension_0() , op );
3131  return x;
3132 }
3133 
3134 /*------------------------------------------------------------------------------------------
3135  *-------------------------- Abs element wise: y[i] = abs(x[i]) ------------------------
3136  *------------------------------------------------------------------------------------------*/
3137 template<class RVector, class XVector>
3138 struct V_AbsFunctor
3139 {
3140  typedef typename XVector::device_type device_type;
3141  typedef typename XVector::size_type size_type;
3142 
3143  RVector m_r;
3144  typename XVector::const_type m_x ;
3145 
3146  V_AbsFunctor(RVector r, XVector x):m_r(r),m_x(x) {}
3147  //--------------------------------------------------------------------------
3148 
3149  KOKKOS_INLINE_FUNCTION
3150  void operator()( const size_type i) const
3151  {
3153  }
3154 };
3155 
3156 template<class XVector>
3157 struct V_AbsSelfFunctor
3158 {
3159  typedef typename XVector::device_type device_type;
3160  typedef typename XVector::size_type size_type;
3161 
3162  XVector m_x ;
3163 
3164  V_AbsSelfFunctor(XVector x):m_x(x) {}
3165  //--------------------------------------------------------------------------
3166 
3167  KOKKOS_INLINE_FUNCTION
3168  void operator()( const size_type i) const
3169  {
3171  }
3172 };
3173 
3174 template<class RVector, class XVector>
3175 RVector V_Abs( const RVector & r, const XVector & x)
3176 {
3177  // TODO: Add error check (didn't link for some reason?)
3178  /*if(r.dimension_0() != x.dimension_0())
3179  Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Abs -- dimension(0) of r and x don't match");
3180  */
3181 
3182 
3183  if(r==x) {
3184  V_AbsSelfFunctor<XVector> op(x) ;
3185  Kokkos::parallel_for( x.dimension_0() , op );
3186  return r;
3187  }
3188 
3189  V_AbsFunctor<RVector,XVector> op(r,x) ;
3190  Kokkos::parallel_for( x.dimension_0() , op );
3191  return r;
3192 }
3193 
3194 /*------------------------------------------------------------------------------------------
3195  *------ ElementWiseMultiply element wise: C(i) = c*C(i) + ab*A(i)*B(i) --------------
3196  *------------------------------------------------------------------------------------------*/
3197 template<class CVector, class AVector, class BVector>
3198 struct V_ElementWiseMultiplyFunctor
3199 {
3200  typedef typename CVector::device_type device_type;
3201  typedef typename CVector::size_type size_type;
3202 
3203  typename CVector::const_value_type m_c;
3204  CVector m_C;
3205  typename AVector::const_value_type m_ab;
3206  typename AVector::const_type m_A ;
3207  typename BVector::const_type m_B ;
3208 
3209  V_ElementWiseMultiplyFunctor(
3210  typename CVector::const_value_type c,
3211  CVector C,
3212  typename AVector::const_value_type ab,
3213  typename AVector::const_type A,
3214  typename BVector::const_type B):
3215  m_c(c),m_C(C),m_ab(ab),m_A(A),m_B(B)
3216  {}
3217  //--------------------------------------------------------------------------
3218 
3219  KOKKOS_INLINE_FUNCTION
3220  void operator()( const size_type i) const
3221  {
3222  m_C(i) = m_c*m_C(i) + m_ab*m_A(i)*m_B(i);
3223  }
3224 };
3225 
3226 
3227 template<class CVector, class AVector, class BVector>
3228 CVector V_ElementWiseMultiply(
3229  typename CVector::const_value_type c,
3230  CVector C,
3231  typename AVector::const_value_type ab,
3232  AVector A,
3233  BVector B
3234  )
3235 {
3236  // TODO: Add error check (didn't link for some reason?)
3237  /*if(r.dimension_0() != x.dimension_0())
3238  Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(0) of r and x don't match");
3239  if(r.dimension_1() != x.dimension_1())
3240  Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(1) of r and x don't match");*/
3241 
3242  //TODO: Get 1D version done
3243  /*if(r.dimension_1()==1) {
3244  typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
3245  typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
3246 
3247  RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
3248  XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
3249  return V_ElementWiseMultiply(r_1d,x_1d);
3250  }*/
3251 
3252  V_ElementWiseMultiplyFunctor<CVector,AVector,BVector> op(c,C,ab,A,B) ;
3253  Kokkos::parallel_for( C.dimension_0() , op );
3254  return C;
3255 }
3256 }//end namespace Kokkos
3257 #endif /* KOKKOS_MULTIVECTOR_H_ */
static KOKKOS_FORCEINLINE_FUNCTION T sqrt(const T &x)
The square root of x.
static KOKKOS_FORCEINLINE_FUNCTION mag_type abs(const T &x)
The absolute value (magnitude) of x.
Memory layout tag indicating left-to-right (Fortran scheme) striding of multi-indices.
ArithTraits< T >::mag_type mag_type
The type returned by norm(x) for a value x of type T.
static KOKKOS_FORCEINLINE_FUNCTION T one()
The one value of T; the multiplicative identity.
T dot_type
The type returned by dot(x,y) for values x and y of type T.
void parallel_reduce(const ExecPolicy &policy, const FunctorType &functor, typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Parallel reduction.
static KOKKOS_FORCEINLINE_FUNCTION dot_type dot(const T &x, const T &y)
The "dot product" of two values x and y of type T.
KOKKOS_INLINE_FUNCTION RealType abs(const complex< RealType > &x)
Absolute value (magnitude) of a complex number.
View to an array of data.
static KOKKOS_FORCEINLINE_FUNCTION T zero()
The zero value of T; the arithmetic identity.
Traits class for inner product space operations on type T.
Memory layout tag indicating right-to-left (C or lexigraphical scheme) striding of multi-indices...
static KOKKOS_FORCEINLINE_FUNCTION mag_type norm(const T &x)
The "norm" (absolute value or magnitude) of a value x of type T.
Declaration and definition of Kokkos::Details::InnerProductSpaceTraits.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
T mag_type
The type of the magnitude (absolute value) of T.
Traits class for arithmetic on type T.