42 #ifndef TPETRA_RTI_detail_HPP
43 #define TPETRA_RTI_detail_HPP
45 #include <Teuchos_Assert.hpp>
46 #include <Teuchos_CommHelpers.hpp>
48 #include "Tpetra_Vector.hpp"
66 inline StdOpKernel() : _alpha(ScalarTraits<S>::one()), _beta(ScalarTraits<S>::zero()) {}
67 inline void setData(S * vec_inout,
const S * vec_in2) { _vec_inout = vec_inout; _vec_in2 = vec_in2; }
68 inline void setAlphaBeta(
const S &alpha,
const S &beta) { _alpha = alpha; _beta = beta; }
72 template <
class OP,
class S>
79 inline void setData (S *vec) {
82 inline void execute (
const int i)
const {
83 _vec[i] = _op (_vec[i]);
88 template <
class OP,
class S1,
class S2>
96 inline void setData (S1 *vec_inout,
const S2 *vec_in2) {
97 _vec_inout = vec_inout;
100 inline void execute (
const int i)
const {
101 _vec_inout[i] = _op (_vec_inout[i], _vec_in2[i]);
106 template <
class OP,
class S>
114 inline void setData (S *vec_inout,
const S *vec_in2) {
115 _vec_inout = vec_inout;
118 inline void execute (
const int i)
const {
119 S res = _op (_vec_inout[i], _vec_in2[i]);
120 _vec_inout[i] = this->_alpha * res + this->_beta * _vec_inout[i];
125 template <
class OP,
class S1,
class S2,
class S3>
134 inline void setData (S1 *vec_inout,
const S2 *vec_in2,
const S3 *vec_in3) {
135 _vec_inout = vec_inout;
139 inline void execute (
const int i)
const {
140 _vec_inout[i] = _op (_vec_inout[i], _vec_in2[i], _vec_in3[i]);
145 template <
class Glob,
class S>
148 typedef typename Glob::GenOP GenOP;
149 typedef typename Glob::RedOP RedOP;
150 typedef typename Glob::IdOP IdOP;
151 typedef typename RedOP::result_type ReductionType;
158 _genop (glob.genop), _redop (glob.redop)
160 inline void setData (
const S *vec_in) {
163 inline ReductionType identity ()
const {
164 return IdOP::identity ();
166 inline ReductionType generate (
const int i)
const {
167 return _genop (_vec_in[i]);
169 inline ReductionType reduce (ReductionType a, ReductionType b)
const {
175 template <
class Glob,
class S1,
class S2>
178 typedef typename Glob::GenOP GenOP;
179 typedef typename Glob::RedOP RedOP;
180 typedef typename Glob::IdOP IdOP;
181 typedef typename RedOP::result_type ReductionType;
189 _genop (glob.genop), _redop (glob.redop)
191 inline void setData (
const S1 *vec_in1,
const S2 *vec_in2) {
192 _vec_in1 = vec_in1; _vec_in2 = vec_in2;
194 inline ReductionType identity ()
const {
195 return IdOP::identity ();
197 inline ReductionType generate (
const int i)
const {
198 return _genop (_vec_in1[i], _vec_in2[i]);
200 inline ReductionType reduce (ReductionType a, ReductionType b)
const {
201 return _redop (a, b);
206 template <
class Glob,
class S1,
class S2,
class S3>
209 typedef typename Glob::GenOP GenOP;
210 typedef typename Glob::RedOP RedOP;
211 typedef typename Glob::IdOP IdOP;
212 typedef typename RedOP::result_type ReductionType;
221 _genop (glob.genop), _redop (glob.redop)
223 inline void setData (
const S1 *vec_in1,
const S2 *vec_in2,
const S3 *vec_in3) {
224 _vec_in1 = vec_in1; _vec_in2 = vec_in2; _vec_in3 = vec_in3;
226 inline ReductionType identity ()
const {
227 return IdOP::identity ();
229 inline ReductionType generate (
const int i)
const {
230 return _genop (_vec_in1[i], _vec_in2[i], _vec_in3[i]);
232 inline ReductionType reduce (ReductionType a, ReductionType b)
const {
233 return _redop (a, b);
238 template <
class Glob,
class S1,
class S2>
241 typedef typename Glob::TOP TOP;
242 typedef typename Glob::GenOP GenOP;
243 typedef typename Glob::RedOP RedOP;
244 typedef typename Glob::IdOP IdOP;
245 typedef typename RedOP::result_type ReductionType;
254 _top (glob.top), _genop (glob.genop), _redop (glob.redop)
256 inline void setData (S1 *vec_inout,
const S2 *vec_in2) {
257 _vec_inout = vec_inout;
260 inline ReductionType identity ()
const {
261 return IdOP::identity ();
263 inline ReductionType reduce (ReductionType a, ReductionType b)
const {
264 return _redop (a, b);
266 inline ReductionType generate (
const int i)
const {
267 _vec_inout[i] = _top (_vec_inout[i], _vec_in2[i]);
268 return _genop (_vec_inout[i], _vec_in2[i]);
273 template <
class Glob,
class S1,
class S2,
class S3>
276 typedef typename Glob::TOP TOP;
277 typedef typename Glob::GenOP GenOP;
278 typedef typename Glob::RedOP RedOP;
279 typedef typename Glob::IdOP IdOP;
280 typedef typename RedOP::result_type ReductionType;
290 _top (glob.top), _genop (glob.genop), _redop (glob.redop)
292 inline void setData (S1 *vec_inout,
const S2 *vec_in2,
const S3 *vec_in3) {
293 _vec_inout = vec_inout;
297 inline ReductionType identity ()
const {
298 return IdOP::identity ();
300 inline ReductionType reduce (ReductionType a, ReductionType b)
const {
301 return _redop (a, b);
303 inline ReductionType generate (
const int i)
const {
304 _vec_inout[i] = _top (_vec_inout[i], _vec_in2[i], _vec_in3[i]);
305 return _genop (_vec_inout[i], _vec_in2[i], _vec_in3[i]);
312 public Teuchos::ValueTypeReductionOp<int,typename OP::ReductionType> {
316 typedef typename OP::ReductionType Packet;
318 void reduce (
const int count,
const Packet inBuffer[], Packet inoutBuffer [])
const
320 for (
int i = 0; i != count; ++i) {
321 inoutBuffer[i] = _op.reduce (inoutBuffer[i], inBuffer[i]);
327 template <
class S,
class LO,
class GO,
class Node,
class OP>
330 KokkosClassic::MultiVector<S,Node> mv = vec.getLocalMV ();
331 const RCP<Node> node = mv.getNode();
333 KokkosClassic::ReadyBufferHelper<Node> rbh(node);
335 S * out_ptr = rbh.addNonConstBuffer(mv.getValuesNonConst());
338 const size_t N = mv.getNumRows();
339 node->template parallel_for (0, N, op);
343 template <
class S1,
class S2,
class LO,
class GO,
class Node,
class OP>
346 KokkosClassic::MultiVector<S1,Node> mv_inout = vec_inout.getLocalMV ();
347 KokkosClassic::MultiVector<S2,Node> mv_in2 = vec_in2.getLocalMV ();
348 const RCP<Node> node = mv_inout.getNode();
350 KokkosClassic::ReadyBufferHelper<Node> rbh(node);
352 S1 * out_ptr = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
353 const S2 * in_ptr = rbh.addConstBuffer(mv_in2.getValues());
355 op.setData(out_ptr, in_ptr);
356 const size_t N = mv_inout.getNumRows();
357 #ifdef HAVE_TPETRA_DEBUG
358 TEUCHOS_TEST_FOR_EXCEPTION( mv_in2.getNode() != mv_inout.getNode(), std::runtime_error,
359 "Tpetra::RTI::detail::binary_transform(): multivectors must share the same node.");
361 node->template parallel_for(0, N, op);
365 template <
class S1,
class S2,
class S3,
class LO,
class GO,
class Node,
class OP>
366 void tertiary_transform(
Vector<S1,LO,GO,Node> &vec_inout,
const Vector<S2,LO,GO,Node> &vec_in2,
const Vector<S3,LO,GO,Node> &vec_in3, OP op)
368 KokkosClassic::MultiVector<S1,Node> mv_inout = vec_inout.getLocalMV ();
369 KokkosClassic::MultiVector<S2,Node> mv_in2 = vec_in2.getLocalMV ();
370 KokkosClassic::MultiVector<S3,Node> mv_in3 = vec_in3.getLocalMV ();
371 const RCP<Node> node = mv_inout.getNode();
373 KokkosClassic::ReadyBufferHelper<Node> rbh(node);
375 S1 * out_ptr = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
376 const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
377 const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues());
379 op.setData(out_ptr, in_ptr2, in_ptr3);
380 const size_t N = mv_inout.getNumRows();
381 #ifdef HAVE_TPETRA_DEBUG
382 TEUCHOS_TEST_FOR_EXCEPTION( mv_in2.getNode() != mv_inout.getNode() || mv_in3.getNode() != mv_in2.getNode(), std::runtime_error,
383 "Tpetra::RTI::detail::tertiary_transform(): multivectors must share the same node.");
385 node->template parallel_for(0, N, op);
389 template <
class S,
class LO,
class GO,
class Node,
class OP>
390 typename OP::ReductionType
393 const KokkosClassic::MultiVector<S,Node> &mv_in = vec_in.getLocalMV();
394 const RCP<Node> node = mv_in.getNode();
395 const RCP<const Teuchos::Comm<int> > comm = vec_in.
getMap()->getComm();
397 KokkosClassic::ReadyBufferHelper<Node> rbh(node);
399 const S * in_ptr = rbh.addConstBuffer(mv_in.getValues());
401 op.setData( in_ptr );
402 const size_t N = mv_in.getNumRows();
404 typename OP::ReductionType gbl_res, lcl_res;
405 lcl_res = node->template parallel_reduce(0, N, op);
408 Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
413 template <
class S1,
class S2,
class LO,
class GO,
class Node,
class OP>
414 typename OP::ReductionType
417 const KokkosClassic::MultiVector<S1,Node> &mv_in1 = vec_in1.getLocalMV(),
418 &mv_in2 = vec_in2.getLocalMV();
419 const RCP<Node> node = mv_in1.getNode();
420 const RCP<const Teuchos::Comm<int> > comm = vec_in1.
getMap()->getComm();
422 KokkosClassic::ReadyBufferHelper<Node> rbh(node);
424 const S1 * in_ptr1 = rbh.addConstBuffer(mv_in1.getValues());
425 const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
427 op.setData( in_ptr1, in_ptr2 );
428 const size_t N = mv_in1.getNumRows();
429 #ifdef HAVE_TPETRA_DEBUG
430 TEUCHOS_TEST_FOR_EXCEPTION( mv_in1.getNode() != mv_in2.getNode(), std::runtime_error,
431 "Tpetra::RTI::detail::reduce(): multivectors must share the same node.");
434 typename OP::ReductionType gbl_res, lcl_res;
435 lcl_res = node->template parallel_reduce(0, N, op);
438 Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
443 template <
class S1,
class S2,
class S3,
class LO,
class GO,
class Node,
class OP>
444 typename OP::ReductionType
445 reduce(
const Vector<S1,LO,GO,Node> &vec_in1,
const Vector<S2,LO,GO,Node> &vec_in2,
const Vector<S3,LO,GO,Node> &vec_in3, OP op)
447 const KokkosClassic::MultiVector<S1,Node> &mv_in1 = vec_in1.getLocalMV();
448 const KokkosClassic::MultiVector<S2,Node> &mv_in2 = vec_in2.getLocalMV();
449 const KokkosClassic::MultiVector<S3,Node> &mv_in3 = vec_in3.getLocalMV();
450 const RCP<Node> node = mv_in1.getNode();
451 const RCP<const Teuchos::Comm<int> > comm = vec_in1.
getMap()->getComm();
453 KokkosClassic::ReadyBufferHelper<Node> rbh(node);
455 const S1 * in_ptr1 = rbh.addConstBuffer(mv_in1.getValues());
456 const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
457 const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues());
459 op.setData( in_ptr1, in_ptr2, in_ptr3 );
460 const size_t N = mv_in1.getNumRows();
461 #ifdef HAVE_TPETRA_DEBUG
462 TEUCHOS_TEST_FOR_EXCEPTION( mv_in1.getNode() != mv_in2.getNode() || mv_in2.getNode() != mv_in3.getNode(), std::runtime_error,
463 "Tpetra::RTI::detail::reduce(): multivectors must share the same node.");
466 typename OP::ReductionType gbl_res, lcl_res;
467 lcl_res = node->template parallel_reduce(0, N, op);
470 Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
475 template <
class S1,
class S2,
class LO,
class GO,
class Node,
class OP>
476 typename OP::ReductionType
479 KokkosClassic::MultiVector<S1,Node> mv_inout = vec_inout.getLocalMV ();
480 KokkosClassic::MultiVector<S2,Node> mv_in2 = vec_in2.getLocalMV ();
481 const RCP<Node> node = mv_inout.getNode();
482 const RCP<const Teuchos::Comm<int> > comm = vec_inout.
getMap()->getComm();
484 KokkosClassic::ReadyBufferHelper<Node> rbh(node);
486 S1 * in_ptr1 = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
487 const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
489 op.setData( in_ptr1, in_ptr2 );
490 const size_t N = mv_inout.getNumRows();
491 #ifdef HAVE_TPETRA_DEBUG
492 TEUCHOS_TEST_FOR_EXCEPTION( mv_inout.getNode() != mv_in2.getNode(), std::runtime_error,
493 "Tpetra::RTI::detail::transform_reduce(): multivectors must share the same node.");
496 typename OP::ReductionType gbl_res, lcl_res;
497 lcl_res = node->template parallel_reduce(0, N, op);
500 Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
505 template <
class S1,
class S2,
class S3,
class LO,
class GO,
class Node,
class OP>
506 typename OP::ReductionType
507 transform_reduce(
Vector<S1,LO,GO,Node> &vec_inout,
const Vector<S2,LO,GO,Node> &vec_in2,
const Vector<S3,LO,GO,Node> &vec_in3, OP op)
509 KokkosClassic::MultiVector<S1,Node> mv_inout = vec_inout.getLocalMV ();
510 KokkosClassic::MultiVector<S2,Node> mv_in2 = vec_in2.getLocalMV ();
511 KokkosClassic::MultiVector<S3,Node> mv_in3 = vec_in3.getLocalMV ();
512 const RCP<Node> node = mv_inout.getNode();
513 const RCP<const Teuchos::Comm<int> > comm = vec_inout.
getMap()->getComm();
515 KokkosClassic::ReadyBufferHelper<Node> rbh(node);
517 S1 * in_ptr1 = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
518 const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
519 const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues());
521 op.setData( in_ptr1, in_ptr2, in_ptr3 );
522 const size_t N = mv_inout.getNumRows();
523 #ifdef HAVE_TPETRA_DEBUG
524 TEUCHOS_TEST_FOR_EXCEPTION( mv_inout.getNode() != mv_in2.getNode() && mv_inout.getNode() != mv_in3.getNode(), std::runtime_error,
525 "Tpetra::RTI::detail::transform_transform(): multivectors must share the same node.");
528 typename OP::ReductionType gbl_res, lcl_res;
529 lcl_res = node->template parallel_reduce(0, N, op);
532 Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
542 #endif // TPETRA_RTI_detail_HPP
Utility base class for kernels used to define Tpetra::Operator objects.
OP::ReductionType transform_reduce(Vector< S1, LO, GO, Node > &vec_inout, const Vector< S2, LO, GO, Node > &vec_in2, OP op)
pass vec_inout and vec_in2 data pointers to op, then execute via node parallel_reduce.
adapter class between kernels for Tpetra::RTI::binary_transform and Tpetra::RTI::detail::binary_transform
void unary_transform(Vector< S, LO, GO, Node > &vec, OP op)
pass vec data pointer to op, then execute via node parallel_for
OP::ReductionType reduce(const Vector< S, LO, GO, Node > &vec_in, OP op)
pass vec_in data pointer to op, then execute via node parallel_reduce.
adapter class between kernels for Tpetra::RTI::unary_transform and Tpetra::RTI::detail::unary_transform
decorator for Kokkos reduction kernels to satisfy requirements for Teuchos::ValueTypeReductionOp ...
void binary_transform(Vector< S1, LO, GO, Node > &vec_inout, const Vector< S2, LO, GO, Node > &vec_in2, OP op)
pass vec_inout and vec_in2 data pointers to op, then execute via node parallel_for ...
adapter class between kernels for Tpetra::RTI::binary_transform and Tpetra::RTI::detail::binary_transform
void tertiary_transform(Vector< S1, LO, GO, Node > &vec_inout, const Vector< S2, LO, GO, Node > &vec_in2, const Vector< S3, LO, GO, Node > &vec_in3, OP op)
pass vec_inout, vec_in2 and vec_in3 data pointers to op, then execute via node parallel_for ...
adapter class between binary functors and BinaryOp
adapter class between kernels for Tpetra::RTI::tertiary_transform and Tpetra::RTI::detail::tertiary_transform
adapter class between kernels for Tpetra::RTI::binary_transform and Tpetra::RTI::detail::binary_transform
adapter class between kernels for Tpetra::RTI::binary_transform and Tpetra::RTI::detail::binary_transform
A distributed dense vector.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.