Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_transform_MultiVector.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // ************************************************************************
39 // @HEADER
40 */
41 
42 #ifndef TPETRA_TRANSFORM_MULTIVECTOR_HPP
43 #define TPETRA_TRANSFORM_MULTIVECTOR_HPP
44 
45 #include "Tpetra_transform.hpp"
49 #include "Tpetra_Map.hpp"
50 #include "Teuchos_Comm.hpp"
51 #include "Teuchos_TestForException.hpp"
52 #include "Kokkos_Core.hpp"
53 #include <iostream>
54 #include <sstream>
55 
59 
60 namespace Tpetra {
61  namespace Details {
62 
63  // Kokkos::parallel_for functor that implements unary
64  // Tpetra::transform for MultiVector objects.
65  //
66  // The input and output Views may be the same object (locally).
67  template<class InputViewType,
68  class OutputViewType,
69  class UnaryFunctionType,
70  class LocalIndexType>
71  class MultiVectorUnaryTransformLoopBody {
72  private:
73  static_assert (static_cast<int> (InputViewType::Rank) == 2,
74  "InputViewType must be a rank-2 Kokkos::View.");
75  static_assert (static_cast<int> (OutputViewType::Rank) == 2,
76  "OutputViewType must be a rank-2 Kokkos::View.");
77 
78  public:
79  MultiVectorUnaryTransformLoopBody (const InputViewType& in,
80  const OutputViewType& out,
81  UnaryFunctionType f) :
82  in_ (in), out_ (out), f_ (f)
83  {}
84 
85  KOKKOS_INLINE_FUNCTION void
86  operator () (const LocalIndexType i) const {
87  using LO = LocalIndexType;
88  const LO numCols = static_cast<LO> (in_.extent (1));
89  for (LO j = 0; j < numCols; ++j) {
90  out_(i,j) = f_ (in_(i,j));
91  }
92  }
93 
94  private:
95  InputViewType in_;
96  OutputViewType out_;
97  UnaryFunctionType f_;
98  };
99 
100  // Kokkos::parallel_for functor that implements binary
101  // Tpetra::transform for MultiVector objects.
102  //
103  // The input and output Views may be the same object (locally).
104  template<class InputViewType1,
105  class InputViewType2,
106  class OutputViewType,
107  class BinaryFunctionType,
108  class LocalIndexType>
109  class MultiVectorBinaryTransformLoopBody {
110  private:
111  static_assert (static_cast<int> (InputViewType1::Rank) == 2,
112  "InputViewType1 must be a rank-2 Kokkos::View.");
113  static_assert (static_cast<int> (InputViewType2::Rank) == 2,
114  "InputViewType2 must be a rank-2 Kokkos::View.");
115  static_assert (static_cast<int> (OutputViewType::Rank) == 2,
116  "OutputViewType must be a rank-2 Kokkos::View.");
117 
118  public:
119  MultiVectorBinaryTransformLoopBody (const InputViewType1& in1,
120  const InputViewType2& in2,
121  const OutputViewType& out,
122  BinaryFunctionType f) :
123  in1_ (in1), in2_ (in2), out_ (out), f_ (f)
124  {}
125 
126  KOKKOS_INLINE_FUNCTION void
127  operator () (const LocalIndexType i) const {
128  using LO = LocalIndexType;
129  const LO numCols = static_cast<LO> (in1_.extent (1));
130  for (LO j = 0; j < numCols; ++j) {
131  out_(i,j) = f_ (in1_(i,j), in2_(i,j));
132  }
133  }
134 
135  private:
136  InputViewType1 in1_;
137  InputViewType2 in2_;
138  OutputViewType out_;
139  BinaryFunctionType f_;
140  };
141 
142  // Kokkos::parallel_for functor that implements unary
143  // Tpetra::transform for Vector objects.
144  //
145  // The input and output Views may be the same object (locally).
146  template<class InputViewType,
147  class OutputViewType,
148  class UnaryFunctionType,
149  class LocalIndexType>
150  class VectorUnaryTransformLoopBody {
151  private:
152  static_assert (static_cast<int> (InputViewType::Rank) == 1,
153  "InputViewType must be a rank-1 Kokkos::View.");
154  static_assert (static_cast<int> (OutputViewType::Rank) == 1,
155  "OutputViewType must be a rank-1 Kokkos::View.");
156 
157  public:
158  VectorUnaryTransformLoopBody (const InputViewType& in,
159  const OutputViewType& out,
160  UnaryFunctionType f) :
161  in_ (in), out_ (out), f_ (f)
162  {}
163 
164  KOKKOS_INLINE_FUNCTION void
165  operator () (const LocalIndexType i) const {
166  out_(i) = f_ (in_(i));
167  }
168 
169  private:
170  InputViewType in_;
171  OutputViewType out_;
172  UnaryFunctionType f_;
173  };
174 
175  // Kokkos::parallel_for functor that implements binary
176  // Tpetra::transform for Vector objects.
177  //
178  // The input and output Views may be the same object (locally).
179  template<class InputViewType1,
180  class InputViewType2,
181  class OutputViewType,
182  class BinaryFunctionType,
183  class LocalIndexType>
184  class VectorBinaryTransformLoopBody {
185  private:
186  static_assert (static_cast<int> (InputViewType1::Rank) == 1,
187  "InputViewType1 must be a rank-1 Kokkos::View.");
188  static_assert (static_cast<int> (InputViewType1::Rank) == 1,
189  "InputViewType1 must be a rank-1 Kokkos::View.");
190  static_assert (static_cast<int> (OutputViewType::Rank) == 1,
191  "OutputViewType must be a rank-1 Kokkos::View.");
192 
193  public:
194  VectorBinaryTransformLoopBody (const InputViewType1& in1,
195  const InputViewType2& in2,
196  const OutputViewType& out,
197  BinaryFunctionType f) :
198  in1_ (in1), in2_ (in2), out_ (out), f_ (f)
199  {}
200 
201  KOKKOS_INLINE_FUNCTION void
202  operator () (const LocalIndexType i) const {
203  out_(i) = f_ (in1_(i), in2_(i));
204  }
205 
206  private:
207  InputViewType1 in1_;
208  InputViewType2 in2_;
209  OutputViewType out_;
210  BinaryFunctionType f_;
211  };
212 
213  // CUDA 9.2 doesn't like it when you call lambdas in private or
214  // protected methods (of Transform, in this case). Thus, I've
215  // broken out Transform::transform_sameObject into a separate
216  // functor and nonmember function.
217  template<class ExecutionSpace,
218  class SC, class LO, class GO, class NT,
219  class UnaryFunctionType>
220  class UnaryTransformSameMultiVector {
221  private:
223  using IST = typename MV::impl_scalar_type;
224 
225  public:
226  UnaryTransformSameMultiVector (UnaryFunctionType f) : f_ (f) {}
227 
228  KOKKOS_INLINE_FUNCTION void operator() (IST& X_ij) const {
229  // User function has the form IST(const IST&) suitable for
230  // transform, so we have to convert it to a for_each function
231  // of the form void(IST&).
232  X_ij = f_(X_ij);
233  }
234 
235  private:
236  UnaryFunctionType f_;
237  };
238 
239  template<class ExecutionSpace,
240  class SC, class LO, class GO, class NT,
241  class UnaryFunctionType>
242  void
243  unaryTransformSameMultiVector (const char kernelLabel[],
244  ExecutionSpace execSpace,
246  UnaryFunctionType f)
247  {
248  using functor_type = UnaryTransformSameMultiVector<ExecutionSpace,
249  SC, LO, GO, NT, UnaryFunctionType>;
250  ::Tpetra::for_each (kernelLabel, execSpace, output, functor_type (f));
251  }
252 
255  template<class ExecutionSpace,
256  class SC, class LO, class GO, class NT>
257  struct Transform<ExecutionSpace,
258  ::Tpetra::MultiVector<SC, LO, GO, NT> >
259  {
260  private:
261  // Given a Kokkos execution space on which the user wants to run
262  // the transform, and a memory space in which the MultiVector's
263  // data live, determine the memory space that transform should
264  // use in its withLocalAccess call.
265  template<class MemorySpace>
266  using transform_memory_space =
267  typename std::conditional<
268  Kokkos::SpaceAccessibility<
269  ExecutionSpace,
270  typename MemorySpace::memory_space>::accessible,
271  typename MemorySpace::memory_space,
272  typename ExecutionSpace::memory_space>::type;
273 
275  using preferred_memory_space =
276  typename MV::device_type::memory_space;
277  using memory_space =
278  transform_memory_space<preferred_memory_space>;
279  using IST = typename MV::impl_scalar_type;
280 
281  // This is not the same as "aliases" -- we actually want to know
282  // if input and output are the same object (locally), so that we
283  // can sync correctly. The result of transform is undefined if
284  // input and output partially alias one another.
285  static bool
286  sameObject (const ::Tpetra::MultiVector<SC, LO, GO, NT>& input,
287  const ::Tpetra::MultiVector<SC, LO, GO, NT>& output)
288  {
289  return &input == &output ||
290  input.getLocalViewHost ().data () ==
291  output.getLocalViewHost ().data () ||
292  input.getLocalViewDevice ().data () ==
293  output.getLocalViewDevice ().data ();
294  }
295 
296  template<class UnaryFunctionType>
297  static void
298  transform_vec_notSameObject
299  (const char kernelLabel[],
300  ExecutionSpace execSpace,
303  UnaryFunctionType f)
304  {
305  memory_space memSpace;
306  // Generic lambdas need C++14, so we need a typedef here.
307  using input_view_type =
309  decltype (readOnly (input).on (memSpace). at(execSpace))>;
310  using output_view_type =
312  decltype (writeOnly (output).on (memSpace). at(execSpace))>;
313 
315  ([=] (const input_view_type& input_lcl,
316  const output_view_type& output_lcl) {
317  using functor_type = VectorUnaryTransformLoopBody<
318  input_view_type, output_view_type, UnaryFunctionType, LO>;
319  functor_type g (input_lcl, output_lcl, f);
320 
321  const LO lclNumRows = static_cast<LO> (input_lcl.extent (0));
322  using range_type = Kokkos::RangePolicy<ExecutionSpace, LO>;
323  range_type range (execSpace, 0, lclNumRows);
324 
325  Kokkos::parallel_for (kernelLabel, range, g);
326  },
327  readOnly (input).on (memSpace).at (execSpace),
328  writeOnly (output).on (memSpace).at (execSpace));
329  }
330 
331  template<class UnaryFunctionType>
332  static void
333  transform_mv_notSameObject
334  (const char kernelLabel[],
335  ExecutionSpace execSpace,
338  UnaryFunctionType f)
339  {
340  memory_space memSpace;
341  // Generic lambdas need C++14, so we need typedefs here.
342  using input_view_type =
344  decltype (readOnly (input).on (memSpace). at(execSpace))>;
345  using output_view_type =
347  decltype (writeOnly (output).on (memSpace). at(execSpace))>;
348 
350  ([=] (const input_view_type& input_lcl,
351  const output_view_type& output_lcl) {
352  using functor_type = MultiVectorUnaryTransformLoopBody<
353  input_view_type, output_view_type, UnaryFunctionType, LO>;
354  functor_type g (input_lcl, output_lcl, f);
355 
356  const LO lclNumRows = static_cast<LO> (input_lcl.extent (0));
357  using range_type = Kokkos::RangePolicy<ExecutionSpace, LO>;
358  range_type range (execSpace, 0, lclNumRows);
359 
360  Kokkos::parallel_for (kernelLabel, range, g);
361  },
362  readOnly (input).on (memSpace).at (execSpace),
363  writeOnly (output).on (memSpace).at (execSpace));
364  }
365 
366  public:
367  template<class UnaryFunctionType>
368  static void
369  transform (const char kernelLabel[],
370  ExecutionSpace execSpace,
373  UnaryFunctionType f)
374  {
375  using Teuchos::TypeNameTraits;
376  using std::endl;
377 
378  const int myRank = output.getMap ()->getComm ()->getRank ();
379  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
380  if (verbose) {
381  std::ostringstream os;
382  os << "Proc " << myRank << ": Tpetra::transform:" << endl
383  << " kernelLabel: " << kernelLabel << endl
384  << " ExecutionSpace: "
385  << TypeNameTraits<ExecutionSpace>::name () << endl;
386  std::cerr << os.str ();
387  }
388 
389  const size_t numVecs = output.getNumVectors ();
390  TEUCHOS_TEST_FOR_EXCEPTION
391  (input.getNumVectors () != numVecs, std::invalid_argument,
392  "input.getNumVectors() = " << input.getNumVectors () <<
393  " != output.getNumVectors() = " << numVecs << ".");
394 
395  const bool constStride = output.isConstantStride () &&
396  input.isConstantStride ();
397 
398  memory_space memSpace;
399  if (numVecs == size_t (1) || ! constStride) {
400  for (size_t j = 0; j < numVecs; ++j) {
401  auto output_j = output.getVectorNonConst (j);
402  auto input_j = input.getVectorNonConst (j);
403 
404  // Check for aliasing here, since it's possible for only
405  // some columns of input & output to alias. Aliasing is a
406  // correctness issue (e.g., for sync'ing).
407  if (sameObject (*output_j, *input_j)) {
408  unaryTransformSameMultiVector (kernelLabel, execSpace,
409  *output_j, f);
410  }
411  else {
412  transform_vec_notSameObject (kernelLabel, execSpace,
413  *input_j, *output_j, f);
414  }
415  }
416  }
417  else {
418  if (sameObject (output, input)) {
419  unaryTransformSameMultiVector (kernelLabel, execSpace,
420  output, f);
421  }
422  else {
423  transform_mv_notSameObject (kernelLabel, execSpace,
424  input, output, f);
425  }
426  }
427  }
428 
429  // Implementation of binary transform on MultiVectors.
430  template<class BinaryFunctionType>
431  static void
432  transform (const char kernelLabel[],
433  ExecutionSpace execSpace,
437  BinaryFunctionType f)
438  {
439  using Teuchos::TypeNameTraits;
440  using std::endl;
441  const char prefix[] = "Tpetra::transform (binary): ";
442 
443  const int myRank = output.getMap ()->getComm ()->getRank ();
444  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
445  if (verbose) {
446  std::ostringstream os;
447  os << "Proc " << myRank << ": " << prefix << endl
448  << " Tpetra::MultiVector<" << TypeNameTraits<SC>::name ()
449  << ", " << TypeNameTraits<LO>::name () << ", "
450  << TypeNameTraits<GO>::name () << ", "
451  << TypeNameTraits<NT>::name () << ">" << endl
452  << " kernelLabel: " << kernelLabel << endl
453  << " ExecutionSpace: "
454  << TypeNameTraits<ExecutionSpace>::name () << endl;
455  std::cerr << os.str ();
456  }
457 
458  const size_t numVecs = output.getNumVectors ();
459  TEUCHOS_TEST_FOR_EXCEPTION
460  (input1.getNumVectors () != numVecs, std::invalid_argument,
461  prefix << "input1.getNumVectors() = " << input1.getNumVectors ()
462  << " != output.getNumVectors() = " << numVecs << ".");
463  TEUCHOS_TEST_FOR_EXCEPTION
464  (input2.getNumVectors () != numVecs, std::invalid_argument,
465  prefix << "input2.getNumVectors() = " << input2.getNumVectors ()
466  << " != output.getNumVectors() = " << numVecs << ".");
467 
468  const bool constStride = output.isConstantStride () &&
469  input1.isConstantStride () && input2.isConstantStride ();
470  memory_space memSpace;
471 
472  const LO lclNumRows = static_cast<LO> (output.getLocalLength ());
473  using range_type = Kokkos::RangePolicy<ExecutionSpace, LO>;
474  range_type range (execSpace, 0, lclNumRows);
475 
476  if (numVecs == size_t (1) || ! constStride) { // operate on Vectors
477  for (size_t j = 0; j < numVecs; ++j) {
478  auto output_j = output.getVectorNonConst (j);
479  auto input1_j = input1.getVectorNonConst (j);
480  auto input2_j = input2.getVectorNonConst (j);
481 
482  // Check for aliasing here, since it's possible for only
483  // some columns of input & output to alias. Aliasing is a
484  // correctness issue (e.g., for sync'ing).
485  const bool outin1same = sameObject (*output_j, *input1_j);
486  const bool outin2same = sameObject (*output_j, *input2_j);
487  // Don't double-view input1.
488  const bool in1in2same = sameObject (*input1_j, *input2_j);
489  const bool allsame = outin1same && outin2same; // by transitivity
490 
491  // Help GCC 4.9.3 deduce the types of *output_j,
492  // *input1_j, and *input2_j. See discussion here:
493  // https://github.com/trilinos/Trilinos/pull/5115
494  using vec_type = ::Tpetra::Vector<SC, LO, GO, NT>;
495  vec_type& input1_j_ref = *input1_j;
496  vec_type& input2_j_ref = *input2_j;
497  vec_type& output_j_ref = *output_j;
498 
499  // Once we get C++14 generic lambdas, we can get rid of
500  // these typedefs and use "const auto&" as the argument(s)
501  // for the withLocalAccess lambdas below.
502  using input1_view_type =
504  decltype (readOnly (input1_j_ref).on (memSpace). at(execSpace))>;
505  using input2_view_type =
507  decltype (readOnly (input2_j_ref).on (memSpace). at(execSpace))>;
508  using rw_output_view_type =
510  decltype (readWrite (output_j_ref).on (memSpace). at(execSpace))>;
511  using wo_output_view_type =
513  decltype (writeOnly (output_j_ref).on (memSpace). at(execSpace))>;
514 
515  if (allsame) {
517  ([=] (const rw_output_view_type& output_lcl) {
518  using functor_type = VectorBinaryTransformLoopBody<
519  typename rw_output_view_type::const_type,
520  typename rw_output_view_type::const_type,
521  rw_output_view_type,
522  BinaryFunctionType, LO>;
523  functor_type functor (output_lcl, output_lcl, output_lcl, f);
524  Kokkos::parallel_for (kernelLabel, range, functor);
525  },
526  readWrite (output_j_ref).on (memSpace).at (execSpace));
527  }
528  else if (in1in2same) { // and not same as output
530  ([=] (const input1_view_type& input1_lcl,
531  const wo_output_view_type& output_lcl) {
532  using functor_type = VectorBinaryTransformLoopBody<
533  input1_view_type,
534  input1_view_type,
535  wo_output_view_type,
536  BinaryFunctionType, LO>;
537  functor_type functor (input1_lcl, input1_lcl, output_lcl, f);
538  Kokkos::parallel_for (kernelLabel, range, functor);
539  },
540  readOnly (input1_j_ref).on (memSpace).at (execSpace),
541  writeOnly (output_j_ref).on (memSpace).at (execSpace));
542  }
543  else if (outin1same) { // and input1 not same as input2
545  ([=] (const input2_view_type& input2_lcl,
546  const rw_output_view_type& output_lcl) {
547  using functor_type = VectorBinaryTransformLoopBody<
548  typename rw_output_view_type::const_type,
549  input2_view_type,
550  rw_output_view_type,
551  BinaryFunctionType, LO>;
552  functor_type functor (output_lcl, input2_lcl, output_lcl, f);
553  Kokkos::parallel_for (kernelLabel, range, functor);
554  },
555  readOnly (input2_j_ref).on (memSpace).at (execSpace),
556  readWrite (output_j_ref).on (memSpace).at (execSpace));
557  }
558  else if (outin2same) { // and input1 not same as input2
560  ([=] (const input1_view_type& input1_lcl,
561  const rw_output_view_type& output_lcl) {
562  using functor_type = VectorBinaryTransformLoopBody<
563  input1_view_type,
564  typename rw_output_view_type::const_type,
565  rw_output_view_type,
566  BinaryFunctionType, LO>;
567  functor_type functor (input1_lcl, output_lcl, output_lcl, f);
568  Kokkos::parallel_for (kernelLabel, range, functor);
569  },
570  readOnly (input1_j_ref).on (memSpace).at (execSpace),
571  readWrite (output_j_ref).on (memSpace).at (execSpace));
572  }
573  else { // output, input1, and input2 all differ
575  ([=] (const input1_view_type& input1_lcl,
576  const input2_view_type& input2_lcl,
577  const wo_output_view_type& output_lcl) {
578  using functor_type = VectorBinaryTransformLoopBody<
579  input1_view_type,
580  input2_view_type,
581  wo_output_view_type,
582  BinaryFunctionType, LO>;
583  functor_type functor (input1_lcl, input2_lcl, output_lcl, f);
584  Kokkos::parallel_for (kernelLabel, range, functor);
585  },
586  readOnly (input1_j_ref).on (memSpace).at (execSpace),
587  readOnly (input2_j_ref).on (memSpace).at (execSpace),
588  writeOnly (output_j_ref).on (memSpace).at (execSpace));
589  }
590  }
591  }
592  else { // operate on MultiVectors
593  // Check for aliasing here, since it's possible for only
594  // some columns of input & output to alias. Aliasing is a
595  // correctness issue (e.g., for sync'ing).
596  const bool outin1same = sameObject (output, input1);
597  const bool outin2same = sameObject (output, input2);
598  // Don't double-view input1.
599  const bool in1in2same = sameObject (input1, input2);
600  const bool allsame = outin1same && outin2same; // by transitivity
601 
602  // Once we get C++14 generic lambdas, we can get rid of
603  // these typedefs and use "const auto&" as the argument(s)
604  // for the withLocalAccess lambdas below.
605  using input1_view_type =
607  decltype (readOnly (input1).on (memSpace). at(execSpace))>;
608  using input2_view_type =
610  decltype (readOnly (input2).on (memSpace). at(execSpace))>;
611  using rw_output_view_type =
613  decltype (readWrite (output).on (memSpace). at(execSpace))>;
614  using wo_output_view_type =
616  decltype (writeOnly (output).on (memSpace). at(execSpace))>;
617 
618  if (allsame) {
620  ([=] (const rw_output_view_type& output_lcl) {
621  using functor_type = MultiVectorBinaryTransformLoopBody<
622  typename rw_output_view_type::const_type,
623  typename rw_output_view_type::const_type,
624  rw_output_view_type,
625  BinaryFunctionType, LO>;
626  functor_type functor (output_lcl, output_lcl, output_lcl, f);
627  Kokkos::parallel_for (kernelLabel, range, functor);
628  },
629  readWrite (output).on (memSpace).at (execSpace));
630  }
631  else if (in1in2same) { // and not same as output
633  ([=] (const input1_view_type& input1_lcl,
634  const wo_output_view_type& output_lcl) {
635  using functor_type = MultiVectorBinaryTransformLoopBody<
636  input1_view_type,
637  input1_view_type,
638  wo_output_view_type,
639  BinaryFunctionType, LO>;
640  functor_type functor (input1_lcl, input1_lcl, output_lcl, f);
641  Kokkos::parallel_for (kernelLabel, range, functor);
642  },
643  readOnly (input1).on (memSpace).at (execSpace),
644  writeOnly (output).on (memSpace).at (execSpace));
645  }
646  else if (outin1same) { // and input1 not same as input2
648  ([=] (const input2_view_type& input2_lcl,
649  const rw_output_view_type& output_lcl) {
650  using functor_type = MultiVectorBinaryTransformLoopBody<
651  typename rw_output_view_type::const_type,
652  input2_view_type,
653  rw_output_view_type,
654  BinaryFunctionType, LO>;
655  functor_type functor (output_lcl, input2_lcl, output_lcl, f);
656  Kokkos::parallel_for (kernelLabel, range, functor);
657  },
658  readOnly (input2).on (memSpace).at (execSpace),
659  readWrite (output).on (memSpace).at (execSpace));
660  }
661  else if (outin2same) { // and input1 not same as input2
663  ([=] (const input1_view_type& input1_lcl,
664  const rw_output_view_type& output_lcl) {
665  using functor_type = MultiVectorBinaryTransformLoopBody<
666  input1_view_type,
667  typename rw_output_view_type::const_type,
668  rw_output_view_type,
669  BinaryFunctionType, LO>;
670  functor_type functor (input1_lcl, output_lcl, output_lcl, f);
671  Kokkos::parallel_for (kernelLabel, range, functor);
672  },
673  readOnly (input1).on (memSpace).at (execSpace),
674  readWrite (output).on (memSpace).at (execSpace));
675  }
676  else { // output, input1, and input2 all differ
678  ([=] (const input1_view_type& input1_lcl,
679  const input2_view_type& input2_lcl,
680  const wo_output_view_type& output_lcl) {
681  using functor_type = MultiVectorBinaryTransformLoopBody<
682  input1_view_type,
683  input2_view_type,
684  wo_output_view_type,
685  BinaryFunctionType, LO>;
686  functor_type functor (input1_lcl, input2_lcl, output_lcl, f);
687  Kokkos::parallel_for (kernelLabel, range, functor);
688  },
689  readOnly (input1).on (memSpace).at (execSpace),
690  readOnly (input2).on (memSpace).at (execSpace),
691  writeOnly (output).on (memSpace).at (execSpace));
692  }
693  }
694  }
695  };
696 
703  template<class ExecutionSpace,
704  class SC, class LO, class GO, class NT>
705  struct Transform<ExecutionSpace,
706  ::Tpetra::Vector<SC, LO, GO, NT> >
707  {
708  // Implementation of unary transform on Vectors.
709  template<class UnaryFunctionType>
710  static void
711  transform (const char kernelLabel[],
712  ExecutionSpace execSpace,
715  UnaryFunctionType f)
716  {
718  using impl_type = Transform<ExecutionSpace, MV>;
719  using UFT = UnaryFunctionType;
720 
721  impl_type::template transform<UFT> (kernelLabel, execSpace,
722  input, output, f);
723  }
724 
725  // Implementation of binary transform on Vectors.
726  template<class BinaryFunctionType>
727  static void
728  transform (const char kernelLabel[],
729  ExecutionSpace execSpace,
733  BinaryFunctionType f)
734  {
736  using impl_type = Transform<ExecutionSpace, MV>;
737  using BFT = BinaryFunctionType;
738 
739  impl_type::template transform<BFT> (kernelLabel, execSpace,
740  input1, input2, output, f);
741  }
742  };
743 
744  } // namespace Details
745 
746 } // namespace Tpetra
747 
748 #endif // TPETRA_TRANSFORM_MULTIVECTOR_HPP
Include this file to make Tpetra::MultiVector and Tpetra::Vector work with Tpetra::withLocalAccess.
dual_view_type::t_host getLocalViewHost() const
A local Kokkos::View of host memory.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
Details::LocalAccess< GlobalObjectType, Access::WriteOnly > writeOnly(GlobalObjectType &)
Declare that you want to access the given global object's local data in write-only mode...
Details::LocalAccess< GlobalObjectType, Access::ReadWrite > readWrite(GlobalObjectType &)
Declare that you want to access the given global object's local data in read-and-write mode...
bool isConstantStride() const
Whether this multivector has constant stride between columns.
One or more distributed dense vectors.
Declaration and definition of Tpetra::transform; declaration of helper classes for users to specializ...
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
Specialize this class to implement Tpetra::transform for specific GlobalDataStructure types...
static bool verbose()
Whether Tpetra is in verbose mode.
void withLocalAccess(typename Details::ArgsToFunction< LocalAccessTypes...>::type userFunction, LocalAccessTypes...localAccesses)
Get access to a Tpetra global object's local data.
Details::LocalAccess< GlobalObjectType, Access::ReadOnly > readOnly(GlobalObjectType &)
Declare that you want to access the given global object's local data in read-only mode...
Include this file to make Tpetra::for_each work with Tpetra::MultiVector and Tpetra::Vector.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
A distributed dense vector.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
static void transform(const char kernelLabel[], ExecutionSpace execSpace, GlobalDataStructure &input, GlobalDataStructure &output, UnaryFunctionType f)
Unary transform: output_i = f(input_i).
typename Details::GetNonowningLocalObject< LocalAccessType >::nonowning_local_object_type with_local_access_function_argument_type
Type of the local object, that is an argument to the function the user gives to withLocalAccess.
void for_each(const char kernelLabel[], ExecutionSpace execSpace, GlobalDataStructure &X, UserFunctionType f)
Apply a function entrywise to each local entry of a Tpetra global data structure, analogously to std:...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.