Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_transform_MultiVector.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // ************************************************************************
39 // @HEADER
40 */
41 
42 #ifndef TPETRA_TRANSFORM_MULTIVECTOR_HPP
43 #define TPETRA_TRANSFORM_MULTIVECTOR_HPP
44 
45 #include "Tpetra_transform.hpp"
49 #include "Tpetra_Map.hpp"
50 #include "Teuchos_Comm.hpp"
51 #include "Teuchos_TestForException.hpp"
52 #include "Kokkos_Core.hpp"
53 #include <iostream>
54 #include <sstream>
55 
59 
60 namespace Tpetra {
61  namespace Details {
62 
63  // Kokkos::parallel_for functor that implements unary
64  // Tpetra::transform for MultiVector objects.
65  //
66  // The input and output Views may be the same object (locally).
67  template<class InputViewType,
68  class OutputViewType,
69  class UnaryFunctionType,
70  class LocalIndexType>
71  class MultiVectorUnaryTransformLoopBody {
72  private:
73  static_assert (static_cast<int> (InputViewType::Rank) == 2,
74  "InputViewType must be a rank-2 Kokkos::View.");
75  static_assert (static_cast<int> (OutputViewType::Rank) == 2,
76  "OutputViewType must be a rank-2 Kokkos::View.");
77 
78  public:
79  MultiVectorUnaryTransformLoopBody (const InputViewType& in,
80  const OutputViewType& out,
81  UnaryFunctionType f) :
82  in_ (in), out_ (out), f_ (f)
83  {}
84 
85  KOKKOS_INLINE_FUNCTION void
86  operator () (const LocalIndexType i) const {
87  using LO = LocalIndexType;
88  const LO numCols = static_cast<LO> (in_.extent (1));
89  for (LO j = 0; j < numCols; ++j) {
90  out_(i,j) = f_ (in_(i,j));
91  }
92  }
93 
94  private:
95  InputViewType in_;
96  OutputViewType out_;
97  UnaryFunctionType f_;
98  };
99 
100  // Kokkos::parallel_for functor that implements binary
101  // Tpetra::transform for MultiVector objects.
102  //
103  // The input and output Views may be the same object (locally).
104  template<class InputViewType1,
105  class InputViewType2,
106  class OutputViewType,
107  class BinaryFunctionType,
108  class LocalIndexType>
109  class MultiVectorBinaryTransformLoopBody {
110  private:
111  static_assert (static_cast<int> (InputViewType1::Rank) == 2,
112  "InputViewType1 must be a rank-2 Kokkos::View.");
113  static_assert (static_cast<int> (InputViewType2::Rank) == 2,
114  "InputViewType2 must be a rank-2 Kokkos::View.");
115  static_assert (static_cast<int> (OutputViewType::Rank) == 2,
116  "OutputViewType must be a rank-2 Kokkos::View.");
117 
118  public:
119  MultiVectorBinaryTransformLoopBody (const InputViewType1& in1,
120  const InputViewType2& in2,
121  const OutputViewType& out,
122  BinaryFunctionType f) :
123  in1_ (in1), in2_ (in2), out_ (out), f_ (f)
124  {}
125 
126  KOKKOS_INLINE_FUNCTION void
127  operator () (const LocalIndexType i) const {
128  using LO = LocalIndexType;
129  const LO numCols = static_cast<LO> (in1_.extent (1));
130  for (LO j = 0; j < numCols; ++j) {
131  out_(i,j) = f_ (in1_(i,j), in2_(i,j));
132  }
133  }
134 
135  private:
136  InputViewType1 in1_;
137  InputViewType2 in2_;
138  OutputViewType out_;
139  BinaryFunctionType f_;
140  };
141 
142  // Kokkos::parallel_for functor that implements unary
143  // Tpetra::transform for Vector objects.
144  //
145  // The input and output Views may be the same object (locally).
146  template<class InputViewType,
147  class OutputViewType,
148  class UnaryFunctionType,
149  class LocalIndexType>
150  class VectorUnaryTransformLoopBody {
151  private:
152  static_assert (static_cast<int> (InputViewType::Rank) == 1,
153  "InputViewType must be a rank-1 Kokkos::View.");
154  static_assert (static_cast<int> (OutputViewType::Rank) == 1,
155  "OutputViewType must be a rank-1 Kokkos::View.");
156 
157  public:
158  VectorUnaryTransformLoopBody (const InputViewType& in,
159  const OutputViewType& out,
160  UnaryFunctionType f) :
161  in_ (in), out_ (out), f_ (f)
162  {}
163 
164  KOKKOS_INLINE_FUNCTION void
165  operator () (const LocalIndexType i) const {
166  out_(i) = f_ (in_(i));
167  }
168 
169  private:
170  InputViewType in_;
171  OutputViewType out_;
172  UnaryFunctionType f_;
173  };
174 
175  // Kokkos::parallel_for functor that implements binary
176  // Tpetra::transform for Vector objects.
177  //
178  // The input and output Views may be the same object (locally).
179  template<class InputViewType1,
180  class InputViewType2,
181  class OutputViewType,
182  class BinaryFunctionType,
183  class LocalIndexType>
184  class VectorBinaryTransformLoopBody {
185  private:
186  static_assert (static_cast<int> (InputViewType1::Rank) == 1,
187  "InputViewType1 must be a rank-1 Kokkos::View.");
188  static_assert (static_cast<int> (InputViewType1::Rank) == 1,
189  "InputViewType1 must be a rank-1 Kokkos::View.");
190  static_assert (static_cast<int> (OutputViewType::Rank) == 1,
191  "OutputViewType must be a rank-1 Kokkos::View.");
192 
193  public:
194  VectorBinaryTransformLoopBody (const InputViewType1& in1,
195  const InputViewType2& in2,
196  const OutputViewType& out,
197  BinaryFunctionType f) :
198  in1_ (in1), in2_ (in2), out_ (out), f_ (f)
199  {}
200 
201  KOKKOS_INLINE_FUNCTION void
202  operator () (const LocalIndexType i) const {
203  out_(i) = f_ (in1_(i), in2_(i));
204  }
205 
206  private:
207  InputViewType1 in1_;
208  InputViewType2 in2_;
209  OutputViewType out_;
210  BinaryFunctionType f_;
211  };
212 
213  // CUDA 9.2 doesn't like it when you call lambdas in private or
214  // protected methods (of Transform, in this case). Thus, I've
215  // broken out Transform::transform_sameObject into a separate
216  // functor and nonmember function.
217  template<class ExecutionSpace,
218  class SC, class LO, class GO, class NT,
219  class UnaryFunctionType>
220  class UnaryTransformSameMultiVector {
221  private:
223  using IST = typename MV::impl_scalar_type;
224 
225  public:
226  UnaryTransformSameMultiVector (UnaryFunctionType f) : f_ (f) {}
227 
228  KOKKOS_INLINE_FUNCTION void operator() (IST& X_ij) const {
229  // User function has the form IST(const IST&) suitable for
230  // transform, so we have to convert it to a for_each function
231  // of the form void(IST&).
232  X_ij = f_(X_ij);
233  }
234 
235  private:
236  UnaryFunctionType f_;
237  };
238 
239  template<class ExecutionSpace,
240  class SC, class LO, class GO, class NT,
241  class UnaryFunctionType>
242  void
243  unaryTransformSameMultiVector (const char kernelLabel[],
244  ExecutionSpace execSpace,
246  UnaryFunctionType f)
247  {
248  using functor_type = UnaryTransformSameMultiVector<ExecutionSpace,
249  SC, LO, GO, NT, UnaryFunctionType>;
250  ::Tpetra::for_each (kernelLabel, execSpace, output, functor_type (f));
251  }
252 
255  template<class ExecutionSpace,
256  class SC, class LO, class GO, class NT>
257  struct Transform<ExecutionSpace,
258  ::Tpetra::MultiVector<SC, LO, GO, NT> >
259  {
260  private:
261  // Given a Kokkos execution space on which the user wants to run
262  // the transform, and a memory space in which the MultiVector's
263  // data live, determine the memory space that transform should
264  // use in its withLocalAccess call.
265  template<class MemorySpace>
266  using transform_memory_space =
267  typename std::conditional<
268  Kokkos::SpaceAccessibility<
269  ExecutionSpace,
270  typename MemorySpace::memory_space>::accessible,
271  typename MemorySpace::memory_space,
272  typename ExecutionSpace::memory_space>::type;
273 
275  using preferred_memory_space =
276  typename MV::device_type::memory_space;
277  using memory_space =
278  transform_memory_space<preferred_memory_space>;
279  using IST = typename MV::impl_scalar_type;
280 
281  // This is not the same as "aliases" -- we actually want to know
282  // if input and output are the same object (locally), so that we
283  // can sync correctly. The result of transform is undefined if
284  // input and output partially alias one another.
285  static bool
286  sameObject (const ::Tpetra::MultiVector<SC, LO, GO, NT>& input,
287  const ::Tpetra::MultiVector<SC, LO, GO, NT>& output)
288  {
289  return &input == &output ||
290  input.getLocalViewHost ().data () ==
291  output.getLocalViewHost ().data () ||
292  input.getLocalViewDevice ().data () ==
293  output.getLocalViewDevice ().data ();
294  }
295 
296  template<class UnaryFunctionType>
297  static void
298  transform_vec_notSameObject
299  (const char kernelLabel[],
300  ExecutionSpace execSpace,
303  UnaryFunctionType f)
304  {
305  memory_space memSpace;
306  // Generic lambdas need C++14, so we need a typedef here.
307  using input_view_type =
309  decltype (readOnly (input).on (memSpace). at(execSpace))>;
310  using output_view_type =
312  decltype (writeOnly (output).on (memSpace). at(execSpace))>;
313 
315  ([=] (const input_view_type& input_lcl,
316  const output_view_type& output_lcl) {
317  using functor_type = VectorUnaryTransformLoopBody<
318  input_view_type, output_view_type, UnaryFunctionType, LO>;
319  functor_type g (input_lcl, output_lcl, f);
320 
321  const LO lclNumRows = static_cast<LO> (input_lcl.extent (0));
322  using range_type = Kokkos::RangePolicy<ExecutionSpace, LO>;
323  range_type range (execSpace, 0, lclNumRows);
324 
325  Kokkos::parallel_for (kernelLabel, range, g);
326  },
327  readOnly (input).on (memSpace).at (execSpace),
328  writeOnly (output).on (memSpace).at (execSpace));
329  }
330 
331  template<class UnaryFunctionType>
332  static void
333  transform_mv_notSameObject
334  (const char kernelLabel[],
335  ExecutionSpace execSpace,
338  UnaryFunctionType f)
339  {
340  memory_space memSpace;
341  // Generic lambdas need C++14, so we need typedefs here.
342  using input_view_type =
344  decltype (readOnly (input).on (memSpace). at(execSpace))>;
345  using output_view_type =
347  decltype (writeOnly (output).on (memSpace). at(execSpace))>;
348 
350  ([=] (const input_view_type& input_lcl,
351  const output_view_type& output_lcl) {
352  using functor_type = MultiVectorUnaryTransformLoopBody<
353  input_view_type, output_view_type, UnaryFunctionType, LO>;
354  functor_type g (input_lcl, output_lcl, f);
355 
356  const LO lclNumRows = static_cast<LO> (input_lcl.extent (0));
357  using range_type = Kokkos::RangePolicy<ExecutionSpace, LO>;
358  range_type range (execSpace, 0, lclNumRows);
359 
360  Kokkos::parallel_for (kernelLabel, range, g);
361  },
362  readOnly (input).on (memSpace).at (execSpace),
363  writeOnly (output).on (memSpace).at (execSpace));
364  }
365 
366  public:
367  template<class UnaryFunctionType>
368  static void
369  transform (const char kernelLabel[],
370  ExecutionSpace execSpace,
373  UnaryFunctionType f)
374  {
375  using Teuchos::TypeNameTraits;
376  using std::endl;
377 
378  const int myRank = output.getMap ()->getComm ()->getRank ();
379  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
380  if (verbose) {
381  std::ostringstream os;
382  os << "Proc " << myRank << ": Tpetra::transform:" << endl
383  << " kernelLabel: " << kernelLabel << endl
384  << " ExecutionSpace: "
385  << TypeNameTraits<ExecutionSpace>::name () << endl;
386  std::cerr << os.str ();
387  }
388 
389  const size_t numVecs = output.getNumVectors ();
390  TEUCHOS_TEST_FOR_EXCEPTION
391  (input.getNumVectors () != numVecs, std::invalid_argument,
392  "input.getNumVectors() = " << input.getNumVectors () <<
393  " != output.getNumVectors() = " << numVecs << ".");
394 
395  const bool constStride = output.isConstantStride () &&
396  input.isConstantStride ();
397 
398  memory_space memSpace;
399  if (numVecs == size_t (1) || ! constStride) {
400  for (size_t j = 0; j < numVecs; ++j) {
401  auto output_j = output.getVectorNonConst (j);
402  auto input_j = input.getVectorNonConst (j);
403 
404  // Check for aliasing here, since it's possible for only
405  // some columns of input & output to alias. Aliasing is a
406  // correctness issue (e.g., for sync'ing).
407  if (sameObject (*output_j, *input_j)) {
408  unaryTransformSameMultiVector (kernelLabel, execSpace,
409  *output_j, f);
410  }
411  else {
412  transform_vec_notSameObject (kernelLabel, execSpace,
413  *input_j, *output_j, f);
414  }
415  }
416  }
417  else {
418  if (sameObject (output, input)) {
419  unaryTransformSameMultiVector (kernelLabel, execSpace,
420  output, f);
421  }
422  else {
423  transform_mv_notSameObject (kernelLabel, execSpace,
424  input, output, f);
425  }
426  }
427  }
428 
429  // Implementation of binary transform on MultiVectors.
430  template<class BinaryFunctionType>
431  static void
432  transform (const char kernelLabel[],
433  ExecutionSpace execSpace,
437  BinaryFunctionType f)
438  {
439  using Teuchos::TypeNameTraits;
440  using std::endl;
441  const char prefix[] = "Tpetra::transform (binary): ";
442 
443  const int myRank = output.getMap ()->getComm ()->getRank ();
444  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
445  if (verbose) {
446  std::ostringstream os;
447  os << "Proc " << myRank << ": " << prefix << endl
448  << " Tpetra::MultiVector<" << TypeNameTraits<SC>::name ()
449  << ", " << TypeNameTraits<LO>::name () << ", "
450  << TypeNameTraits<GO>::name () << ", "
451  << TypeNameTraits<NT>::name () << ">" << endl
452  << " kernelLabel: " << kernelLabel << endl
453  << " ExecutionSpace: "
454  << TypeNameTraits<ExecutionSpace>::name () << endl;
455  std::cerr << os.str ();
456  }
457 
458  const size_t numVecs = output.getNumVectors ();
459  TEUCHOS_TEST_FOR_EXCEPTION
460  (input1.getNumVectors () != numVecs, std::invalid_argument,
461  prefix << "input1.getNumVectors() = " << input1.getNumVectors ()
462  << " != output.getNumVectors() = " << numVecs << ".");
463  TEUCHOS_TEST_FOR_EXCEPTION
464  (input2.getNumVectors () != numVecs, std::invalid_argument,
465  prefix << "input2.getNumVectors() = " << input2.getNumVectors ()
466  << " != output.getNumVectors() = " << numVecs << ".");
467 
468  const bool constStride = output.isConstantStride () &&
469  input1.isConstantStride () && input2.isConstantStride ();
470  memory_space memSpace;
471 
472  const LO lclNumRows = static_cast<LO> (output.getLocalLength ());
473  using range_type = Kokkos::RangePolicy<ExecutionSpace, LO>;
474  range_type range (execSpace, 0, lclNumRows);
475 
476  if (numVecs == size_t (1) || ! constStride) { // operate on Vectors
477  for (size_t j = 0; j < numVecs; ++j) {
478  auto output_j = output.getVectorNonConst (j);
479  auto input1_j = input1.getVectorNonConst (j);
480  auto input2_j = input2.getVectorNonConst (j);
481 
482  // Check for aliasing here, since it's possible for only
483  // some columns of input & output to alias. Aliasing is a
484  // correctness issue (e.g., for sync'ing).
485  const bool outin1same = sameObject (*output_j, *input1_j);
486  const bool outin2same = sameObject (*output_j, *input2_j);
487  // Don't double-view input1.
488  const bool in1in2same = sameObject (*input1_j, *input2_j);
489  const bool allsame = outin1same && outin2same; // by transitivity
490 
491  // Help GCC 4.9.3 deduce the types of *output_j,
492  // *input1_j, and *input2_j. See discussion here:
493  // https://github.com/trilinos/Trilinos/pull/5115
494  using vec_type = ::Tpetra::Vector<SC, LO, GO, NT>;
495  vec_type& input1_j_ref = *input1_j;
496  vec_type& input2_j_ref = *input2_j;
497  vec_type& output_j_ref = *output_j;
498 
499  // Once we get C++14 generic lambdas, we can get rid of
500  // these typedefs and use "const auto&" as the argument(s)
501  // for the withLocalAccess lambdas below.
502  using input1_view_type =
504  decltype (readOnly (input1_j_ref).on (memSpace). at(execSpace))>;
505  using input2_view_type =
507  decltype (readOnly (input2_j_ref).on (memSpace). at(execSpace))>;
508  using rw_output_view_type =
510  decltype (readWrite (output_j_ref).on (memSpace). at(execSpace))>;
511  using wo_output_view_type =
513  decltype (writeOnly (output_j_ref).on (memSpace). at(execSpace))>;
514 
515  if (allsame) {
517  ([=] (const rw_output_view_type& output_lcl) {
518  using functor_type = VectorBinaryTransformLoopBody<
519  typename rw_output_view_type::const_type,
520  typename rw_output_view_type::const_type,
521  rw_output_view_type,
522  BinaryFunctionType, LO>;
523  functor_type functor (output_lcl, output_lcl, output_lcl, f);
524  Kokkos::parallel_for (kernelLabel, range, functor);
525  },
526  readWrite (output_j_ref).on (memSpace).at (execSpace));
527  }
528  else if (in1in2same) { // and not same as output
530  ([=] (const input1_view_type& input1_lcl,
531  const wo_output_view_type& output_lcl) {
532  using functor_type = VectorBinaryTransformLoopBody<
533  input1_view_type,
534  input1_view_type,
535  wo_output_view_type,
536  BinaryFunctionType, LO>;
537  functor_type functor (input1_lcl, input1_lcl, output_lcl, f);
538  Kokkos::parallel_for (kernelLabel, range, functor);
539  },
540  readOnly (input1_j_ref).on (memSpace).at (execSpace),
541  writeOnly (output_j_ref).on (memSpace).at (execSpace));
542  }
543  else if (outin1same) { // and input1 not same as input2
545  ([=] (const input2_view_type& input2_lcl,
546  const rw_output_view_type& output_lcl) {
547  using functor_type = VectorBinaryTransformLoopBody<
548  typename rw_output_view_type::const_type,
549  input2_view_type,
550  rw_output_view_type,
551  BinaryFunctionType, LO>;
552  functor_type functor (output_lcl, input2_lcl, output_lcl, f);
553  Kokkos::parallel_for (kernelLabel, range, functor);
554  },
555  readOnly (input2_j_ref).on (memSpace).at (execSpace),
556  readWrite (output_j_ref).on (memSpace).at (execSpace));
557  }
558  else if (outin2same) { // and input1 not same as input2
560  ([=] (const input1_view_type& input1_lcl,
561  const rw_output_view_type& output_lcl) {
562  using functor_type = VectorBinaryTransformLoopBody<
563  input1_view_type,
564  typename rw_output_view_type::const_type,
565  rw_output_view_type,
566  BinaryFunctionType, LO>;
567  functor_type functor (input1_lcl, output_lcl, output_lcl, f);
568  Kokkos::parallel_for (kernelLabel, range, functor);
569  },
570  readOnly (input1_j_ref).on (memSpace).at (execSpace),
571  readWrite (output_j_ref).on (memSpace).at (execSpace));
572  }
573  else { // output, input1, and input2 all differ
575  ([=] (const input1_view_type& input1_lcl,
576  const input2_view_type& input2_lcl,
577  const wo_output_view_type& output_lcl) {
578  using functor_type = VectorBinaryTransformLoopBody<
579  input1_view_type,
580  input2_view_type,
581  wo_output_view_type,
582  BinaryFunctionType, LO>;
583  functor_type functor (input1_lcl, input2_lcl, output_lcl, f);
584  Kokkos::parallel_for (kernelLabel, range, functor);
585  },
586  readOnly (input1_j_ref).on (memSpace).at (execSpace),
587  readOnly (input2_j_ref).on (memSpace).at (execSpace),
588  writeOnly (output_j_ref).on (memSpace).at (execSpace));
589  }
590  }
591  }
592  else { // operate on MultiVectors
593  // Check for aliasing here, since it's possible for only
594  // some columns of input & output to alias. Aliasing is a
595  // correctness issue (e.g., for sync'ing).
596  const bool outin1same = sameObject (output, input1);
597  const bool outin2same = sameObject (output, input2);
598  // Don't double-view input1.
599  const bool in1in2same = sameObject (input1, input2);
600  const bool allsame = outin1same && outin2same; // by transitivity
601 
602  // Once we get C++14 generic lambdas, we can get rid of
603  // these typedefs and use "const auto&" as the argument(s)
604  // for the withLocalAccess lambdas below.
605  using input1_view_type =
607  decltype (readOnly (input1).on (memSpace). at(execSpace))>;
608  using input2_view_type =
610  decltype (readOnly (input2).on (memSpace). at(execSpace))>;
611  using rw_output_view_type =
613  decltype (readWrite (output).on (memSpace). at(execSpace))>;
614  using wo_output_view_type =
616  decltype (writeOnly (output).on (memSpace). at(execSpace))>;
617 
618  if (allsame) {
620  ([=] (const rw_output_view_type& output_lcl) {
621  using functor_type = MultiVectorBinaryTransformLoopBody<
622  typename rw_output_view_type::const_type,
623  typename rw_output_view_type::const_type,
624  rw_output_view_type,
625  BinaryFunctionType, LO>;
626  functor_type functor (output_lcl, output_lcl, output_lcl, f);
627  Kokkos::parallel_for (kernelLabel, range, functor);
628  },
629  readWrite (output).on (memSpace).at (execSpace));
630  }
631  else if (in1in2same) { // and not same as output
633  ([=] (const input1_view_type& input1_lcl,
634  const wo_output_view_type& output_lcl) {
635  using functor_type = MultiVectorBinaryTransformLoopBody<
636  input1_view_type,
637  input1_view_type,
638  wo_output_view_type,
639  BinaryFunctionType, LO>;
640  functor_type functor (input1_lcl, input1_lcl, output_lcl, f);
641  Kokkos::parallel_for (kernelLabel, range, functor);
642  },
643  readOnly (input1).on (memSpace).at (execSpace),
644  writeOnly (output).on (memSpace).at (execSpace));
645  }
646  else if (outin1same) { // and input1 not same as input2
648  ([=] (const input2_view_type& input2_lcl,
649  const rw_output_view_type& output_lcl) {
650  using functor_type = MultiVectorBinaryTransformLoopBody<
651  typename rw_output_view_type::const_type,
652  input2_view_type,
653  rw_output_view_type,
654  BinaryFunctionType, LO>;
655  functor_type functor (output_lcl, input2_lcl, output_lcl, f);
656  Kokkos::parallel_for (kernelLabel, range, functor);
657  },
658  readOnly (input2).on (memSpace).at (execSpace),
659  readWrite (output).on (memSpace).at (execSpace));
660  }
661  else if (outin2same) { // and input1 not same as input2
663  ([=] (const input1_view_type& input1_lcl,
664  const rw_output_view_type& output_lcl) {
665  using functor_type = MultiVectorBinaryTransformLoopBody<
666  input1_view_type,
667  typename rw_output_view_type::const_type,
668  rw_output_view_type,
669  BinaryFunctionType, LO>;
670  functor_type functor (input1_lcl, output_lcl, output_lcl, f);
671  Kokkos::parallel_for (kernelLabel, range, functor);
672  },
673  readOnly (input1).on (memSpace).at (execSpace),
674  readWrite (output).on (memSpace).at (execSpace));
675  }
676  else { // output, input1, and input2 all differ
678  ([=] (const input1_view_type& input1_lcl,
679  const input2_view_type& input2_lcl,
680  const wo_output_view_type& output_lcl) {
681  using functor_type = MultiVectorBinaryTransformLoopBody<
682  input1_view_type,
683  input2_view_type,
684  wo_output_view_type,
685  BinaryFunctionType, LO>;
686  functor_type functor (input1_lcl, input2_lcl, output_lcl, f);
687  Kokkos::parallel_for (kernelLabel, range, functor);
688  },
689  readOnly (input1).on (memSpace).at (execSpace),
690  readOnly (input2).on (memSpace).at (execSpace),
691  writeOnly (output).on (memSpace).at (execSpace));
692  }
693  }
694  }
695  };
696 
703  template<class ExecutionSpace,
704  class SC, class LO, class GO, class NT>
705  struct Transform<ExecutionSpace,
706  ::Tpetra::Vector<SC, LO, GO, NT> >
707  {
708  // Implementation of unary transform on Vectors.
709  template<class UnaryFunctionType>
710  static void
711  transform (const char kernelLabel[],
712  ExecutionSpace execSpace,
715  UnaryFunctionType f)
716  {
718  using impl_type = Transform<ExecutionSpace, MV>;
719  using UFT = UnaryFunctionType;
720 
721  impl_type::template transform<UFT> (kernelLabel, execSpace,
722  input, output, f);
723  }
724 
725  // Implementation of binary transform on Vectors.
726  template<class BinaryFunctionType>
727  static void
728  transform (const char kernelLabel[],
729  ExecutionSpace execSpace,
733  BinaryFunctionType f)
734  {
736  using impl_type = Transform<ExecutionSpace, MV>;
737  using BFT = BinaryFunctionType;
738 
739  impl_type::template transform<BFT> (kernelLabel, execSpace,
740  input1, input2, output, f);
741  }
742  };
743 
744  } // namespace Details
745 
746 } // namespace Tpetra
747 
748 #endif // TPETRA_TRANSFORM_MULTIVECTOR_HPP
Include this file to make Tpetra::MultiVector and Tpetra::Vector work with Tpetra::withLocalAccess.
dual_view_type::t_host getLocalViewHost() const
A local Kokkos::View of host memory.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
One or more distributed dense vectors.
Declaration and definition of Tpetra::transform; declaration of helper classes for users to specializ...
Details::LocalAccess< GlobalObjectType, Details::write_only > writeOnly(GlobalObjectType &)
Declare that you want to access the given global object's local data in write-only mode...
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
Details::LocalAccess< GlobalObjectType, Details::read_write > readWrite(GlobalObjectType &)
Declare that you want to access the given global object's local data in read-and-write mode...
Specialize this class to implement Tpetra::transform for specific GlobalDataStructure types...
static bool verbose()
Whether Tpetra is in verbose mode.
void withLocalAccess(typename Details::ArgsToFunction< LocalAccessTypes...>::type userFunction, LocalAccessTypes...localAccesses)
Get access to a Tpetra global object's local data.
Include this file to make Tpetra::for_each work with Tpetra::MultiVector and Tpetra::Vector.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
A distributed dense vector.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
static void transform(const char kernelLabel[], ExecutionSpace execSpace, GlobalDataStructure &input, GlobalDataStructure &output, UnaryFunctionType f)
Unary transform: output_i = f(input_i).
Details::LocalAccess< GlobalObjectType, Details::read_only > readOnly(GlobalObjectType &)
Declare that you want to access the given global object's local data in read-only mode...
typename Details::GetNonowningLocalObject< LocalAccessType >::nonowning_local_object_type with_local_access_function_argument_type
Type of the local object, that is an argument to the function the user gives to withLocalAccess.
void for_each(const char kernelLabel[], ExecutionSpace execSpace, GlobalDataStructure &X, UserFunctionType f)
Apply a function entrywise to each local entry of a Tpetra global data structure, analogously to std:...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.