Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_transform_MultiVector.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_TRANSFORM_MULTIVECTOR_HPP
45 #define TPETRA_TRANSFORM_MULTIVECTOR_HPP
46 
47 #include "Tpetra_transform.hpp"
51 #include "Tpetra_Map.hpp"
52 #include "Teuchos_Comm.hpp"
53 #include "Teuchos_TestForException.hpp"
54 #include "Kokkos_Core.hpp"
55 #include <iostream>
56 #include <sstream>
57 
61 
62 namespace Tpetra {
63  namespace Details {
64 
65  // Kokkos::parallel_for functor that implements unary
66  // Tpetra::transform for MultiVector objects.
67  //
68  // The input and output Views may be the same object (locally).
69  template<class InputViewType,
70  class OutputViewType,
71  class UnaryFunctionType,
72  class LocalIndexType>
73  class MultiVectorUnaryTransformLoopBody {
74  private:
75  static_assert (static_cast<int> (InputViewType::Rank) == 2,
76  "InputViewType must be a rank-2 Kokkos::View.");
77  static_assert (static_cast<int> (OutputViewType::Rank) == 2,
78  "OutputViewType must be a rank-2 Kokkos::View.");
79 
80  public:
81  MultiVectorUnaryTransformLoopBody (const InputViewType& in,
82  const OutputViewType& out,
83  UnaryFunctionType f) :
84  in_ (in), out_ (out), f_ (f)
85  {}
86 
87  KOKKOS_INLINE_FUNCTION void
88  operator () (const LocalIndexType i) const {
89  using LO = LocalIndexType;
90  const LO numCols = static_cast<LO> (in_.extent (1));
91  for (LO j = 0; j < numCols; ++j) {
92  out_(i,j) = f_ (in_(i,j));
93  }
94  }
95 
96  private:
97  InputViewType in_;
98  OutputViewType out_;
99  UnaryFunctionType f_;
100  };
101 
102  // Kokkos::parallel_for functor that implements binary
103  // Tpetra::transform for MultiVector objects.
104  //
105  // The input and output Views may be the same object (locally).
106  template<class InputViewType1,
107  class InputViewType2,
108  class OutputViewType,
109  class BinaryFunctionType,
110  class LocalIndexType>
111  class MultiVectorBinaryTransformLoopBody {
112  private:
113  static_assert (static_cast<int> (InputViewType1::Rank) == 2,
114  "InputViewType1 must be a rank-2 Kokkos::View.");
115  static_assert (static_cast<int> (InputViewType2::Rank) == 2,
116  "InputViewType2 must be a rank-2 Kokkos::View.");
117  static_assert (static_cast<int> (OutputViewType::Rank) == 2,
118  "OutputViewType must be a rank-2 Kokkos::View.");
119 
120  public:
121  MultiVectorBinaryTransformLoopBody (const InputViewType1& in1,
122  const InputViewType2& in2,
123  const OutputViewType& out,
124  BinaryFunctionType f) :
125  in1_ (in1), in2_ (in2), out_ (out), f_ (f)
126  {}
127 
128  KOKKOS_INLINE_FUNCTION void
129  operator () (const LocalIndexType i) const {
130  using LO = LocalIndexType;
131  const LO numCols = static_cast<LO> (in1_.extent (1));
132  for (LO j = 0; j < numCols; ++j) {
133  out_(i,j) = f_ (in1_(i,j), in2_(i,j));
134  }
135  }
136 
137  private:
138  InputViewType1 in1_;
139  InputViewType2 in2_;
140  OutputViewType out_;
141  BinaryFunctionType f_;
142  };
143 
144  // Kokkos::parallel_for functor that implements unary
145  // Tpetra::transform for Vector objects.
146  //
147  // The input and output Views may be the same object (locally).
148  template<class InputViewType,
149  class OutputViewType,
150  class UnaryFunctionType,
151  class LocalIndexType>
152  class VectorUnaryTransformLoopBody {
153  private:
154  static_assert (static_cast<int> (InputViewType::Rank) == 1,
155  "InputViewType must be a rank-1 Kokkos::View.");
156  static_assert (static_cast<int> (OutputViewType::Rank) == 1,
157  "OutputViewType must be a rank-1 Kokkos::View.");
158 
159  public:
160  VectorUnaryTransformLoopBody (const InputViewType& in,
161  const OutputViewType& out,
162  UnaryFunctionType f) :
163  in_ (in), out_ (out), f_ (f)
164  {}
165 
166  KOKKOS_INLINE_FUNCTION void
167  operator () (const LocalIndexType i) const {
168  out_(i) = f_ (in_(i));
169  }
170 
171  private:
172  InputViewType in_;
173  OutputViewType out_;
174  UnaryFunctionType f_;
175  };
176 
177  // Kokkos::parallel_for functor that implements binary
178  // Tpetra::transform for Vector objects.
179  //
180  // The input and output Views may be the same object (locally).
181  template<class InputViewType1,
182  class InputViewType2,
183  class OutputViewType,
184  class BinaryFunctionType,
185  class LocalIndexType>
186  class VectorBinaryTransformLoopBody {
187  private:
188  static_assert (static_cast<int> (InputViewType1::Rank) == 1,
189  "InputViewType1 must be a rank-1 Kokkos::View.");
190  static_assert (static_cast<int> (InputViewType1::Rank) == 1,
191  "InputViewType1 must be a rank-1 Kokkos::View.");
192  static_assert (static_cast<int> (OutputViewType::Rank) == 1,
193  "OutputViewType must be a rank-1 Kokkos::View.");
194 
195  public:
196  VectorBinaryTransformLoopBody (const InputViewType1& in1,
197  const InputViewType2& in2,
198  const OutputViewType& out,
199  BinaryFunctionType f) :
200  in1_ (in1), in2_ (in2), out_ (out), f_ (f)
201  {}
202 
203  KOKKOS_INLINE_FUNCTION void
204  operator () (const LocalIndexType i) const {
205  out_(i) = f_ (in1_(i), in2_(i));
206  }
207 
208  private:
209  InputViewType1 in1_;
210  InputViewType2 in2_;
211  OutputViewType out_;
212  BinaryFunctionType f_;
213  };
214 
// Adapter functor: wraps a user's unary function f (value in, value
// out) so that it can be applied in place, one entry at a time.
215  // CUDA 9.2 doesn't like it when you call lambdas in private or
216  // protected methods (of Transform, in this case). Thus, I've
217  // broken out Transform::transform_sameObject into a separate
218  // functor and nonmember function.
219  template<class ExecutionSpace,
220  class SC, class LO, class GO, class NT,
221  class UnaryFunctionType>
222  class UnaryTransformSameMultiVector {
223  private:
// NOTE(review): listing line 224 is absent from this view, so the
// declaration of MV is not visible here.  Presumably it aliases
// ::Tpetra::MultiVector<SC, LO, GO, NT> -- confirm against the
// original header.
225  using IST = typename MV::impl_scalar_type;
226 
227  public:
// Store a copy of the user's function.
228  UnaryTransformSameMultiVector (UnaryFunctionType f) : f_ (f) {}
229 
// Overwrite the entry X_ij with f(X_ij).
230  KOKKOS_INLINE_FUNCTION void operator() (IST& X_ij) const {
231  // User function has the form IST(const IST&) suitable for
232  // transform, so we have to convert it to a for_each function
233  // of the form void(IST&).
234  X_ij = f_(X_ij);
235  }
236 
237  private:
238  UnaryFunctionType f_;
239  };
240 
// In-place unary transform: wraps f in a UnaryTransformSameMultiVector
// functor and hands it, with kernelLabel, execSpace, and output, to
// ::Tpetra::for_each.
//
// NOTE(review): listing line 247 is absent from this view.  It
// presumably declares the `output` parameter used in the body --
// confirm against the original header.
241  template<class ExecutionSpace,
242  class SC, class LO, class GO, class NT,
243  class UnaryFunctionType>
244  void
245  unaryTransformSameMultiVector (const char kernelLabel[],
246  ExecutionSpace execSpace,
248  UnaryFunctionType f)
249  {
250  using functor_type = UnaryTransformSameMultiVector<ExecutionSpace,
251  SC, LO, GO, NT, UnaryFunctionType>;
252  ::Tpetra::for_each (kernelLabel, execSpace, output, functor_type (f));
253  }
254 
257  template<class ExecutionSpace,
258  class SC, class LO, class GO, class NT>
259  struct Transform<ExecutionSpace,
260  ::Tpetra::MultiVector<SC, LO, GO, NT> >
261  {
262  private:
263  // Given a Kokkos execution space on which the user wants to run
264  // the transform, and a memory space in which the MultiVector's
265  // data live, determine the memory space that transform should
266  // use in its withLocalAccess call.
// The result is MemorySpace::memory_space when
// Kokkos::SpaceAccessibility reports it as accessible from
// ExecutionSpace, and ExecutionSpace::memory_space otherwise.
267  template<class MemorySpace>
268  using transform_memory_space =
269  typename std::conditional<
270  Kokkos::SpaceAccessibility<
271  ExecutionSpace,
272  typename MemorySpace::memory_space>::accessible,
273  typename MemorySpace::memory_space,
274  typename ExecutionSpace::memory_space>::type;
275 
// NOTE(review): listing line 276 is absent from this view, so the
// declaration of MV is not visible here.  Presumably it aliases
// ::Tpetra::MultiVector<SC, LO, GO, NT> -- confirm.
// preferred_memory_space is the memory space of MV's device_type.
277  using preferred_memory_space =
278  typename MV::device_type::memory_space;
// memory_space is the space actually used by the methods below.
279  using memory_space =
280  transform_memory_space<preferred_memory_space>;
281  using IST = typename MV::impl_scalar_type;
282 
283  // This is not the same as "aliases" -- we actually want to know
284  // if input and output are the same object (locally), so that we
285  // can sync correctly. The result of transform is undefined if
286  // input and output partially alias one another.
287  static bool
288  sameObject (const ::Tpetra::MultiVector<SC, LO, GO, NT>& input,
289  const ::Tpetra::MultiVector<SC, LO, GO, NT>& output)
290  {
291  return &input == &output ||
292  input.getLocalViewHost ().data () ==
293  output.getLocalViewHost ().data () ||
294  input.getLocalViewDevice ().data () ==
295  output.getLocalViewDevice ().data ();
296  }
297 
// Unary transform for a single column, for the case in which input
// and output are not the same object.  It builds a
// VectorUnaryTransformLoopBody from the local input and output and
// runs it with Kokkos::parallel_for over the range [0, lclNumRows),
// where lclNumRows is the extent of dimension 0 of the local input.
//
// kernelLabel: label given to Kokkos::parallel_for.
// execSpace: execution space instance used to build the RangePolicy.
// f: user's unary function; it is copied into the loop-body functor.
//
// NOTE(review): listing lines 303-304, 310, 313, and 316 are absent
// from this view.  They presumably declare the `input` and `output`
// parameters, open the two view-type aliases, and begin the
// withLocalAccess call whose arguments end at listing line 329 --
// confirm against the original header.
298  template<class UnaryFunctionType>
299  static void
300  transform_vec_notSameObject
301  (const char kernelLabel[],
302  ExecutionSpace execSpace,
305  UnaryFunctionType f)
306  {
307  memory_space memSpace;
308  // Generic lambdas need C++14, so we need a typedef here.
309  using input_view_type =
311  decltype (readOnly (input).on (memSpace))>;
312  using output_view_type =
314  decltype (writeOnly (output).on (memSpace))>;
315 
317  ([=] (const input_view_type& input_lcl,
318  const output_view_type& output_lcl) {
319  using functor_type = VectorUnaryTransformLoopBody<
320  input_view_type, output_view_type, UnaryFunctionType, LO>;
321  functor_type g (input_lcl, output_lcl, f);
322 
323  const LO lclNumRows = static_cast<LO> (input_lcl.extent (0));
324  using range_type = Kokkos::RangePolicy<ExecutionSpace, LO>;
325  range_type range (execSpace, 0, lclNumRows);
326  Kokkos::parallel_for (kernelLabel, range, g);
327  },
// The input is requested read-only and the output write-only.
328  readOnly (input).on (memSpace),
329  writeOnly (output).on (memSpace));
330  }
331 
// Unary transform over all columns at once, for the case in which
// input and output are not the same object.  It builds a
// MultiVectorUnaryTransformLoopBody from the local input and output
// and runs it with Kokkos::parallel_for over the range
// [0, lclNumRows), where lclNumRows is the extent of dimension 0 of
// the local input.
//
// kernelLabel: label given to Kokkos::parallel_for.
// execSpace: execution space instance used to build the RangePolicy.
// f: user's unary function; it is copied into the loop-body functor.
//
// NOTE(review): listing lines 337-338, 344, 347, and 350 are absent
// from this view.  They presumably declare the `input` and `output`
// parameters, open the two view-type aliases, and begin the
// withLocalAccess call whose arguments end at listing line 363 --
// confirm against the original header.
332  template<class UnaryFunctionType>
333  static void
334  transform_mv_notSameObject
335  (const char kernelLabel[],
336  ExecutionSpace execSpace,
339  UnaryFunctionType f)
340  {
341  memory_space memSpace;
342  // Generic lambdas need C++14, so we need typedefs here.
343  using input_view_type =
345  decltype (readOnly (input).on (memSpace))>;
346  using output_view_type =
348  decltype (writeOnly (output).on (memSpace))>;
349 
351  ([=] (const input_view_type& input_lcl,
352  const output_view_type& output_lcl) {
353  using functor_type = MultiVectorUnaryTransformLoopBody<
354  input_view_type, output_view_type, UnaryFunctionType, LO>;
355  functor_type g (input_lcl, output_lcl, f);
356 
357  const LO lclNumRows = static_cast<LO> (input_lcl.extent (0));
358  using range_type = Kokkos::RangePolicy<ExecutionSpace, LO>;
359  range_type range (execSpace, 0, lclNumRows);
360  Kokkos::parallel_for (kernelLabel, range, g);
361  },
// The input is requested read-only and the output write-only.
362  readOnly (input).on (memSpace),
363  writeOnly (output).on (memSpace));
364  }
365 
366  public:
// Implementation of unary transform on MultiVectors.
//
// kernelLabel: label passed on to the kernels launched below.
// execSpace: execution space instance passed on to those kernels.
// f: user's unary function.
//
// The TEUCHOS_TEST_FOR_EXCEPTION check below uses
// std::invalid_argument when input and output have different numbers
// of columns.
//
// NOTE(review): listing lines 371-372 are absent from this view.
// They presumably declare the `input` and `output` parameters --
// confirm against the original header.
367  template<class UnaryFunctionType>
368  static void
369  transform (const char kernelLabel[],
370  ExecutionSpace execSpace,
373  UnaryFunctionType f)
374  {
375  using Teuchos::TypeNameTraits;
376  using std::endl;
377 
378  const int myRank = output.getMap ()->getComm ()->getRank ();
379  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
380  if (verbose) {
// Build the whole message first, then write it to std::cerr in
// a single insertion.
381  std::ostringstream os;
382  os << "Proc " << myRank << ": Tpetra::transform:" << endl
383  << " kernelLabel: " << kernelLabel << endl
384  << " ExecutionSpace: "
385  << TypeNameTraits<ExecutionSpace>::name () << endl;
386  std::cerr << os.str ();
387  }
388 
389  const size_t numVecs = output.getNumVectors ();
390  TEUCHOS_TEST_FOR_EXCEPTION
391  (input.getNumVectors () != numVecs, std::invalid_argument,
392  "input.getNumVectors() = " << input.getNumVectors () <<
393  " != output.getNumVectors() = " << numVecs << ".");
394 
395  const bool constStride = output.isConstantStride () &&
396  input.isConstantStride ();
397 
// NOTE(review): memSpace is not referenced again in the visible
// lines of this method.
398  memory_space memSpace;
// With a single column, or when either object lacks constant
// stride, work one column at a time; otherwise handle all
// columns in one pass (the else branch below).
399  if (numVecs == size_t (1) || ! constStride) {
400  for (size_t j = 0; j < numVecs; ++j) {
401  auto output_j = output.getVectorNonConst (j);
402  auto input_j = input.getVectorNonConst (j);
403 
404  // Check for aliasing here, since it's possible for only
405  // some columns of input & output to alias. Aliasing is a
406  // correctness issue (e.g., for sync'ing).
407  if (sameObject (*output_j, *input_j)) {
408  unaryTransformSameMultiVector (kernelLabel, execSpace,
409  *output_j, f);
410  }
411  else {
412  transform_vec_notSameObject (kernelLabel, execSpace,
413  *input_j, *output_j, f);
414  }
415  }
416  }
417  else {
418  if (sameObject (output, input)) {
419  unaryTransformSameMultiVector (kernelLabel, execSpace,
420  output, f);
421  }
422  else {
423  transform_mv_notSameObject (kernelLabel, execSpace,
424  input, output, f);
425  }
426  }
427  }
428 
429  // Implementation of binary transform on MultiVectors.
//
// kernelLabel: label given to each Kokkos::parallel_for below.
// execSpace: execution space instance used to build the RangePolicy.
// f: user's binary function; it is copied into the loop-body functors.
//
// The two TEUCHOS_TEST_FOR_EXCEPTION checks below use
// std::invalid_argument when either input's number of columns
// differs from the output's.
//
// Both the per-column path and the all-columns path distinguish
// five aliasing cases (all three the same; the two inputs the same;
// output same as input1; output same as input2; all different).  An
// object that is the same as the output is accessed only through the
// output's readWrite access, so that no object is requested twice.
//
// NOTE(review): many listing lines are absent from this view
// (434-436, 503, 506, 509, 512, 516, 529, 544, 559, 574, 606, 609,
// 612, 615, 619, 632, 647, 662, 677).  They presumably declare the
// `input1`, `input2`, and `output` parameters, open the view-type
// aliases, and begin each withLocalAccess call -- confirm against
// the original header.
430  template<class BinaryFunctionType>
431  static void
432  transform (const char kernelLabel[],
433  ExecutionSpace execSpace,
437  BinaryFunctionType f)
438  {
439  using Teuchos::TypeNameTraits;
440  using std::endl;
441  const char prefix[] = "Tpetra::transform (binary): ";
442 
443  const int myRank = output.getMap ()->getComm ()->getRank ();
444  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
445  if (verbose) {
446  std::ostringstream os;
447  os << "Proc " << myRank << ": " << prefix << endl
448  << " Tpetra::MultiVector<" << TypeNameTraits<SC>::name ()
449  << ", " << TypeNameTraits<LO>::name () << ", "
450  << TypeNameTraits<GO>::name () << ", "
451  << TypeNameTraits<NT>::name () << ">" << endl
452  << " kernelLabel: " << kernelLabel << endl
453  << " ExecutionSpace: "
454  << TypeNameTraits<ExecutionSpace>::name () << endl;
455  std::cerr << os.str ();
456  }
457 
458  const size_t numVecs = output.getNumVectors ();
459  TEUCHOS_TEST_FOR_EXCEPTION
460  (input1.getNumVectors () != numVecs, std::invalid_argument,
461  prefix << "input1.getNumVectors() = " << input1.getNumVectors ()
462  << " != output.getNumVectors() = " << numVecs << ".");
463  TEUCHOS_TEST_FOR_EXCEPTION
464  (input2.getNumVectors () != numVecs, std::invalid_argument,
465  prefix << "input2.getNumVectors() = " << input2.getNumVectors ()
466  << " != output.getNumVectors() = " << numVecs << ".");
467 
468  const bool constStride = output.isConstantStride () &&
469  input1.isConstantStride () && input2.isConstantStride ();
470  memory_space memSpace;
471 
// One range over the output's local rows is shared by every
// kernel launched below.
472  const LO lclNumRows = static_cast<LO> (output.getLocalLength ());
473  using range_type = Kokkos::RangePolicy<ExecutionSpace, LO>;
474  range_type range (execSpace, 0, lclNumRows);
475 
476  if (numVecs == size_t (1) || ! constStride) { // operate on Vectors
477  for (size_t j = 0; j < numVecs; ++j) {
478  auto output_j = output.getVectorNonConst (j);
479  auto input1_j = input1.getVectorNonConst (j);
480  auto input2_j = input2.getVectorNonConst (j);
481 
482  // Check for aliasing here, since it's possible for only
483  // some columns of input & output to alias. Aliasing is a
484  // correctness issue (e.g., for sync'ing).
485  const bool outin1same = sameObject (*output_j, *input1_j);
486  const bool outin2same = sameObject (*output_j, *input2_j);
487  // Don't double-view input1.
488  const bool in1in2same = sameObject (*input1_j, *input2_j);
489  const bool allsame = outin1same && outin2same; // by transitivity
490 
491  // Help GCC 4.9.3 deduce the types of *output_j,
492  // *input1_j, and *input2_j. See discussion here:
493  // https://github.com/trilinos/Trilinos/pull/5115
494  using vec_type = ::Tpetra::Vector<SC, LO, GO, NT>;
495  vec_type& input1_j_ref = *input1_j;
496  vec_type& input2_j_ref = *input2_j;
497  vec_type& output_j_ref = *output_j;
498 
499  // Once we get C++14 generic lambdas, we can get rid of
500  // these typedefs and use "const auto&" as the argument(s)
501  // for the withLocalAccess lambdas below.
502  using input1_view_type =
504  decltype (readOnly (input1_j_ref).on (memSpace))>;
505  using input2_view_type =
507  decltype (readOnly (input2_j_ref).on (memSpace))>;
508  using rw_output_view_type =
510  decltype (readWrite (output_j_ref).on (memSpace))>;
511  using wo_output_view_type =
513  decltype (writeOnly (output_j_ref).on (memSpace))>;
514 
515  if (allsame) {
// Only the output is accessed (read-write); it serves as both
// inputs and as the output of the functor.
517  ([=] (const rw_output_view_type& output_lcl) {
518  using functor_type = VectorBinaryTransformLoopBody<
519  typename rw_output_view_type::const_type,
520  typename rw_output_view_type::const_type,
521  rw_output_view_type,
522  BinaryFunctionType, LO>;
523  functor_type functor (output_lcl, output_lcl, output_lcl, f);
524  Kokkos::parallel_for (kernelLabel, range, functor);
525  },
526  readWrite (output_j_ref).on (memSpace));
527  }
528  else if (in1in2same) { // and not same as output
// input1 is accessed once and passed as both inputs.
530  ([=] (const input1_view_type& input1_lcl,
531  const wo_output_view_type& output_lcl) {
532  using functor_type = VectorBinaryTransformLoopBody<
533  input1_view_type,
534  input1_view_type,
535  wo_output_view_type,
536  BinaryFunctionType, LO>;
537  functor_type functor (input1_lcl, input1_lcl, output_lcl, f);
538  Kokkos::parallel_for (kernelLabel, range, functor);
539  },
540  readOnly (input1_j_ref).on (memSpace),
541  writeOnly (output_j_ref).on (memSpace));
542  }
543  else if (outin1same) { // and input1 not same as input2
// The read-write output stands in for input1.
545  ([=] (const input2_view_type& input2_lcl,
546  const rw_output_view_type& output_lcl) {
547  using functor_type = VectorBinaryTransformLoopBody<
548  typename rw_output_view_type::const_type,
549  input2_view_type,
550  rw_output_view_type,
551  BinaryFunctionType, LO>;
552  functor_type functor (output_lcl, input2_lcl, output_lcl, f);
553  Kokkos::parallel_for (kernelLabel, range, functor);
554  },
555  readOnly (input2_j_ref).on (memSpace),
556  readWrite (output_j_ref).on (memSpace));
557  }
558  else if (outin2same) { // and input1 not same as input2
// The read-write output stands in for input2.
560  ([=] (const input1_view_type& input1_lcl,
561  const rw_output_view_type& output_lcl) {
562  using functor_type = VectorBinaryTransformLoopBody<
563  input1_view_type,
564  typename rw_output_view_type::const_type,
565  rw_output_view_type,
566  BinaryFunctionType, LO>;
567  functor_type functor (input1_lcl, output_lcl, output_lcl, f);
568  Kokkos::parallel_for (kernelLabel, range, functor);
569  },
570  readOnly (input1_j_ref).on (memSpace),
571  readWrite (output_j_ref).on (memSpace));
572  }
573  else { // output, input1, and input2 all differ
575  ([=] (const input1_view_type& input1_lcl,
576  const input2_view_type& input2_lcl,
577  const wo_output_view_type& output_lcl) {
578  using functor_type = VectorBinaryTransformLoopBody<
579  input1_view_type,
580  input2_view_type,
581  wo_output_view_type,
582  BinaryFunctionType, LO>;
583  functor_type functor (input1_lcl, input2_lcl, output_lcl, f);
584  Kokkos::parallel_for (kernelLabel, range, functor);
585  },
586  readOnly (input1_j_ref).on (memSpace),
587  readOnly (input2_j_ref).on (memSpace),
588  writeOnly (output_j_ref).on (memSpace));
589  }
590  }
591  }
592  else { // operate on MultiVectors
// Same five-way case analysis as in the per-column branch above,
// applied to the whole objects with the MultiVector loop body.
593  // Check for aliasing here, since it's possible for only
594  // some columns of input & output to alias. Aliasing is a
595  // correctness issue (e.g., for sync'ing).
596  const bool outin1same = sameObject (output, input1);
597  const bool outin2same = sameObject (output, input2);
598  // Don't double-view input1.
599  const bool in1in2same = sameObject (input1, input2);
600  const bool allsame = outin1same && outin2same; // by transitivity
601 
602  // Once we get C++14 generic lambdas, we can get rid of
603  // these typedefs and use "const auto&" as the argument(s)
604  // for the withLocalAccess lambdas below.
605  using input1_view_type =
607  decltype (readOnly (input1).on (memSpace))>;
608  using input2_view_type =
610  decltype (readOnly (input2).on (memSpace))>;
611  using rw_output_view_type =
613  decltype (readWrite (output).on (memSpace))>;
614  using wo_output_view_type =
616  decltype (writeOnly (output).on (memSpace))>;
617 
618  if (allsame) {
620  ([=] (const rw_output_view_type& output_lcl) {
621  using functor_type = MultiVectorBinaryTransformLoopBody<
622  typename rw_output_view_type::const_type,
623  typename rw_output_view_type::const_type,
624  rw_output_view_type,
625  BinaryFunctionType, LO>;
626  functor_type functor (output_lcl, output_lcl, output_lcl, f);
627  Kokkos::parallel_for (kernelLabel, range, functor);
628  },
629  readWrite (output).on (memSpace));
630  }
631  else if (in1in2same) { // and not same as output
633  ([=] (const input1_view_type& input1_lcl,
634  const wo_output_view_type& output_lcl) {
635  using functor_type = MultiVectorBinaryTransformLoopBody<
636  input1_view_type,
637  input1_view_type,
638  wo_output_view_type,
639  BinaryFunctionType, LO>;
640  functor_type functor (input1_lcl, input1_lcl, output_lcl, f);
641  Kokkos::parallel_for (kernelLabel, range, functor);
642  },
643  readOnly (input1).on (memSpace),
644  writeOnly (output).on (memSpace));
645  }
646  else if (outin1same) { // and input1 not same as input2
648  ([=] (const input2_view_type& input2_lcl,
649  const rw_output_view_type& output_lcl) {
650  using functor_type = MultiVectorBinaryTransformLoopBody<
651  typename rw_output_view_type::const_type,
652  input2_view_type,
653  rw_output_view_type,
654  BinaryFunctionType, LO>;
655  functor_type functor (output_lcl, input2_lcl, output_lcl, f);
656  Kokkos::parallel_for (kernelLabel, range, functor);
657  },
658  readOnly (input2).on (memSpace),
659  readWrite (output).on (memSpace));
660  }
661  else if (outin2same) { // and input1 not same as input2
663  ([=] (const input1_view_type& input1_lcl,
664  const rw_output_view_type& output_lcl) {
665  using functor_type = MultiVectorBinaryTransformLoopBody<
666  input1_view_type,
667  typename rw_output_view_type::const_type,
668  rw_output_view_type,
669  BinaryFunctionType, LO>;
670  functor_type functor (input1_lcl, output_lcl, output_lcl, f);
671  Kokkos::parallel_for (kernelLabel, range, functor);
672  },
673  readOnly (input1).on (memSpace),
674  readWrite (output).on (memSpace));
675  }
676  else { // output, input1, and input2 all differ
678  ([=] (const input1_view_type& input1_lcl,
679  const input2_view_type& input2_lcl,
680  const wo_output_view_type& output_lcl) {
681  using functor_type = MultiVectorBinaryTransformLoopBody<
682  input1_view_type,
683  input2_view_type,
684  wo_output_view_type,
685  BinaryFunctionType, LO>;
686  functor_type functor (input1_lcl, input2_lcl, output_lcl, f);
687  Kokkos::parallel_for (kernelLabel, range, functor);
688  },
689  readOnly (input1).on (memSpace),
690  readOnly (input2).on (memSpace),
691  writeOnly (output).on (memSpace));
692  }
693  }
694  }
695  };
696 
// Specialization of Transform for ::Tpetra::Vector.  Both transform
// overloads forward all of their arguments to the corresponding
// transform of Transform<ExecutionSpace, MV>.
//
// NOTE(review): listing lines 713-714, 717, 730-732, and 735 are
// absent from this view.  They presumably declare the `input` /
// `input1` / `input2` / `output` parameters and the alias MV
// (presumably ::Tpetra::MultiVector<SC, LO, GO, NT>) -- confirm
// against the original header.
703  template<class ExecutionSpace,
704  class SC, class LO, class GO, class NT>
705  struct Transform<ExecutionSpace,
706  ::Tpetra::Vector<SC, LO, GO, NT> >
707  {
708  // Implementation of unary transform on Vectors.
709  template<class UnaryFunctionType>
710  static void
711  transform (const char kernelLabel[],
712  ExecutionSpace execSpace,
715  UnaryFunctionType f)
716  {
718  using impl_type = Transform<ExecutionSpace, MV>;
719  using UFT = UnaryFunctionType;
720 
// The explicit template argument names the function type;
// argument order here is (input, output).
721  impl_type::template transform<UFT> (kernelLabel, execSpace,
722  input, output, f);
723  }
724 
725  // Implementation of binary transform on Vectors.
726  template<class BinaryFunctionType>
727  static void
728  transform (const char kernelLabel[],
729  ExecutionSpace execSpace,
733  BinaryFunctionType f)
734  {
736  using impl_type = Transform<ExecutionSpace, MV>;
737  using BFT = BinaryFunctionType;
738 
// Argument order here is (input1, input2, output).
739  impl_type::template transform<BFT> (kernelLabel, execSpace,
740  input1, input2, output, f);
741  }
742  };
743 
744  } // namespace Details
745 
746 } // namespace Tpetra
747 
748 #endif // TPETRA_TRANSFORM_MULTIVECTOR_HPP
Include this file to make Tpetra::MultiVector and Tpetra::Vector work with Tpetra::withLocalAccess.
dual_view_type::t_host getLocalViewHost() const
A local Kokkos::View of host memory.
Details::LocalAccess< GlobalObjectType, typename Details::DefaultMemorySpace< GlobalObjectType >::type, Details::AccessMode::WriteOnly > writeOnly(GlobalObjectType &)
Declare that you want to access the given global object's local data in write-only mode...
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
One or more distributed dense vectors.
Details::LocalAccess< GlobalObjectType, typename Details::DefaultMemorySpace< GlobalObjectType >::type, Details::AccessMode::ReadWrite > readWrite(GlobalObjectType &)
Declare that you want to access the given global object's local data in read-and-write mode...
Declaration and definition of Tpetra::transform; declaration of helper classes for users to specializ...
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
Specialize this class to implement Tpetra::transform for specific GlobalDataStructure types...
static bool verbose()
Whether Tpetra is in verbose mode.
void withLocalAccess(typename Details::ArgsToFunction< LocalAccessTypes...>::type userFunction, LocalAccessTypes...localAccesses)
Get access to a Tpetra global object's local data.
Include this file to make Tpetra::for_each work with Tpetra::MultiVector and Tpetra::Vector.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
A distributed dense vector.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
static void transform(const char kernelLabel[], ExecutionSpace execSpace, GlobalDataStructure &input, GlobalDataStructure &output, UnaryFunctionType f)
Unary transform: output_i = f(input_i).
typename Details::GetNonowningLocalObject< LocalAccessType >::nonowning_local_object_type with_local_access_function_argument_type
Type of the local object, that is an argument to the function the user gives to withLocalAccess.
void for_each(const char kernelLabel[], ExecutionSpace execSpace, GlobalDataStructure &X, UserFunctionType f)
Apply a function entrywise to each local entry of a Tpetra global data structure, analogously to std:...
Details::LocalAccess< GlobalObjectType, typename Details::DefaultMemorySpace< GlobalObjectType >::type, Details::AccessMode::ReadOnly > readOnly(GlobalObjectType &)
Declare that you want to access the given global object's local data in read-only mode...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.