42 #ifndef TPETRA_DETAILS_IDOT_HPP
43 #define TPETRA_DETAILS_IDOT_HPP
62 #include "Tpetra_MultiVector.hpp"
63 #include "Tpetra_Vector.hpp"
64 #include "Teuchos_CommHelpers.hpp"
65 #include "KokkosBlas1_dot.hpp"
78 using DevView =
typename MV::dual_view_type::t_dev::const_type;
79 using HostView =
typename MV::dual_view_type::t_host::const_type;
81 template<
typename exec_space>
82 static DevView
get(
const MV& x,
typename std::enable_if<std::is_same<exec_space, typename MV::execution_space>::value>::type* =
nullptr)
84 return x.getLocalViewDevice(Tpetra::Access::ReadOnly);
87 template<
typename exec_space>
88 static HostView
get(
const MV& x,
typename std::enable_if<!std::is_same<exec_space, typename MV::execution_space>::value>::type* =
nullptr)
90 return x.getLocalViewHost(Tpetra::Access::ReadOnly);
// idotLocal: computes the dot product(s) of the local data of X and Y and
// stores them in localResult, one entry per column pair. No communication
// is performed in the visible code.
//
// NOTE(review): the signature line and several brace / control-flow lines of
// this function are missing from this listing. The declaration given in the
// summary at the end of the listing is
//   void idotLocal(const ResultView &localResult, const MV &X, const MV &Y)
96 template<
class MV,
class ResultView,
bool runOnDevice>
101 using pair_type = Kokkos::pair<size_t, size_t>;
// runOnDevice == true selects MV's own execution space; otherwise the
// kernels use Kokkos::DefaultHostExecutionSpace.
102 using exec_space =
typename std::conditional<runOnDevice, typename MV::execution_space, Kokkos::DefaultHostExecutionSpace>::type;
// Compile-time guard: the chosen execution space must be able to access the
// memory that holds localResult.
104 static_assert(Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible,
105 "idotLocal: Execution space must be able to access localResult");
// Unmanaged alias of localResult's storage, typed with exec_space's memory
// space; it has the same length as localResult.
108 Kokkos::View<typename ResultView::data_type, typename exec_space::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
109 localResultUnmanaged(localResult.data(), localResult.extent(0));
110 const size_t numRows = X.getLocalLength ();
111 const pair_type rowRange (0, numRows);
112 const size_t X_numVecs = X.getNumVectors ();
113 const size_t Y_numVecs = Y.getNumVectors ();
// Number of results produced: the larger of the two column counts.
114 const size_t numVecs = X_numVecs > Y_numVecs ? X_numVecs : Y_numVecs;
// Column counts may differ only when one of the operands has exactly one
// column; any other mismatch throws std::invalid_argument.
117 if (X_numVecs != Y_numVecs &&
118 X_numVecs !=
size_t (1) &&
119 Y_numVecs !=
size_t (1)) {
120 std::ostringstream os;
121 os <<
"Tpetra::idot: X.getNumVectors() = " << X_numVecs
122 <<
" != Y.getNumVectors() = " << Y_numVecs
123 <<
", but neither is 1.";
124 throw std::invalid_argument (os.str ());
// Read-only local views of X and Y, on device or host according to
// exec_space.
126 auto X_lcl = GetReadOnly<MV>::template get<exec_space>(X);
127 auto Y_lcl = GetReadOnly<MV>::template get<exec_space>(Y);
// True only when both operands have constant stride and the same number of
// columns.
// NOTE(review): the statement that branches on useMVDot is not visible in
// this listing; presumably the two KokkosBlas::dot calls on whole views below
// are the useMVDot path and the per-column loop is the fallback -- confirm
// against the original file.
129 bool useMVDot = X.isConstantStride() && Y.isConstantStride() && X_numVecs == Y_numVecs;
// Single-column case: dot of two rank-1 column views into a rank-0 result.
132 if (numVecs ==
size_t (1)) {
133 auto X_lcl_1d = Kokkos::subview (X_lcl, rowRange, 0);
134 auto Y_lcl_1d = Kokkos::subview (Y_lcl, rowRange, 0);
135 auto result_0d = Kokkos::subview (localResultUnmanaged, 0);
136 KokkosBlas::dot (result_0d, X_lcl_1d, Y_lcl_1d);
// Multi-column case: one KokkosBlas::dot call on the 2-D views fills all
// entries of the result.
139 auto X_lcl_2d = Kokkos::subview (X_lcl, rowRange, pair_type (0, X_numVecs));
140 auto Y_lcl_2d = Kokkos::subview (Y_lcl, rowRange, pair_type (0, Y_numVecs));
141 KokkosBlas::dot (localResultUnmanaged, X_lcl_2d, Y_lcl_2d);
// Column-by-column path: one KokkosBlas::dot call per result entry.
146 auto XWhichVectors = Tpetra::getMultiVectorWhichVectors(X);
147 auto YWhichVectors = Tpetra::getMultiVectorWhichVectors(Y);
149 for(
size_t vec = 0; vec < numVecs; vec++) {
// An operand with fewer columns than numVecs (i.e. a single column) always
// contributes its column 0. For an operand without constant stride, the
// column index is translated through its "which vectors" array.
152 size_t Xj = (numVecs == X_numVecs) ? vec : 0;
153 Xj = X.isConstantStride() ? Xj : XWhichVectors[Xj];
154 size_t Yj = (numVecs == Y_numVecs) ? vec : 0;
155 Yj = Y.isConstantStride() ? Yj : YWhichVectors[Yj];
156 auto Xcol = Kokkos::subview(X_lcl, rowRange, Xj);
157 auto Ycol = Kokkos::subview(Y_lcl, rowRange, Yj);
// Entry vec of the result receives the dot product of the selected columns.
160 KokkosBlas::dot(Kokkos::subview(localResultUnmanaged, vec), Xcol, Ycol);
// Template header and first member alias of the helper that provides the
// static run<exec_space>() overloads.
// NOTE(review): the line that declares the enclosing type is missing from
// this listing; other code in this file refers to it as
// IdotHelper<MV, ResultView>.
166 template<
typename MV,
typename ResultView>
// Element type of the dot-product results, as defined by MV.
169 using dot_type =
typename MV::dot_type;
172 template<
typename exec_space>
173 static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
174 const ResultView& globalResult,
const MV& X,
const MV& Y,
175 typename std::enable_if<Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* =
nullptr)
178 constexpr
bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
179 idotLocal<MV, ResultView, runOnDevice>(globalResult, X, Y);
182 exec_space().fence();
183 auto comm = X.getMap()->getComm();
184 return iallreduce(globalResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
188 template<
typename exec_space>
189 static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
190 const ResultView& globalResult,
const MV& X,
const MV& Y,
191 typename std::enable_if<!Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* =
nullptr)
193 constexpr
bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
194 Kokkos::View<dot_type*, typename exec_space::memory_space> localResult(Kokkos::ViewAllocateWithoutInitializing(
"idot:localResult"), X.getNumVectors());
195 idotLocal<MV, decltype(localResult), runOnDevice>(localResult, X, Y);
198 exec_space().fence();
199 auto comm = X.getMap()->getComm();
200 return iallreduce(localResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
// idotImpl: common entry point that picks the execution space for the local
// dot product and forwards to IdotHelper<MV, ResultView>::run.
//
// NOTE(review): the name/parameter line and the brace / else lines of this
// function are missing from this listing. The declaration given in the
// summary at the end of the listing is
//   idotImpl(const ResultView &globalResult, const MV &X, const MV &Y)
208 template<
class MV,
class ResultView>
209 std::shared_ptr< ::Tpetra::Details::CommRequest>
// The result view's element type must be exactly MV::dot_type.
214 static_assert(std::is_same<typename ResultView::non_const_value_type, typename MV::dot_type>::value,
215 "Tpetra::idot: result view's element type must match MV::dot_type");
// When X reports need_sync_device(), the work is dispatched with
// Kokkos::DefaultHostExecutionSpace; the other return dispatches with MV's
// own execution space. Only X's state is examined in the visible lines.
// NOTE(review): presumably the second return is the else branch -- confirm
// against the original file.
218 if(X.need_sync_device())
221 return IdotHelper<MV, ResultView>::template run<Kokkos::DefaultHostExecutionSpace>(globalResult, X, Y);
226 return IdotHelper<MV, ResultView>::template run<typename MV::execution_space>(globalResult, X, Y);
// idot overload whose output is a raw pointer to dot_type.
//
// resultRaw is wrapped in an unmanaged Kokkos::HostSpace view of length
// max(X.getNumVectors(), Y.getNumVectors()), so it must point to at least
// that many entries.
// NOTE(review): the remainder of the body (including the return statement)
// is missing from this listing; presumably resultView is forwarded to
// idotImpl -- confirm against the original file.
287 template<
class SC,
class LO,
class GO,
class NT>
288 std::shared_ptr< ::Tpetra::Details::CommRequest>
289 idot (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type* resultRaw,
290 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
291 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
293 using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
294 const size_t X_numVecs = X.getNumVectors ();
295 const size_t Y_numVecs = Y.getNumVectors ();
// One result entry per column of the operand with more columns.
296 const size_t numVecs = (X_numVecs > Y_numVecs) ? X_numVecs : Y_numVecs;
// Non-owning view over the caller's buffer.
297 Kokkos::View<dot_type*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
298 resultView(resultRaw, numVecs);
// idot overload whose output is a rank-1 Kokkos::View of dot_type in the
// MultiVector's device_type.
// NOTE(review): the body of this overload is missing from this listing, so
// nothing about its behavior can be confirmed here.
364 template<
class SC,
class LO,
class GO,
class NT>
365 std::shared_ptr< ::Tpetra::Details::CommRequest>
366 idot (
const Kokkos::View<typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*,
367 typename ::Tpetra::MultiVector<SC, LO, GO, NT>::device_type>& result,
368 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
369 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
// idot overload for two Tpetra::Vector operands, whose output is a rank-0
// Kokkos::View of dot_type in the Vector's device_type.
// NOTE(review): the remainder of the body (including the return statement)
// is missing from this listing; presumably result1D is forwarded to
// idotImpl -- confirm against the original file.
415 template<
class SC,
class LO,
class GO,
class NT>
416 std::shared_ptr< ::Tpetra::Details::CommRequest>
417 idot (
const Kokkos::View<typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type,
418 typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type>& result,
419 const ::Tpetra::Vector<SC, LO, GO, NT>& X,
420 const ::Tpetra::Vector<SC, LO, GO, NT>& Y)
422 using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
423 using result_device_t = typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type;
// Re-express the rank-0 result as a non-owning rank-1 view of length 1 over
// the same storage.
424 Kokkos::View<dot_type*, result_device_t, Kokkos::MemoryTraits<Kokkos::Unmanaged>> result1D(result.data(), 1);
430 #endif // TPETRA_DETAILS_IDOT_HPP
std::shared_ptr< ::Tpetra::Details::CommRequest > idotImpl(const ResultView &globalResult, const MV &X, const MV &Y)
Internal (common) version of idot, a global dot product that uses a non-blocking MPI reduction...
void idotLocal(const ResultView &localResult, const MV &X, const MV &Y)
Compute dot product locally. Where the kernel runs is controlled by runOnDevice.
std::shared_ptr< ::Tpetra::Details::CommRequest > idot(typename::Tpetra::MultiVector< SC, LO, GO, NT >::dot_type *resultRaw, const ::Tpetra::MultiVector< SC, LO, GO, NT > &X, const ::Tpetra::MultiVector< SC, LO, GO, NT > &Y)
Nonblocking dot product, with either Tpetra::MultiVector or Tpetra::Vector inputs, and raw pointer or raw array output.
Declaration of Tpetra::iallreduce.