42 #ifndef TPETRA_DETAILS_IDOT_HPP
43 #define TPETRA_DETAILS_IDOT_HPP
63 #include "Tpetra_MultiVector.hpp"
64 #include "Tpetra_Vector.hpp"
65 #include "KokkosBlas1_dot.hpp"
101 template<
class SC,
class LO,
class GO,
class NT>
103 lclDotRaw (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*
const resultRaw,
104 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
105 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y,
106 const bool resultOnDevice)
108 using ::Kokkos::Impl::MemorySpaceAccess;
110 using ::Kokkos::HostSpace;
111 using ::Kokkos::subview;
112 using ::Kokkos::View;
113 typedef ::Kokkos::pair<size_t, size_t> pair_type;
114 typedef ::Tpetra::MultiVector<SC, LO, GO, NT> MV;
115 typedef typename MV::dot_type dot_type;
116 typedef typename MV::device_type device_type;
117 typedef typename MV::dual_view_type::t_dev::memory_space dev_memory_space;
118 typedef typename MV::dual_view_type::t_host::memory_space host_memory_space;
119 typedef View<dot_type*, device_type> result_dev_view_type;
120 typedef typename result_dev_view_type::HostMirror result_host_view_type;
122 const size_t numRows = X.getLocalLength ();
123 const pair_type rowRange (0, numRows);
124 const size_t X_numVecs = X.getNumVectors ();
125 const size_t Y_numVecs = Y.getNumVectors ();
126 const size_t numVecs = X_numVecs > Y_numVecs ? X_numVecs : Y_numVecs;
130 if (X_numVecs != Y_numVecs &&
131 X_numVecs !=
size_t (1) &&
132 Y_numVecs !=
size_t (1)) {
133 std::ostringstream os;
134 os <<
"Tpetra::idot: X.getNumVectors() = " << X_numVecs
135 <<
" != Y.getNumVectors() = " << Y_numVecs
136 <<
", but neither is 1.";
137 throw std::invalid_argument (os.str ());
140 const bool X_latestOnHost = X.template need_sync<dev_memory_space> () &&
141 ! X.template need_sync<host_memory_space> ();
142 const bool Y_latestOnHost = Y.template need_sync<dev_memory_space> () &&
143 ! Y.template need_sync<host_memory_space> ();
145 const bool runOnHost = X_latestOnHost;
150 const bool Y_copyToHost = (X_latestOnHost && ! Y_latestOnHost);
151 const bool Y_copyToDev = (! X_latestOnHost && Y_latestOnHost);
153 result_dev_view_type result;
154 result_host_view_type result_h;
155 if (resultOnDevice) {
156 result = result_dev_view_type (resultRaw, numVecs);
161 result_h = ::Kokkos::create_mirror_view (result);
166 result_h = result_host_view_type (resultRaw, numVecs);
170 result = result_dev_view_type (
"result", numVecs);
175 const bool X_or_Y_have_nonconstant_stride =
176 ! X.isConstantStride () || ! Y.isConstantStride ();
177 if (X_or_Y_have_nonconstant_stride && numVecs >
size_t (1)) {
189 for (
size_t j = 0; j < numVecs; ++j) {
192 const size_t X_col = (X_numVecs == size_t (1)) ?
size_t (0) : j;
193 const size_t Y_col = (Y_numVecs == size_t (1)) ?
size_t (0) : j;
194 auto X_j = X.getVector (X_col);
195 auto Y_j = Y.getVector (Y_col);
198 auto X_j_2d_h = X_j->template getLocalView<host_memory_space> ();
199 auto X_j_1d_h = subview (X_j_2d_h, rowRange, 0);
200 decltype (X_j_1d_h) Y_j_1d_h;
203 auto Y_j_2d = Y_j->template getLocalView<dev_memory_space> ();
204 auto Y_j_1d = subview (Y_j_2d, rowRange, 0);
205 typename decltype (Y_j_1d_h)::non_const_type
206 Y_j_1d_h_nc (
"Y_j_1d_h", Y_j_1d.extent (0));
208 Y_j_1d_h = Y_j_1d_h_nc;
211 auto Y_j_2d_h = Y_j->template getLocalView<host_memory_space> ();
212 Y_j_1d_h = subview (Y_j_2d_h, rowRange, 0);
214 auto result_h_j = subview (result_h, j);
215 KokkosBlas::dot (result_h_j, X_j_1d_h, Y_j_1d_h);
218 auto X_j_2d = X_j->template getLocalView<dev_memory_space> ();
219 auto X_j_1d = subview (X_j_2d, rowRange, 0);
220 decltype (X_j_1d) Y_j_1d;
223 auto Y_j_2d_h = Y_j->template getLocalView<host_memory_space> ();
224 auto Y_j_1d_h = subview (Y_j_2d_h, rowRange, 0);
225 typename decltype (Y_j_1d)::non_const_type
226 Y_j_1d_nc (
"Y_j_1d", Y_j_1d_h.extent (0));
231 auto Y_j_2d = Y_j->template getLocalView<dev_memory_space> ();
232 Y_j_1d = subview (Y_j_2d, rowRange, 0);
234 auto result_j = subview (result, j);
235 KokkosBlas::dot (result_j, X_j_1d, Y_j_1d);
241 auto X_lcl_h = X.template getLocalView<host_memory_space> ();
242 decltype (X_lcl_h) Y_lcl_h;
245 auto Y_lcl = Y.template getLocalView<dev_memory_space> ();
246 typename decltype (Y_lcl_h)::non_const_type
247 Y_lcl_h_nc (
"Y_lcl_h", Y_lcl.extent (0), Y_numVecs);
249 Y_lcl_h = Y_lcl_h_nc;
252 Y_lcl_h = Y.template getLocalView<host_memory_space> ();
255 if (numVecs ==
size_t (1)) {
256 auto X_lcl_h_1d = subview (X_lcl_h, rowRange, 0);
257 auto Y_lcl_h_1d = subview (Y_lcl_h, rowRange, 0);
258 auto result_h_0d = subview (result_h, 0);
259 KokkosBlas::dot (result_h_0d, X_lcl_h_1d, Y_lcl_h_1d);
262 auto X_lcl_h_2d = subview (X_lcl_h, rowRange,
263 pair_type (0, X_numVecs));
264 auto Y_lcl_h_2d = subview (Y_lcl_h, rowRange,
265 pair_type (0, Y_numVecs));
266 KokkosBlas::dot (result_h, X_lcl_h_2d, Y_lcl_h_2d);
270 auto X_lcl = X.template getLocalView<dev_memory_space> ();
271 decltype (X_lcl) Y_lcl;
274 auto Y_lcl_h = Y.template getLocalView<host_memory_space> ();
275 typename decltype (Y_lcl)::non_const_type
276 Y_lcl_nc (
"Y_lcl", Y_lcl_h.extent (0), Y_numVecs);
281 Y_lcl = Y.template getLocalView<dev_memory_space> ();
284 if (numVecs ==
size_t (1)) {
285 auto X_lcl_1d = subview (X_lcl, rowRange, 0);
286 auto Y_lcl_1d = subview (Y_lcl, rowRange, 0);
287 auto result_0d = subview (result, 0);
288 KokkosBlas::dot (result_0d, X_lcl_1d, Y_lcl_1d);
291 auto X_lcl_2d = subview (X_lcl, rowRange,
292 pair_type (0, X_numVecs));
293 auto Y_lcl_2d = subview (Y_lcl, rowRange,
294 pair_type (0, Y_numVecs));
295 KokkosBlas::dot (result, X_lcl_2d, Y_lcl_2d);
300 if (runOnHost && resultOnDevice) {
304 else if (! runOnHost && ! resultOnDevice) {
368 template<
class SC,
class LO,
class GO,
class NT>
369 std::shared_ptr< ::Tpetra::Details::CommRequest>
370 idot (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type* resultRaw,
371 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
372 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
374 using ::Kokkos::Impl::MemorySpaceAccess;
375 using ::Kokkos::HostSpace;
376 using ::Kokkos::View;
378 typedef ::Tpetra::MultiVector<SC, LO, GO, NT> MV;
379 typedef typename MV::dot_type dot_type;
380 typedef typename MV::device_type device_type;
381 typedef View<dot_type*, device_type> result_dev_view_type;
382 typedef typename result_dev_view_type::HostMirror result_host_view_type;
383 typedef typename device_type::memory_space dev_memory_space;
385 auto map = X.getMap ();
386 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
387 if (comm.is_null ()) {
388 return std::shared_ptr< ::Tpetra::Details::CommRequest> (NULL);
391 const size_t X_numVecs = X.getNumVectors ();
392 const size_t Y_numVecs = Y.getNumVectors ();
393 const size_t numVecs = (X_numVecs > Y_numVecs) ? X_numVecs : Y_numVecs;
397 if (X_numVecs != Y_numVecs &&
398 X_numVecs !=
size_t (1) &&
399 Y_numVecs != size_t (1)) {
400 std::ostringstream os;
401 os <<
"Tpetra::idot: X.getNumVectors() = " << X_numVecs
402 <<
" != Y.getNumVectors() = " << Y_numVecs
403 <<
", but neither is 1.";
404 throw std::invalid_argument (os.str ());
414 const bool resultOnDevice =
415 MemorySpaceAccess<dev_memory_space, HostSpace>::accessible;
416 if (resultOnDevice) {
417 result_dev_view_type gblResult (resultRaw, numVecs);
418 result_dev_view_type lclResult = needCopy ?
419 result_dev_view_type (
"lclResult", numVecs) :
422 return iallreduce (lclResult, gblResult, ::Teuchos::REDUCE_SUM, *comm);
425 result_host_view_type gblResult (resultRaw, numVecs);
426 result_host_view_type lclResult = needCopy ?
427 result_host_view_type (
"lclResult", numVecs) :
430 return iallreduce (lclResult, gblResult, ::Teuchos::REDUCE_SUM, *comm);
475 template<
class SC,
class LO,
class GO,
class NT>
476 std::shared_ptr< ::Tpetra::Details::CommRequest>
477 idot (
const Kokkos::View<typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type,
478 typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type>& result,
479 const ::Tpetra::Vector<SC, LO, GO, NT>& X,
480 const ::Tpetra::Vector<SC, LO, GO, NT>& Y)
482 using ::Kokkos::Impl::MemorySpaceAccess;
483 using ::Kokkos::HostSpace;
484 using ::Kokkos::View;
486 typedef ::Tpetra::MultiVector<SC, LO, GO, NT> MV;
487 typedef typename MV::dot_type dot_type;
488 typedef typename MV::device_type device_type;
489 typedef View<dot_type, device_type> result_view_type;
491 auto map = X.getMap ();
492 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
493 if (comm.is_null ()) {
494 return std::shared_ptr< ::Tpetra::Details::CommRequest> (NULL);
502 constexpr
bool resultOnDevice =
true;
503 result_view_type gblResult = result;
504 result_view_type lclResult = needCopy ?
505 result_view_type (
"lclResult") :
508 return iallreduce (lclResult, gblResult, ::Teuchos::REDUCE_SUM, *comm);
573 template<
class SC,
class LO,
class GO,
class NT>
574 std::shared_ptr< ::Tpetra::Details::CommRequest>
575 idot (
const Kokkos::View<typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*,
576 typename ::Tpetra::MultiVector<SC, LO, GO, NT>::device_type>& result,
577 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
578 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
580 using ::Kokkos::Impl::MemorySpaceAccess;
581 using ::Kokkos::HostSpace;
582 using ::Kokkos::View;
584 typedef ::Tpetra::MultiVector<SC, LO, GO, NT> MV;
585 typedef typename MV::dot_type dot_type;
586 typedef typename MV::device_type device_type;
587 typedef View<dot_type*, device_type> result_view_type;
589 auto map = X.getMap ();
590 auto comm = map.is_null () ? ::Teuchos::null : map->getComm ();
591 if (comm.is_null ()) {
592 return std::shared_ptr< ::Tpetra::Details::CommRequest> (NULL);
600 const size_t X_numVecs = X.getNumVectors ();
601 const size_t Y_numVecs = Y.getNumVectors ();
602 if (X_numVecs != Y_numVecs &&
603 X_numVecs !=
size_t (1) &&
604 Y_numVecs !=
size_t (1)) {
605 std::ostringstream os;
606 os <<
"Tpetra::idot: X.getNumVectors() = " << X_numVecs
607 <<
" != Y.getNumVectors() = " << Y_numVecs
608 <<
", but neither is 1.";
609 throw std::invalid_argument (os.str ());
617 constexpr
bool resultOnDevice =
true;
618 result_view_type gblResult = result;
619 result_view_type lclResult = needCopy ?
620 result_view_type (
"lclResult", result.extent (0)) :
623 return iallreduce (lclResult, gblResult, ::Teuchos::REDUCE_SUM, *comm);
628 #endif // TPETRA_DETAILS_IDOT_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator. ...
Declaration of Tpetra::Details::isInterComm.
std::shared_ptr< ::Tpetra::Details::CommRequest > idot(typename::Tpetra::MultiVector< SC, LO, GO, NT >::dot_type *resultRaw, const ::Tpetra::MultiVector< SC, LO, GO, NT > &X, const ::Tpetra::MultiVector< SC, LO, GO, NT > &Y)
Nonblocking dot product, with either Tpetra::MultiVector or Tpetra::Vector inputs, and raw pointer or raw array output.
void lclDotRaw(typename::Tpetra::MultiVector< SC, LO, GO, NT >::dot_type *const resultRaw, const ::Tpetra::MultiVector< SC, LO, GO, NT > &X, const ::Tpetra::MultiVector< SC, LO, GO, NT > &Y, const bool resultOnDevice)
Compute the local dot product(s), columnwise, of X and Y.
Declaration of Tpetra::Details::iallreduce.
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.