42 #ifndef TPETRA_DETAILS_IDOT_HPP
43 #define TPETRA_DETAILS_IDOT_HPP
63 #include "Tpetra_MultiVector.hpp"
64 #include "Tpetra_Vector.hpp"
65 #include "KokkosBlas1_dot.hpp"
101 template<
class SC,
class LO,
class GO,
class NT>
103 lclDotRaw (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*
const resultRaw,
104 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
105 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y,
106 const bool resultOnDevice)
108 using ::Kokkos::Impl::MemorySpaceAccess;
110 using ::Kokkos::HostSpace;
111 using ::Kokkos::subview;
112 using ::Kokkos::View;
113 typedef ::Kokkos::pair<size_t, size_t> pair_type;
114 typedef ::Tpetra::MultiVector<SC, LO, GO, NT> MV;
115 typedef typename MV::dot_type dot_type;
116 typedef typename MV::device_type device_type;
117 typedef typename MV::dual_view_type::t_dev::memory_space dev_memory_space;
118 typedef typename MV::dual_view_type::t_host::memory_space host_memory_space;
119 typedef View<dot_type*, device_type> result_dev_view_type;
120 typedef typename result_dev_view_type::HostMirror result_host_view_type;
122 const size_t numRows = X.getLocalLength ();
123 const pair_type rowRange (0, numRows);
124 const size_t X_numVecs = X.getNumVectors ();
125 const size_t Y_numVecs = Y.getNumVectors ();
126 const size_t numVecs = X_numVecs > Y_numVecs ? X_numVecs : Y_numVecs;
130 if (X_numVecs != Y_numVecs &&
131 X_numVecs !=
size_t (1) &&
132 Y_numVecs !=
size_t (1)) {
133 std::ostringstream os;
134 os <<
"Tpetra::idot: X.getNumVectors() = " << X_numVecs
135 <<
" != Y.getNumVectors() = " << Y_numVecs
136 <<
", but neither is 1.";
137 throw std::invalid_argument (os.str ());
140 const bool X_latestOnHost = X.template need_sync<dev_memory_space> () &&
141 ! X.template need_sync<host_memory_space> ();
142 const bool Y_latestOnHost = Y.template need_sync<dev_memory_space> () &&
143 ! Y.template need_sync<host_memory_space> ();
145 const bool runOnHost = X_latestOnHost;
150 const bool Y_copyToHost = (X_latestOnHost && ! Y_latestOnHost);
151 const bool Y_copyToDev = (! X_latestOnHost && Y_latestOnHost);
153 result_dev_view_type result;
154 result_host_view_type result_h;
155 if (resultOnDevice) {
156 result = result_dev_view_type (resultRaw, numVecs);
161 result_h = ::Kokkos::create_mirror_view (result);
166 result_h = result_host_view_type (resultRaw, numVecs);
170 result = result_dev_view_type (
"result", numVecs);
175 const bool X_or_Y_have_nonconstant_stride =
176 ! X.isConstantStride () || ! Y.isConstantStride ();
177 if (X_or_Y_have_nonconstant_stride && numVecs >
size_t (1)) {
189 for (
size_t j = 0; j < numVecs; ++j) {
192 const size_t X_col = (X_numVecs == size_t (1)) ?
size_t (0) : j;
193 const size_t Y_col = (Y_numVecs == size_t (1)) ?
size_t (0) : j;
194 auto X_j = X.getVector (X_col);
195 auto Y_j = Y.getVector (Y_col);
198 auto X_j_2d_h = X_j->template getLocalView<host_memory_space> ();
199 auto X_j_1d_h = subview (X_j_2d_h, rowRange, 0);
200 decltype (X_j_1d_h) Y_j_1d_h;
203 auto Y_j_2d = Y_j->template getLocalView<dev_memory_space> ();
204 auto Y_j_1d = subview (Y_j_2d, rowRange, 0);
205 typename decltype (Y_j_1d_h)::non_const_type
206 Y_j_1d_h_nc (
"Y_j_1d_h", Y_j_1d.extent (0));
208 Y_j_1d_h = Y_j_1d_h_nc;
211 auto Y_j_2d_h = Y_j->template getLocalView<host_memory_space> ();
212 Y_j_1d_h = subview (Y_j_2d_h, rowRange, 0);
214 auto result_h_j = subview (result_h, j);
215 KokkosBlas::dot (result_h_j, X_j_1d_h, Y_j_1d_h);
218 auto X_j_2d = X_j->template getLocalView<dev_memory_space> ();
219 auto X_j_1d = subview (X_j_2d, rowRange, 0);
220 decltype (X_j_1d) Y_j_1d;
223 auto Y_j_2d_h = Y_j->template getLocalView<host_memory_space> ();
224 auto Y_j_1d_h = subview (Y_j_2d_h, rowRange, 0);
225 typename decltype (Y_j_1d)::non_const_type
226 Y_j_1d_nc (
"Y_j_1d", Y_j_1d_h.extent (0));
231 auto Y_j_2d = Y_j->template getLocalView<dev_memory_space> ();
232 Y_j_1d = subview (Y_j_2d, rowRange, 0);
234 auto result_j = subview (result, j);
235 KokkosBlas::dot (result_j, X_j_1d, Y_j_1d);
242 auto X_lcl_h = X.template getLocalView<host_memory_space> ();
243 decltype (X_lcl_h) Y_lcl_h;
246 auto Y_lcl = Y.template getLocalView<dev_memory_space> ();
247 typename decltype (Y_lcl_h)::non_const_type
248 Y_lcl_h_nc (
"Y_lcl_h", Y_lcl.extent (0), Y_numVecs);
250 Y_lcl_h = Y_lcl_h_nc;
253 Y_lcl_h = Y.template getLocalView<host_memory_space> ();
256 if (numVecs ==
size_t (1)) {
257 auto X_lcl_h_1d = subview (X_lcl_h, rowRange, 0);
258 auto Y_lcl_h_1d = subview (Y_lcl_h, rowRange, 0);
259 auto result_h_0d = subview (result_h, 0);
260 KokkosBlas::dot (result_h_0d, X_lcl_h_1d, Y_lcl_h_1d);
263 auto X_lcl_h_2d = subview (X_lcl_h, rowRange,
264 pair_type (0, X_numVecs));
265 auto Y_lcl_h_2d = subview (Y_lcl_h, rowRange,
266 pair_type (0, Y_numVecs));
267 KokkosBlas::dot (result_h, X_lcl_h_2d, Y_lcl_h_2d);
271 auto X_lcl = X.template getLocalView<dev_memory_space> ();
272 decltype (X_lcl) Y_lcl;
275 auto Y_lcl_h = Y.template getLocalView<host_memory_space> ();
276 typename decltype (Y_lcl)::non_const_type
277 Y_lcl_nc (
"Y_lcl", Y_lcl_h.extent (0), Y_numVecs);
282 Y_lcl = Y.template getLocalView<dev_memory_space> ();
285 if (numVecs ==
size_t (1)) {
286 auto X_lcl_1d = subview (X_lcl, rowRange, 0);
287 auto Y_lcl_1d = subview (Y_lcl, rowRange, 0);
288 auto result_0d = subview (result, 0);
289 KokkosBlas::dot (result_0d, X_lcl_1d, Y_lcl_1d);
293 auto X_lcl_2d = subview (X_lcl, rowRange,
294 pair_type (0, X_numVecs));
295 auto Y_lcl_2d = subview (Y_lcl, rowRange,
296 pair_type (0, Y_numVecs));
297 KokkosBlas::dot (result, X_lcl_2d, Y_lcl_2d);
303 if (runOnHost && resultOnDevice) {
307 else if (! runOnHost && ! resultOnDevice) {
371 template<
class SC,
class LO,
class GO,
class NT>
372 std::shared_ptr< ::Tpetra::Details::CommRequest>
373 idot (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type* resultRaw,
374 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
375 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
377 using ::Kokkos::Impl::MemorySpaceAccess;
378 using ::Kokkos::HostSpace;
379 using ::Kokkos::View;
381 typedef ::Tpetra::MultiVector<SC, LO, GO, NT> MV;
382 typedef typename MV::dot_type dot_type;
383 typedef typename MV::device_type device_type;
384 typedef View<dot_type*, device_type> result_dev_view_type;
385 typedef typename result_dev_view_type::HostMirror result_host_view_type;
386 typedef typename device_type::memory_space dev_memory_space;
388 auto map = X.getMap ();
389 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
390 if (comm.is_null ()) {
391 return std::shared_ptr< ::Tpetra::Details::CommRequest> (NULL);
394 const size_t X_numVecs = X.getNumVectors ();
395 const size_t Y_numVecs = Y.getNumVectors ();
396 const size_t numVecs = (X_numVecs > Y_numVecs) ? X_numVecs : Y_numVecs;
400 if (X_numVecs != Y_numVecs &&
401 X_numVecs !=
size_t (1) &&
402 Y_numVecs != size_t (1)) {
403 std::ostringstream os;
404 os <<
"Tpetra::idot: X.getNumVectors() = " << X_numVecs
405 <<
" != Y.getNumVectors() = " << Y_numVecs
406 <<
", but neither is 1.";
407 throw std::invalid_argument (os.str ());
417 const bool resultOnDevice =
418 MemorySpaceAccess<dev_memory_space, HostSpace>::accessible;
419 if (resultOnDevice) {
420 result_dev_view_type gblResult (resultRaw, numVecs);
421 result_dev_view_type lclResult = needCopy ?
422 result_dev_view_type (
"lclResult", numVecs) :
425 return iallreduce (lclResult, gblResult, ::Teuchos::REDUCE_SUM, *comm);
428 result_host_view_type gblResult (resultRaw, numVecs);
429 result_host_view_type lclResult = needCopy ?
430 result_host_view_type (
"lclResult", numVecs) :
433 return iallreduce (lclResult, gblResult, ::Teuchos::REDUCE_SUM, *comm);
478 template<
class SC,
class LO,
class GO,
class NT>
479 std::shared_ptr< ::Tpetra::Details::CommRequest>
480 idot (
const Kokkos::View<typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type,
481 typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type>& result,
482 const ::Tpetra::Vector<SC, LO, GO, NT>& X,
483 const ::Tpetra::Vector<SC, LO, GO, NT>& Y)
485 using ::Kokkos::Impl::MemorySpaceAccess;
486 using ::Kokkos::HostSpace;
487 using ::Kokkos::View;
489 typedef ::Tpetra::MultiVector<SC, LO, GO, NT> MV;
490 typedef typename MV::dot_type dot_type;
491 typedef typename MV::device_type device_type;
492 typedef View<dot_type, device_type> result_view_type;
494 auto map = X.getMap ();
495 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
496 if (comm.is_null ()) {
497 return std::shared_ptr< ::Tpetra::Details::CommRequest> (NULL);
505 constexpr
bool resultOnDevice =
true;
506 result_view_type gblResult = result;
507 result_view_type lclResult = needCopy ?
508 result_view_type (
"lclResult") :
511 return iallreduce (lclResult, gblResult, ::Teuchos::REDUCE_SUM, *comm);
576 template<
class SC,
class LO,
class GO,
class NT>
577 std::shared_ptr< ::Tpetra::Details::CommRequest>
578 idot (
const Kokkos::View<typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*,
579 typename ::Tpetra::MultiVector<SC, LO, GO, NT>::device_type>& result,
580 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
581 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
583 using ::Kokkos::Impl::MemorySpaceAccess;
584 using ::Kokkos::HostSpace;
585 using ::Kokkos::View;
587 typedef ::Tpetra::MultiVector<SC, LO, GO, NT> MV;
588 typedef typename MV::dot_type dot_type;
589 typedef typename MV::device_type device_type;
590 typedef View<dot_type*, device_type> result_view_type;
592 auto map = X.getMap ();
593 auto comm = map.is_null () ? ::Teuchos::null : map->getComm ();
594 if (comm.is_null ()) {
595 return std::shared_ptr< ::Tpetra::Details::CommRequest> (NULL);
603 const size_t X_numVecs = X.getNumVectors ();
604 const size_t Y_numVecs = Y.getNumVectors ();
605 if (X_numVecs != Y_numVecs &&
606 X_numVecs !=
size_t (1) &&
607 Y_numVecs !=
size_t (1)) {
608 std::ostringstream os;
609 os <<
"Tpetra::idot: X.getNumVectors() = " << X_numVecs
610 <<
" != Y.getNumVectors() = " << Y_numVecs
611 <<
", but neither is 1.";
612 throw std::invalid_argument (os.str ());
620 constexpr
bool resultOnDevice =
true;
621 result_view_type gblResult = result;
622 result_view_type lclResult = needCopy ?
623 result_view_type (
"lclResult", result.extent (0)) :
626 return iallreduce (lclResult, gblResult, ::Teuchos::REDUCE_SUM, *comm);
631 #endif // TPETRA_DETAILS_IDOT_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator. ...
Declaration of Tpetra::Details::isInterComm.
std::shared_ptr< ::Tpetra::Details::CommRequest > idot(typename::Tpetra::MultiVector< SC, LO, GO, NT >::dot_type *resultRaw, const ::Tpetra::MultiVector< SC, LO, GO, NT > &X, const ::Tpetra::MultiVector< SC, LO, GO, NT > &Y)
Nonblocking dot product, with either Tpetra::MultiVector or Tpetra::Vector inputs, and raw pointer or raw array output.
void lclDotRaw(typename::Tpetra::MultiVector< SC, LO, GO, NT >::dot_type *const resultRaw, const ::Tpetra::MultiVector< SC, LO, GO, NT > &X, const ::Tpetra::MultiVector< SC, LO, GO, NT > &Y, const bool resultOnDevice)
Compute the local dot product(s), columnwise, of X and Y.
Declaration of Tpetra::Details::iallreduce.
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.