42 #ifndef TPETRA_DETAILS_NORMIMPL_HPP
43 #define TPETRA_DETAILS_NORMIMPL_HPP
54 #include "TpetraCore_config.h"
55 #include "Kokkos_Core.hpp"
56 #include "Teuchos_ArrayView.hpp"
57 #include "Teuchos_CommHelpers.hpp"
58 #include "KokkosBlas.hpp"
59 #include "Kokkos_ArithTraits.hpp"
61 #ifndef DOXYGEN_SHOULD_SKIP_THIS
65 template<
class OrdinalType>
68 #endif // DOXYGEN_SHOULD_SKIP_THIS
85 template <
class ValueType,
91 const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
93 const Teuchos::ArrayView<const size_t>& whichVecs,
94 const bool isConstantStride,
95 const bool isDistributed,
96 const Teuchos::Comm<int>* comm);
109 template<
class RV,
class XMV>
111 lclNormImpl (
const RV& normsOut,
113 const size_t numVecs,
114 const Teuchos::ArrayView<const size_t>& whichVecs,
115 const bool constantStride,
119 using Kokkos::subview;
120 using mag_type =
typename RV::non_const_value_type;
122 static_assert (static_cast<int> (RV::rank) == 1,
123 "Tpetra::MultiVector::lclNormImpl: "
124 "The first argument normsOut must have rank 1.");
125 static_assert (Kokkos::is_view<XMV>::value,
126 "Tpetra::MultiVector::lclNormImpl: "
127 "The second argument X is not a Kokkos::View.");
128 static_assert (static_cast<int> (XMV::rank) == 2,
129 "Tpetra::MultiVector::lclNormImpl: "
130 "The second argument X must have rank 2.");
132 const size_t lclNumRows =
static_cast<size_t> (X.extent (0));
133 TEUCHOS_TEST_FOR_EXCEPTION
134 (lclNumRows != 0 && constantStride &&
135 static_cast<size_t> (X.extent (1)) != numVecs,
136 std::logic_error,
"Constant Stride X's dimensions are " << X.extent (0)
137 <<
" x " << X.extent (1) <<
", which differ from the local dimensions "
138 << lclNumRows <<
" x " << numVecs <<
". Please report this bug to "
139 "the Tpetra developers.");
140 TEUCHOS_TEST_FOR_EXCEPTION
141 (lclNumRows != 0 && ! constantStride &&
142 static_cast<size_t> (X.extent (1)) < numVecs,
143 std::logic_error,
"Strided X's dimensions are " << X.extent (0) <<
" x "
144 << X.extent (1) <<
", which are incompatible with the local dimensions "
145 << lclNumRows <<
" x " << numVecs <<
". Please report this bug to "
146 "the Tpetra developers.");
148 if (lclNumRows == 0) {
149 const mag_type zeroMag = Kokkos::ArithTraits<mag_type>::zero ();
151 using execution_space =
typename RV::execution_space;
155 if (constantStride) {
156 if (whichNorm == NORM_INF) {
157 KokkosBlas::nrminf (normsOut, X);
159 else if (whichNorm == NORM_ONE) {
160 KokkosBlas::nrm1 (normsOut, X);
162 else if (whichNorm == NORM_TWO) {
163 KokkosBlas::nrm2_squared (normsOut, X);
166 TEUCHOS_TEST_FOR_EXCEPTION
167 (
true, std::logic_error,
"Should never get here!");
176 for (
size_t k = 0; k < numVecs; ++k) {
177 const size_t X_col = constantStride ? k : whichVecs[k];
178 if (whichNorm == NORM_INF) {
179 KokkosBlas::nrminf (subview (normsOut, k),
180 subview (X, ALL (), X_col));
182 else if (whichNorm == NORM_ONE) {
183 KokkosBlas::nrm1 (subview (normsOut, k),
184 subview (X, ALL (), X_col));
186 else if (whichNorm == NORM_TWO) {
187 KokkosBlas::nrm2_squared (subview (normsOut, k),
188 subview (X, ALL (), X_col));
191 TEUCHOS_TEST_FOR_EXCEPTION
192 (
true, std::logic_error,
"Should never get here!");
201 template<
class ViewType>
202 class SquareRootFunctor {
204 typedef typename ViewType::execution_space execution_space;
205 typedef typename ViewType::size_type size_type;
207 SquareRootFunctor (
const ViewType& theView) :
211 KOKKOS_INLINE_FUNCTION
void
212 operator() (
const size_type& i)
const
214 typedef typename ViewType::non_const_value_type value_type;
215 typedef Kokkos::ArithTraits<value_type> KAT;
216 theView_(i) = KAT::sqrt (theView_(i));
225 gblNormImpl (
const RV& normsOut,
226 const Teuchos::Comm<int>*
const comm,
227 const bool distributed,
230 using Teuchos::REDUCE_MAX;
231 using Teuchos::REDUCE_SUM;
232 using Teuchos::reduceAll;
233 typedef typename RV::non_const_value_type mag_type;
235 const size_t numVecs = normsOut.extent (0);
250 if (distributed && comm !=
nullptr) {
254 const int nv =
static_cast<int> (numVecs);
255 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
257 if (commIsInterComm) {
258 RV lclNorms (Kokkos::ViewAllocateWithoutInitializing (
"MV::normImpl lcl"), numVecs);
260 using execution_space =
typename RV::execution_space;
262 const mag_type*
const lclSum = lclNorms.data ();
263 mag_type*
const gblSum = normsOut.data ();
265 if (whichNorm == NORM_INF) {
266 reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, lclSum, gblSum);
268 reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
271 mag_type*
const gblSum = normsOut.data ();
272 if (whichNorm == NORM_INF) {
273 reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, gblSum, gblSum);
275 reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, gblSum, gblSum);
280 if (whichNorm == NORM_TWO) {
286 const bool inHostMemory =
287 std::is_same<
typename RV::memory_space,
288 typename RV::host_mirror_space::memory_space>::value;
290 for (
size_t j = 0; j < numVecs; ++j) {
291 normsOut(j) = Kokkos::ArithTraits<mag_type>::sqrt (normsOut(j));
299 SquareRootFunctor<RV> f (normsOut);
300 typedef typename RV::execution_space execution_space;
301 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
302 Kokkos::parallel_for (range_type (0, numVecs), f);
309 template <
class ValueType,
315 const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
317 const Teuchos::ArrayView<const size_t>& whichVecs,
318 const bool isConstantStride,
319 const bool isDistributed,
320 const Teuchos::Comm<int>* comm)
322 using execution_space =
typename DeviceType::execution_space;
323 using RV = Kokkos::View<MagnitudeType*, Kokkos::HostSpace>;
327 const size_t numVecs = isConstantStride ?
328 static_cast<size_t> (X.extent (1)) :
329 static_cast<size_t> (whichVecs.size ());
333 RV normsOut (norms, numVecs);
335 Impl::lclNormImpl (normsOut, X, numVecs, whichVecs,
336 isConstantStride, whichNorm);
343 execution_space exec_space_instance = execution_space();
344 exec_space_instance.fence();
346 Impl::gblNormImpl (normsOut, comm, isDistributed, whichNorm);
352 #endif // TPETRA_DETAILS_NORMIMPL_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
EWhichNorm
Input argument for normImpl() (which see).