Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_normImpl.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_NORMIMPL_HPP
11 #define TPETRA_DETAILS_NORMIMPL_HPP
12 
21 
22 #include "TpetraCore_config.h"
23 #include "Kokkos_Core.hpp"
24 #include "Teuchos_ArrayView.hpp"
25 #include "Teuchos_CommHelpers.hpp"
26 #include "KokkosBlas.hpp"
27 #if KOKKOS_VERSION >= 40799
28 #include "KokkosKernels_ArithTraits.hpp"
29 #else
30 #include "Kokkos_ArithTraits.hpp"
31 #endif
32 
33 #ifndef DOXYGEN_SHOULD_SKIP_THIS
34 namespace Teuchos {
35 template <class T>
36 class ArrayView; // forward declaration
37 template <class OrdinalType>
38 class Comm; // forward declaration
39 } // namespace Teuchos
40 #endif // DOXYGEN_SHOULD_SKIP_THIS
41 
43 // Declarations start here
45 
46 namespace Tpetra {
47 namespace Details {
48 
50 enum EWhichNorm {
51  NORM_ONE, //<! Use the one-norm
52  NORM_TWO, //<! Use the two-norm
53  NORM_INF //<! Use the infinity-norm
54 };
55 
57 template <class ValueType,
58  class ArrayLayout,
59  class DeviceType,
60  class MagnitudeType>
61 void normImpl(MagnitudeType norms[],
62  const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
63  const EWhichNorm whichNorm,
64  const Teuchos::ArrayView<const size_t>& whichVecs,
65  const bool isConstantStride,
66  const bool isDistributed,
67  const Teuchos::Comm<int>* comm);
68 
69 } // namespace Details
70 } // namespace Tpetra
71 
73 // Definitions start here
75 
76 namespace Tpetra {
77 namespace Details {
78 namespace Impl {
79 
80 template <class RV, class XMV>
81 void lclNormImpl(const RV& normsOut,
82  const XMV& X,
83  const size_t numVecs,
84  const Teuchos::ArrayView<const size_t>& whichVecs,
85  const bool constantStride,
86  const EWhichNorm whichNorm) {
87  using Kokkos::ALL;
88  using Kokkos::subview;
89  using mag_type = typename RV::non_const_value_type;
90 
91  static_assert(static_cast<int>(RV::rank) == 1,
92  "Tpetra::MultiVector::lclNormImpl: "
93  "The first argument normsOut must have rank 1.");
94  static_assert(Kokkos::is_view<XMV>::value,
95  "Tpetra::MultiVector::lclNormImpl: "
96  "The second argument X is not a Kokkos::View.");
97  static_assert(static_cast<int>(XMV::rank) == 2,
98  "Tpetra::MultiVector::lclNormImpl: "
99  "The second argument X must have rank 2.");
100 
101  const size_t lclNumRows = static_cast<size_t>(X.extent(0));
102  TEUCHOS_TEST_FOR_EXCEPTION(lclNumRows != 0 && constantStride &&
103  static_cast<size_t>(X.extent(1)) != numVecs,
104  std::logic_error, "Constant Stride X's dimensions are " << X.extent(0) << " x " << X.extent(1) << ", which differ from the local dimensions " << lclNumRows << " x " << numVecs << ". Please report this bug to "
105  "the Tpetra developers.");
106  TEUCHOS_TEST_FOR_EXCEPTION(lclNumRows != 0 && !constantStride &&
107  static_cast<size_t>(X.extent(1)) < numVecs,
108  std::logic_error, "Strided X's dimensions are " << X.extent(0) << " x " << X.extent(1) << ", which are incompatible with the local dimensions " << lclNumRows << " x " << numVecs << ". Please report this bug to "
109  "the Tpetra developers.");
110 
111  if (lclNumRows == 0) {
112 #if KOKKOS_VERSION >= 40799
113  const mag_type zeroMag = KokkosKernels::ArithTraits<mag_type>::zero();
114 #else
115  const mag_type zeroMag = Kokkos::ArithTraits<mag_type>::zero();
116 #endif
117  // DEEP_COPY REVIEW - VALUE-TO-DEVICE
118  using execution_space = typename RV::execution_space;
119  Kokkos::deep_copy(execution_space(), normsOut, zeroMag);
120  } else { // lclNumRows != 0
121  if (constantStride) {
122  if (whichNorm == NORM_INF) {
123  KokkosBlas::nrminf(normsOut, X);
124  } else if (whichNorm == NORM_ONE) {
125  KokkosBlas::nrm1(normsOut, X);
126  } else if (whichNorm == NORM_TWO) {
127  KokkosBlas::nrm2_squared(normsOut, X);
128  } else {
129  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Should never get here!");
130  }
131  } else { // not constant stride
132  // NOTE (mfh 15 Jul 2014, 11 Apr 2019) This does a kernel launch
133  // for every column. It might be better to have a kernel that
134  // does the work all at once. On the other hand, we don't
135  // prioritize performance of MultiVector views of noncontiguous
136  // columns.
137  for (size_t k = 0; k < numVecs; ++k) {
138  const size_t X_col = constantStride ? k : whichVecs[k];
139  if (whichNorm == NORM_INF) {
140  KokkosBlas::nrminf(subview(normsOut, k),
141  subview(X, ALL(), X_col));
142  } else if (whichNorm == NORM_ONE) {
143  KokkosBlas::nrm1(subview(normsOut, k),
144  subview(X, ALL(), X_col));
145  } else if (whichNorm == NORM_TWO) {
146  KokkosBlas::nrm2_squared(subview(normsOut, k),
147  subview(X, ALL(), X_col));
148  } else {
149  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Should never get here!");
150  }
151  } // for each column
152  } // constantStride
153  } // lclNumRows != 0
154 }
155 
156 // Kokkos::parallel_for functor for applying square root to each
157 // entry of a 1-D Kokkos::View.
158 template <class ViewType>
159 class SquareRootFunctor {
160  public:
161  typedef typename ViewType::execution_space execution_space;
162  typedef typename ViewType::size_type size_type;
163 
164  SquareRootFunctor(const ViewType& theView)
165  : theView_(theView) {}
166 
167  KOKKOS_INLINE_FUNCTION void
168  operator()(const size_type& i) const {
169  typedef typename ViewType::non_const_value_type value_type;
170 #if KOKKOS_VERSION >= 40799
171  typedef KokkosKernels::ArithTraits<value_type> KAT;
172 #else
173  typedef Kokkos::ArithTraits<value_type> KAT;
174 #endif
175  theView_(i) = KAT::sqrt(theView_(i));
176  }
177 
178  private:
179  ViewType theView_;
180 };
181 
182 template <class RV>
183 void gblNormImpl(const RV& normsOut,
184  const Teuchos::Comm<int>* const comm,
185  const bool distributed,
186  const EWhichNorm whichNorm) {
187  using Teuchos::REDUCE_MAX;
188  using Teuchos::REDUCE_SUM;
189  using Teuchos::reduceAll;
190  typedef typename RV::non_const_value_type mag_type;
191 
192  const size_t numVecs = normsOut.extent(0);
193 
194  // If the MultiVector is distributed over multiple processes, do
195  // the distributed (interprocess) part of the norm. We assume
196  // that the MPI implementation can read from and write to device
197  // memory.
198  //
199  // replaceMap() may have removed some processes. Those processes
200  // have a null Map. They must not participate in any collective
201  // operations. We ask first whether the Map is null, because
202  // isDistributed() defers that question to the Map. We still
203  // compute and return local norms for processes not participating
204  // in collective operations; those probably don't make any sense,
205  // but it doesn't hurt to do them, since it's illegal to call
206  // norm*() on those processes anyway.
207  if (distributed && comm != nullptr) {
208  // The calling process only participates in the collective if
209  // both the Map and its Comm on that process are nonnull.
210 
211  const int nv = static_cast<int>(numVecs);
212  const bool commIsInterComm = ::Tpetra::Details::isInterComm(*comm);
213 
214  if (commIsInterComm) {
215  RV lclNorms(Kokkos::ViewAllocateWithoutInitializing("MV::normImpl lcl"), numVecs);
216  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
217  using execution_space = typename RV::execution_space;
218  Kokkos::deep_copy(execution_space(), lclNorms, normsOut);
219  const mag_type* const lclSum = lclNorms.data();
220  mag_type* const gblSum = normsOut.data();
221 
222  if (whichNorm == NORM_INF) {
223  reduceAll<int, mag_type>(*comm, REDUCE_MAX, nv, lclSum, gblSum);
224  } else {
225  reduceAll<int, mag_type>(*comm, REDUCE_SUM, nv, lclSum, gblSum);
226  }
227  } else {
228  mag_type* const gblSum = normsOut.data();
229  if (whichNorm == NORM_INF) {
230  reduceAll<int, mag_type>(*comm, REDUCE_MAX, nv, gblSum, gblSum);
231  } else {
232  reduceAll<int, mag_type>(*comm, REDUCE_SUM, nv, gblSum, gblSum);
233  }
234  }
235  }
236 
237  if (whichNorm == NORM_TWO) {
238  // Replace the norm-squared results with their square roots in
239  // place, to get the final output. If the device memory and
240  // the host memory are the same, it probably doesn't pay to
241  // launch a parallel kernel for that, since there isn't enough
242  // parallelism for the typical MultiVector case.
243  const bool inHostMemory =
244  std::is_same<typename RV::memory_space,
245  typename RV::host_mirror_space::memory_space>::value;
246  if (inHostMemory) {
247  for (size_t j = 0; j < numVecs; ++j) {
248 #if KOKKOS_VERSION >= 40799
249  normsOut(j) = KokkosKernels::ArithTraits<mag_type>::sqrt(normsOut(j));
250 #else
251  normsOut(j) = Kokkos::ArithTraits<mag_type>::sqrt(normsOut(j));
252 #endif
253  }
254  } else {
255  // There's not as much parallelism now, but that's OK. The
256  // point of doing parallel dispatch here is to keep the norm
257  // results on the device, thus avoiding a copy to the host
258  // and back again.
259  SquareRootFunctor<RV> f(normsOut);
260  typedef typename RV::execution_space execution_space;
261  typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
262  Kokkos::parallel_for(range_type(0, numVecs), f);
263  }
264  }
265 }
266 
267 } // namespace Impl
268 
269 template <class ValueType,
270  class ArrayLayout,
271  class DeviceType,
272  class MagnitudeType>
273 void normImpl(MagnitudeType norms[],
274  const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
275  const EWhichNorm whichNorm,
276  const Teuchos::ArrayView<const size_t>& whichVecs,
277  const bool isConstantStride,
278  const bool isDistributed,
279  const Teuchos::Comm<int>* comm) {
280  using execution_space = typename DeviceType::execution_space;
281  using RV = Kokkos::View<MagnitudeType*, Kokkos::HostSpace>;
282  // using XMV = Kokkos::View<const ValueType**, ArrayLayout, DeviceType>;
283  // using pair_type = std::pair<size_t, size_t>;
284 
285  const size_t numVecs = isConstantStride ? static_cast<size_t>(X.extent(1)) : static_cast<size_t>(whichVecs.size());
286  if (numVecs == 0) {
287  return; // nothing to do
288  }
289  RV normsOut(norms, numVecs);
290 
291  Impl::lclNormImpl(normsOut, X, numVecs, whichVecs,
292  isConstantStride, whichNorm);
293 
294  // lbv 03/15/23: the data from the local norm calculation
295  // better really be available before communication happens
296  // so fencing to make sure the local computations have
297  // completed on device. We might want to make this an
298  // execution space fence down the road?
299  execution_space exec_space_instance = execution_space();
300  exec_space_instance.fence();
301 
302  Impl::gblNormImpl(normsOut, comm, isDistributed, whichNorm);
303 }
304 
305 } // namespace Details
306 } // namespace Tpetra
307 
308 #endif // TPETRA_DETAILS_NORMIMPL_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
EWhichNorm
Input argument for normImpl() (which see).