Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_normImpl.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_NORMIMPL_HPP
11 #define TPETRA_DETAILS_NORMIMPL_HPP
12 
21 
22 #include "TpetraCore_config.h"
23 #include "Kokkos_Core.hpp"
24 #include "Teuchos_ArrayView.hpp"
25 #include "Teuchos_CommHelpers.hpp"
26 #include "KokkosBlas.hpp"
27 #include "Kokkos_ArithTraits.hpp"
28 
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 namespace Teuchos {
31  template<class T>
32  class ArrayView; // forward declaration
33  template<class OrdinalType>
34  class Comm; // forward declaration
35 }
36 #endif // DOXYGEN_SHOULD_SKIP_THIS
37 
39 // Declarations start here
41 
42 namespace Tpetra {
43 namespace Details {
44 
46 enum EWhichNorm {
47  NORM_ONE, //<! Use the one-norm
48  NORM_TWO, //<! Use the two-norm
49  NORM_INF //<! Use the infinity-norm
50 };
51 
53 template <class ValueType,
54  class ArrayLayout,
55  class DeviceType,
56  class MagnitudeType>
57 void
58 normImpl (MagnitudeType norms[],
59  const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
60  const EWhichNorm whichNorm,
61  const Teuchos::ArrayView<const size_t>& whichVecs,
62  const bool isConstantStride,
63  const bool isDistributed,
64  const Teuchos::Comm<int>* comm);
65 
66 } // namespace Details
67 } // namespace Tpetra
68 
70 // Definitions start here
72 
73 namespace Tpetra {
74 namespace Details {
75 namespace Impl {
76 
77 template<class RV, class XMV>
78 void
79 lclNormImpl (const RV& normsOut,
80  const XMV& X,
81  const size_t numVecs,
82  const Teuchos::ArrayView<const size_t>& whichVecs,
83  const bool constantStride,
84  const EWhichNorm whichNorm)
85 {
86  using Kokkos::ALL;
87  using Kokkos::subview;
88  using mag_type = typename RV::non_const_value_type;
89 
90  static_assert (static_cast<int> (RV::rank) == 1,
91  "Tpetra::MultiVector::lclNormImpl: "
92  "The first argument normsOut must have rank 1.");
93  static_assert (Kokkos::is_view<XMV>::value,
94  "Tpetra::MultiVector::lclNormImpl: "
95  "The second argument X is not a Kokkos::View.");
96  static_assert (static_cast<int> (XMV::rank) == 2,
97  "Tpetra::MultiVector::lclNormImpl: "
98  "The second argument X must have rank 2.");
99 
100  const size_t lclNumRows = static_cast<size_t> (X.extent (0));
101  TEUCHOS_TEST_FOR_EXCEPTION
102  (lclNumRows != 0 && constantStride &&
103  static_cast<size_t> (X.extent (1)) != numVecs,
104  std::logic_error, "Constant Stride X's dimensions are " << X.extent (0)
105  << " x " << X.extent (1) << ", which differ from the local dimensions "
106  << lclNumRows << " x " << numVecs << ". Please report this bug to "
107  "the Tpetra developers.");
108  TEUCHOS_TEST_FOR_EXCEPTION
109  (lclNumRows != 0 && ! constantStride &&
110  static_cast<size_t> (X.extent (1)) < numVecs,
111  std::logic_error, "Strided X's dimensions are " << X.extent (0) << " x "
112  << X.extent (1) << ", which are incompatible with the local dimensions "
113  << lclNumRows << " x " << numVecs << ". Please report this bug to "
114  "the Tpetra developers.");
115 
116  if (lclNumRows == 0) {
117  const mag_type zeroMag = Kokkos::ArithTraits<mag_type>::zero ();
118  // DEEP_COPY REVIEW - VALUE-TO-DEVICE
119  using execution_space = typename RV::execution_space;
120  Kokkos::deep_copy (execution_space(), normsOut, zeroMag);
121  }
122  else { // lclNumRows != 0
123  if (constantStride) {
124  if (whichNorm == NORM_INF) {
125  KokkosBlas::nrminf (normsOut, X);
126  }
127  else if (whichNorm == NORM_ONE) {
128  KokkosBlas::nrm1 (normsOut, X);
129  }
130  else if (whichNorm == NORM_TWO) {
131  KokkosBlas::nrm2_squared (normsOut, X);
132  }
133  else {
134  TEUCHOS_TEST_FOR_EXCEPTION
135  (true, std::logic_error, "Should never get here!");
136  }
137  }
138  else { // not constant stride
139  // NOTE (mfh 15 Jul 2014, 11 Apr 2019) This does a kernel launch
140  // for every column. It might be better to have a kernel that
141  // does the work all at once. On the other hand, we don't
142  // prioritize performance of MultiVector views of noncontiguous
143  // columns.
144  for (size_t k = 0; k < numVecs; ++k) {
145  const size_t X_col = constantStride ? k : whichVecs[k];
146  if (whichNorm == NORM_INF) {
147  KokkosBlas::nrminf (subview (normsOut, k),
148  subview (X, ALL (), X_col));
149  }
150  else if (whichNorm == NORM_ONE) {
151  KokkosBlas::nrm1 (subview (normsOut, k),
152  subview (X, ALL (), X_col));
153  }
154  else if (whichNorm == NORM_TWO) {
155  KokkosBlas::nrm2_squared (subview (normsOut, k),
156  subview (X, ALL (), X_col));
157  }
158  else {
159  TEUCHOS_TEST_FOR_EXCEPTION
160  (true, std::logic_error, "Should never get here!");
161  }
162  } // for each column
163  } // constantStride
164  } // lclNumRows != 0
165 }
166 
167 // Kokkos::parallel_for functor for applying square root to each
168 // entry of a 1-D Kokkos::View.
169 template<class ViewType>
170 class SquareRootFunctor {
171 public:
172  typedef typename ViewType::execution_space execution_space;
173  typedef typename ViewType::size_type size_type;
174 
175  SquareRootFunctor (const ViewType& theView) :
176  theView_ (theView)
177  {}
178 
179  KOKKOS_INLINE_FUNCTION void
180  operator() (const size_type& i) const
181  {
182  typedef typename ViewType::non_const_value_type value_type;
183  typedef Kokkos::ArithTraits<value_type> KAT;
184  theView_(i) = KAT::sqrt (theView_(i));
185  }
186 
187 private:
188  ViewType theView_;
189 };
190 
191 template<class RV>
192 void
193 gblNormImpl (const RV& normsOut,
194  const Teuchos::Comm<int>* const comm,
195  const bool distributed,
196  const EWhichNorm whichNorm)
197 {
198  using Teuchos::REDUCE_MAX;
199  using Teuchos::REDUCE_SUM;
200  using Teuchos::reduceAll;
201  typedef typename RV::non_const_value_type mag_type;
202 
203  const size_t numVecs = normsOut.extent (0);
204 
205  // If the MultiVector is distributed over multiple processes, do
206  // the distributed (interprocess) part of the norm. We assume
207  // that the MPI implementation can read from and write to device
208  // memory.
209  //
210  // replaceMap() may have removed some processes. Those processes
211  // have a null Map. They must not participate in any collective
212  // operations. We ask first whether the Map is null, because
213  // isDistributed() defers that question to the Map. We still
214  // compute and return local norms for processes not participating
215  // in collective operations; those probably don't make any sense,
216  // but it doesn't hurt to do them, since it's illegal to call
217  // norm*() on those processes anyway.
218  if (distributed && comm != nullptr) {
219  // The calling process only participates in the collective if
220  // both the Map and its Comm on that process are nonnull.
221 
222  const int nv = static_cast<int> (numVecs);
223  const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
224 
225  if (commIsInterComm) {
226  RV lclNorms (Kokkos::ViewAllocateWithoutInitializing ("MV::normImpl lcl"), numVecs);
227  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
228  using execution_space = typename RV::execution_space;
229  Kokkos::deep_copy (execution_space(), lclNorms, normsOut);
230  const mag_type* const lclSum = lclNorms.data ();
231  mag_type* const gblSum = normsOut.data ();
232 
233  if (whichNorm == NORM_INF) {
234  reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, lclSum, gblSum);
235  } else {
236  reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
237  }
238  } else {
239  mag_type* const gblSum = normsOut.data ();
240  if (whichNorm == NORM_INF) {
241  reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, gblSum, gblSum);
242  } else {
243  reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, gblSum, gblSum);
244  }
245  }
246  }
247 
248  if (whichNorm == NORM_TWO) {
249  // Replace the norm-squared results with their square roots in
250  // place, to get the final output. If the device memory and
251  // the host memory are the same, it probably doesn't pay to
252  // launch a parallel kernel for that, since there isn't enough
253  // parallelism for the typical MultiVector case.
254  const bool inHostMemory =
255  std::is_same<typename RV::memory_space,
256  typename RV::host_mirror_space::memory_space>::value;
257  if (inHostMemory) {
258  for (size_t j = 0; j < numVecs; ++j) {
259  normsOut(j) = Kokkos::ArithTraits<mag_type>::sqrt (normsOut(j));
260  }
261  }
262  else {
263  // There's not as much parallelism now, but that's OK. The
264  // point of doing parallel dispatch here is to keep the norm
265  // results on the device, thus avoiding a copy to the host
266  // and back again.
267  SquareRootFunctor<RV> f (normsOut);
268  typedef typename RV::execution_space execution_space;
269  typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
270  Kokkos::parallel_for (range_type (0, numVecs), f);
271  }
272  }
273 }
274 
275 } // namespace Impl
276 
277 template <class ValueType,
278  class ArrayLayout,
279  class DeviceType,
280  class MagnitudeType>
281 void
282 normImpl (MagnitudeType norms[],
283  const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
284  const EWhichNorm whichNorm,
285  const Teuchos::ArrayView<const size_t>& whichVecs,
286  const bool isConstantStride,
287  const bool isDistributed,
288  const Teuchos::Comm<int>* comm)
289 {
290  using execution_space = typename DeviceType::execution_space;
291  using RV = Kokkos::View<MagnitudeType*, Kokkos::HostSpace>;
292  //using XMV = Kokkos::View<const ValueType**, ArrayLayout, DeviceType>;
293  //using pair_type = std::pair<size_t, size_t>;
294 
295  const size_t numVecs = isConstantStride ?
296  static_cast<size_t> (X.extent (1)) :
297  static_cast<size_t> (whichVecs.size ());
298  if (numVecs == 0) {
299  return; // nothing to do
300  }
301  RV normsOut (norms, numVecs);
302 
303  Impl::lclNormImpl (normsOut, X, numVecs, whichVecs,
304  isConstantStride, whichNorm);
305 
306  // lbv 03/15/23: the data from the local norm calculation
307  // better really be available before communication happens
308  // so fencing to make sure the local computations have
309  // completed on device. We might want to make this an
310  // execution space fence down the road?
311  execution_space exec_space_instance = execution_space();
312  exec_space_instance.fence();
313 
314  Impl::gblNormImpl (normsOut, comm, isDistributed, whichNorm);
315 }
316 
317 } // namespace Details
318 } // namespace Tpetra
319 
320 #endif // TPETRA_DETAILS_NORMIMPL_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
EWhichNorm
Input argument for normImpl() (which see).