Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_normImpl.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_NORMIMPL_HPP
43 #define TPETRA_DETAILS_NORMIMPL_HPP
44 
53 
54 #include "TpetraCore_config.h"
55 #include "Kokkos_Core.hpp"
56 #include "Teuchos_ArrayView.hpp"
57 #include "Teuchos_CommHelpers.hpp"
58 #include "KokkosBlas.hpp"
59 #include "Kokkos_ArithTraits.hpp"
60 
61 #ifndef DOXYGEN_SHOULD_SKIP_THIS
62 namespace Teuchos {
63  template<class T>
64  class ArrayView; // forward declaration
65  template<class OrdinalType>
66  class Comm; // forward declaration
67 }
68 #endif // DOXYGEN_SHOULD_SKIP_THIS
69 
71 // Declarations start here
73 
74 namespace Tpetra {
75 namespace Details {
76 
78 enum EWhichNorm {
79  NORM_ONE, //<! Use the one-norm
80  NORM_TWO, //<! Use the two-norm
81  NORM_INF //<! Use the infinity-norm
82 };
83 
85 template <class ValueType,
86  class ArrayLayout,
87  class DeviceType,
88  class MagnitudeType>
89 void
90 normImpl (MagnitudeType norms[],
91  const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
92  const EWhichNorm whichNorm,
93  const Teuchos::ArrayView<const size_t>& whichVecs,
94  const bool isConstantStride,
95  const bool isDistributed,
96  const Teuchos::Comm<int>* comm);
97 
98 } // namespace Details
99 } // namespace Tpetra
100 
102 // Definitions start here
104 
105 namespace Tpetra {
106 namespace Details {
107 namespace Impl {
108 
109 template<class RV, class XMV>
110 void
111 lclNormImpl (const RV& normsOut,
112  const XMV& X,
113  const size_t numVecs,
114  const Teuchos::ArrayView<const size_t>& whichVecs,
115  const bool constantStride,
116  const EWhichNorm whichNorm)
117 {
118  using Kokkos::ALL;
119  using Kokkos::subview;
120  using mag_type = typename RV::non_const_value_type;
121 
122  static_assert (static_cast<int> (RV::Rank) == 1,
123  "Tpetra::MultiVector::lclNormImpl: "
124  "The first argument normsOut must have rank 1.");
125  static_assert (Kokkos::Impl::is_view<XMV>::value,
126  "Tpetra::MultiVector::lclNormImpl: "
127  "The second argument X is not a Kokkos::View.");
128  static_assert (static_cast<int> (XMV::Rank) == 2,
129  "Tpetra::MultiVector::lclNormImpl: "
130  "The second argument X must have rank 2.");
131 
132  const size_t lclNumRows = static_cast<size_t> (X.extent (0));
133  TEUCHOS_TEST_FOR_EXCEPTION
134  (lclNumRows != 0 && constantStride &&
135  static_cast<size_t> (X.extent (1)) != numVecs,
136  std::logic_error, "Constant Stride X's dimensions are " << X.extent (0)
137  << " x " << X.extent (1) << ", which differ from the local dimensions "
138  << lclNumRows << " x " << numVecs << ". Please report this bug to "
139  "the Tpetra developers.");
140  TEUCHOS_TEST_FOR_EXCEPTION
141  (lclNumRows != 0 && ! constantStride &&
142  static_cast<size_t> (X.extent (1)) < numVecs,
143  std::logic_error, "Strided X's dimensions are " << X.extent (0) << " x "
144  << X.extent (1) << ", which are incompatible with the local dimensions "
145  << lclNumRows << " x " << numVecs << ". Please report this bug to "
146  "the Tpetra developers.");
147 
148  if (lclNumRows == 0) {
149  const mag_type zeroMag = Kokkos::ArithTraits<mag_type>::zero ();
150  Kokkos::deep_copy (normsOut, zeroMag);
151  }
152  else { // lclNumRows != 0
153  if (constantStride) {
154  if (whichNorm == NORM_INF) {
155  KokkosBlas::nrminf (normsOut, X);
156  }
157  else if (whichNorm == NORM_ONE) {
158  KokkosBlas::nrm1 (normsOut, X);
159  }
160  else if (whichNorm == NORM_TWO) {
161  KokkosBlas::nrm2_squared (normsOut, X);
162  }
163  else {
164  TEUCHOS_TEST_FOR_EXCEPTION
165  (true, std::logic_error, "Should never get here!");
166  }
167  }
168  else { // not constant stride
169  // NOTE (mfh 15 Jul 2014, 11 Apr 2019) This does a kernel launch
170  // for every column. It might be better to have a kernel that
171  // does the work all at once. On the other hand, we don't
172  // prioritize performance of MultiVector views of noncontiguous
173  // columns.
174  for (size_t k = 0; k < numVecs; ++k) {
175  const size_t X_col = constantStride ? k : whichVecs[k];
176  if (whichNorm == NORM_INF) {
177  KokkosBlas::nrminf (subview (normsOut, k),
178  subview (X, ALL (), X_col));
179  }
180  else if (whichNorm == NORM_ONE) {
181  KokkosBlas::nrm1 (subview (normsOut, k),
182  subview (X, ALL (), X_col));
183  }
184  else if (whichNorm == NORM_TWO) {
185  KokkosBlas::nrm2_squared (subview (normsOut, k),
186  subview (X, ALL (), X_col));
187  }
188  else {
189  TEUCHOS_TEST_FOR_EXCEPTION
190  (true, std::logic_error, "Should never get here!");
191  }
192  } // for each column
193  } // constantStride
194  } // lclNumRows != 0
195 }
196 
197 // Kokkos::parallel_for functor for applying square root to each
198 // entry of a 1-D Kokkos::View.
199 template<class ViewType>
200 class SquareRootFunctor {
201 public:
202  typedef typename ViewType::execution_space execution_space;
203  typedef typename ViewType::size_type size_type;
204 
205  SquareRootFunctor (const ViewType& theView) :
206  theView_ (theView)
207  {}
208 
209  KOKKOS_INLINE_FUNCTION void
210  operator() (const size_type& i) const
211  {
212  typedef typename ViewType::non_const_value_type value_type;
213  typedef Kokkos::Details::ArithTraits<value_type> KAT;
214  theView_(i) = KAT::sqrt (theView_(i));
215  }
216 
217 private:
218  ViewType theView_;
219 };
220 
221 template<class RV>
222 void
223 gblNormImpl (const RV& normsOut,
224  const Teuchos::Comm<int>* const comm,
225  const bool distributed,
226  const EWhichNorm whichNorm)
227 {
228  using Teuchos::REDUCE_MAX;
229  using Teuchos::REDUCE_SUM;
230  using Teuchos::reduceAll;
231  typedef typename RV::non_const_value_type mag_type;
232 
233  const size_t numVecs = normsOut.extent (0);
234 
235  // If the MultiVector is distributed over multiple processes, do
236  // the distributed (interprocess) part of the norm. We assume
237  // that the MPI implementation can read from and write to device
238  // memory.
239  //
240  // replaceMap() may have removed some processes. Those processes
241  // have a null Map. They must not participate in any collective
242  // operations. We ask first whether the Map is null, because
243  // isDistributed() defers that question to the Map. We still
244  // compute and return local norms for processes not participating
245  // in collective operations; those probably don't make any sense,
246  // but it doesn't hurt to do them, since it's illegal to call
247  // norm*() on those processes anyway.
248  if (distributed && comm != nullptr) {
249  // The calling process only participates in the collective if
250  // both the Map and its Comm on that process are nonnull.
251  //
252  // MPI doesn't allow aliasing of arguments, so we have to make
253  // a copy of the local sum.
254  RV lclNorms ("MV::normImpl lcl", numVecs);
255  Kokkos::deep_copy (lclNorms, normsOut);
256  const mag_type* const lclSum = lclNorms.data ();
257  mag_type* const gblSum = normsOut.data ();
258  const int nv = static_cast<int> (numVecs);
259  if (whichNorm == NORM_INF) {
260  reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, lclSum, gblSum);
261  } else {
262  reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
263  }
264  }
265 
266  if (whichNorm == NORM_TWO) {
267  // Replace the norm-squared results with their square roots in
268  // place, to get the final output. If the device memory and
269  // the host memory are the same, it probably doesn't pay to
270  // launch a parallel kernel for that, since there isn't enough
271  // parallelism for the typical MultiVector case.
272  const bool inHostMemory =
273  std::is_same<typename RV::memory_space,
274  typename RV::host_mirror_space::memory_space>::value;
275  if (inHostMemory) {
276  for (size_t j = 0; j < numVecs; ++j) {
277  normsOut(j) = Kokkos::Details::ArithTraits<mag_type>::sqrt (normsOut(j));
278  }
279  }
280  else {
281  // There's not as much parallelism now, but that's OK. The
282  // point of doing parallel dispatch here is to keep the norm
283  // results on the device, thus avoiding a copy to the host
284  // and back again.
285  SquareRootFunctor<RV> f (normsOut);
286  typedef typename RV::execution_space execution_space;
287  typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
288  Kokkos::parallel_for (range_type (0, numVecs), f);
289  }
290  }
291 }
292 
293 } // namespace Impl
294 
295 template <class ValueType,
296  class ArrayLayout,
297  class DeviceType,
298  class MagnitudeType>
299 void
300 normImpl (MagnitudeType norms[],
301  const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
302  const EWhichNorm whichNorm,
303  const Teuchos::ArrayView<const size_t>& whichVecs,
304  const bool isConstantStride,
305  const bool isDistributed,
306  const Teuchos::Comm<int>* comm)
307 {
308  using RV = Kokkos::View<MagnitudeType*, Kokkos::HostSpace>;
309  //using XMV = Kokkos::View<const ValueType**, ArrayLayout, DeviceType>;
310  //using pair_type = std::pair<size_t, size_t>;
311 
312  const size_t numVecs = isConstantStride ?
313  static_cast<size_t> (X.extent (1)) :
314  static_cast<size_t> (whichVecs.size ());
315  if (numVecs == 0) {
316  return; // nothing to do
317  }
318  RV normsOut (norms, numVecs);
319 
320  Impl::lclNormImpl (normsOut, X, numVecs, whichVecs,
321  isConstantStride, whichNorm);
322  Impl::gblNormImpl (normsOut, comm, isDistributed, whichNorm);
323 }
324 
325 } // namespace Details
326 } // namespace Tpetra
327 
328 #endif // TPETRA_DETAILS_NORMIMPL_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
EWhichNorm
Input argument for normImpl() (which see).