Tpetra_Details_normImpl_def.hpp
// @HEADER
// ***********************************************************************
//
//          Tpetra: Templated Linear Algebra Services Package
//                 Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
// @HEADER

#ifndef TPETRA_DETAILS_NORMIMPL_DEF_HPP
#define TPETRA_DETAILS_NORMIMPL_DEF_HPP

#include "Tpetra_Details_normImpl_decl.hpp" // for the enum
// #include "Tpetra_Details_Behavior.hpp"
// #include "Tpetra_Details_isInterComm.hpp"
// #include "Tpetra_Details_Profiling.hpp"
#include "Teuchos_ArrayView.hpp"
#include "Teuchos_CommHelpers.hpp"
#include "KokkosBlas.hpp"
#include "Kokkos_ArithTraits.hpp"

namespace Tpetra {
namespace Details {
namespace Impl {

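// Compute the local (per-process) norms of the columns of X and
// store them in normsOut.  For NORM_TWO, this computes the *squared*
// two-norms; gblNormImpl below takes the square root after the
// interprocess reduction.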
template<class RV, class XMV>
void
lclNormImpl (const RV& normsOut,
             const XMV& X,
             const size_t numVecs,
             const Teuchos::ArrayView<const size_t>& whichVecs,
             const bool constantStride,
             const EWhichNorm whichNorm)
{
  using Kokkos::ALL;
  using Kokkos::subview;
  using mag_type = typename RV::non_const_value_type;

  static_assert (static_cast<int> (RV::Rank) == 1,
                 "Tpetra::MultiVector::lclNormImpl: "
                 "The first argument normsOut must have rank 1.");
  static_assert (Kokkos::Impl::is_view<XMV>::value,
                 "Tpetra::MultiVector::lclNormImpl: "
                 "The second argument X is not a Kokkos::View.");
  static_assert (static_cast<int> (XMV::Rank) == 2,
                 "Tpetra::MultiVector::lclNormImpl: "
                 "The second argument X must have rank 2.");

  const size_t lclNumRows = static_cast<size_t> (X.extent (0));
  TEUCHOS_TEST_FOR_EXCEPTION
    (lclNumRows != 0 && constantStride &&
     static_cast<size_t> (X.extent (1)) != numVecs,
     std::logic_error, "Constant Stride X's dimensions are " << X.extent (0)
     << " x " << X.extent (1) << ", which differ from the local dimensions "
     << lclNumRows << " x " << numVecs << ". Please report this bug to "
     "the Tpetra developers.");
  TEUCHOS_TEST_FOR_EXCEPTION
    (lclNumRows != 0 && ! constantStride &&
     static_cast<size_t> (X.extent (1)) < numVecs,
     std::logic_error, "Strided X's dimensions are " << X.extent (0) << " x "
     << X.extent (1) << ", which are incompatible with the local dimensions "
     << lclNumRows << " x " << numVecs << ". Please report this bug to "
     "the Tpetra developers.");

  if (lclNumRows == 0) {
    const mag_type zeroMag = Kokkos::ArithTraits<mag_type>::zero ();
    Kokkos::deep_copy (normsOut, zeroMag);
  }
  else { // lclNumRows != 0
    if (constantStride) {
      if (whichNorm == NORM_INF) {
        KokkosBlas::nrminf (normsOut, X);
      }
      else if (whichNorm == NORM_ONE) {
        KokkosBlas::nrm1 (normsOut, X);
      }
      else if (whichNorm == NORM_TWO) {
        KokkosBlas::nrm2_squared (normsOut, X);
      }
      else {
        TEUCHOS_TEST_FOR_EXCEPTION
          (true, std::logic_error, "Should never get here!");
      }
    }
    else { // not constant stride
      // NOTE (mfh 15 Jul 2014, 11 Apr 2019) This does a kernel launch
      // for every column.  It might be better to have a kernel that
      // does the work all at once.  On the other hand, we don't
      // prioritize performance of MultiVector views of noncontiguous
      // columns.
      for (size_t k = 0; k < numVecs; ++k) {
        const size_t X_col = constantStride ? k : whichVecs[k];
        if (whichNorm == NORM_INF) {
          KokkosBlas::nrminf (subview (normsOut, k),
                              subview (X, ALL (), X_col));
        }
        else if (whichNorm == NORM_ONE) {
          KokkosBlas::nrm1 (subview (normsOut, k),
                            subview (X, ALL (), X_col));
        }
        else if (whichNorm == NORM_TWO) {
          KokkosBlas::nrm2_squared (subview (normsOut, k),
                                    subview (X, ALL (), X_col));
        }
        else {
          TEUCHOS_TEST_FOR_EXCEPTION
            (true, std::logic_error, "Should never get here!");
        }
      } // for each column
    } // constantStride
  } // lclNumRows != 0
}

// Kokkos::parallel_for functor for applying square root to each
// entry of a 1-D Kokkos::View.
template<class ViewType>
class SquareRootFunctor {
public:
  typedef typename ViewType::execution_space execution_space;
  typedef typename ViewType::size_type size_type;

  SquareRootFunctor (const ViewType& theView) :
    theView_ (theView)
  {}

  KOKKOS_INLINE_FUNCTION void
  operator() (const size_type& i) const
  {
    typedef typename ViewType::non_const_value_type value_type;
    typedef Kokkos::Details::ArithTraits<value_type> KAT;
    theView_(i) = KAT::sqrt (theView_(i));
  }

private:
  ViewType theView_;
};

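// Finish the norms begun by lclNormImpl: if the vector is
// distributed, combine the local results across processes (MAX for
// the infinity norm, SUM otherwise), then for NORM_TWO replace each
// sum of squares with its square root.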
template<class RV>
void
gblNormImpl (const RV& normsOut,
             const Teuchos::Comm<int>* const comm,
             const bool distributed,
             const EWhichNorm whichNorm)
{
  using Teuchos::REDUCE_MAX;
  using Teuchos::REDUCE_SUM;
  using Teuchos::reduceAll;
  typedef typename RV::non_const_value_type mag_type;

  const size_t numVecs = normsOut.extent (0);

  // If the MultiVector is distributed over multiple processes, do
  // the distributed (interprocess) part of the norm.  We assume
  // that the MPI implementation can read from and write to device
  // memory.
  //
  // replaceMap() may have removed some processes.  Those processes
  // have a null Map.  They must not participate in any collective
  // operations.  We ask first whether the Map is null, because
  // isDistributed() defers that question to the Map.  We still
  // compute and return local norms for processes not participating
  // in collective operations; those probably don't make any sense,
  // but it doesn't hurt to do them, since it's illegal to call
  // norm*() on those processes anyway.
  if (distributed && comm != nullptr) {
    // The calling process only participates in the collective if
    // both the Map and its Comm on that process are nonnull.
    //
    // MPI doesn't allow aliasing of arguments, so we have to make
    // a copy of the local sum.
    RV lclNorms ("MV::normImpl lcl", numVecs);
    Kokkos::deep_copy (lclNorms, normsOut);
    const mag_type* const lclSum = lclNorms.data ();
    mag_type* const gblSum = normsOut.data ();
    const int nv = static_cast<int> (numVecs);
    if (whichNorm == NORM_INF) {
      reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, lclSum, gblSum);
    } else {
      reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
    }
  }

  if (whichNorm == NORM_TWO) {
    // Replace the norm-squared results with their square roots in
    // place, to get the final output.  If the device memory and
    // the host memory are the same, it probably doesn't pay to
    // launch a parallel kernel for that, since there isn't enough
    // parallelism for the typical MultiVector case.
    const bool inHostMemory =
      Kokkos::Impl::is_same<typename RV::memory_space,
        typename RV::host_mirror_space::memory_space>::value;
    if (inHostMemory) {
      for (size_t j = 0; j < numVecs; ++j) {
        normsOut(j) = Kokkos::Details::ArithTraits<mag_type>::sqrt (normsOut(j));
      }
    }
    else {
      // There's not as much parallelism now, but that's OK.  The
      // point of doing parallel dispatch here is to keep the norm
      // results on the device, thus avoiding a copy to the host
      // and back again.
      SquareRootFunctor<RV> f (normsOut);
      typedef typename RV::execution_space execution_space;
      typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
      Kokkos::parallel_for (range_type (0, numVecs), f);
    }
  }
}

} // namespace Impl

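// Wrap the caller's raw output array in an unmanaged host View,
// compute the local norms, then finish with the interprocess
// reduction (and the square root, for the two-norm).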
template <class ValueType,
          class ArrayLayout,
          class DeviceType,
          class MagnitudeType>
void
normImpl (MagnitudeType norms[],
          const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
          const EWhichNorm whichNorm,
          const Teuchos::ArrayView<const size_t>& whichVecs,
          const bool isConstantStride,
          const bool isDistributed,
          const Teuchos::Comm<int>* comm)
{
  using RV = Kokkos::View<MagnitudeType*, Kokkos::HostSpace>;
  //using XMV = Kokkos::View<const ValueType**, ArrayLayout, DeviceType>;
  //using pair_type = std::pair<size_t, size_t>;

  const size_t numVecs = isConstantStride ?
    static_cast<size_t> (X.extent (1)) :
    static_cast<size_t> (whichVecs.size ());
  if (numVecs == 0) {
    return; // nothing to do
  }
  RV normsOut (norms, numVecs);

  Impl::lclNormImpl (normsOut, X, numVecs, whichVecs,
                     isConstantStride, whichNorm);
  Impl::gblNormImpl (normsOut, comm, isDistributed, whichNorm);
}
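
// A minimal calling sketch, kept as a comment for illustration only.
// The View setup, the choice of double as the Scalar type, and the
// use of Teuchos::DefaultComm are assumptions, not part of this file:
//
//   using device_type = Kokkos::DefaultExecutionSpace::device_type;
//   Kokkos::View<double**, Kokkos::LayoutLeft, device_type> X ("X", numRows, numCols);
//   // ... fill X ...
//   std::vector<double> norms (numCols);
//   auto comm = Teuchos::DefaultComm<int>::getComm ();
//   Tpetra::Details::normImpl<double, Kokkos::LayoutLeft, device_type, double>
//     (norms.data (), X, Tpetra::Details::NORM_TWO,
//      Teuchos::ArrayView<const size_t> (), /*isConstantStride=*/ true,
//      /*isDistributed=*/ true, comm.getRawPtr ());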

} // namespace Details
} // namespace Tpetra

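// Explicit instantiation macro for normImpl, for a Scalar type SC and
// a Node type NT.  The macro opens only the unqualified Details
// namespace, so it is intended to be expanded inside namespace Tpetra.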
#define TPETRA_DETAILS_NORMIMPL_INSTANT( SC, NT ) \
namespace Details { \
  template void \
  normImpl< \
    Kokkos::ArithTraits<SC>::val_type, \
    Kokkos::LayoutLeft, \
    NT::device_type, \
    Kokkos::ArithTraits<SC>::mag_type \
  > (Kokkos::ArithTraits<SC>::mag_type[], \
     const Kokkos::View< \
       const Kokkos::ArithTraits<SC>::val_type**, \
       Kokkos::LayoutLeft, \
       NT::device_type>&, \
     const EWhichNorm, \
     const Teuchos::ArrayView<const size_t>&, \
     const bool, \
     const bool, \
     const Teuchos::Comm<int>* ); \
}

#endif // TPETRA_DETAILS_NORMIMPL_DEF_HPP