Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_BlockMultiVector_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
11 #define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
12 
13 #include "Tpetra_BlockMultiVector_decl.hpp"
15 #include "Tpetra_BlockView.hpp"
16 #include "Teuchos_OrdinalTraits.hpp"
17 
18 namespace Tpetra {
19 
20 template <class Scalar, class LO, class GO, class Node>
21 const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type&
24  return mv_;
25 }
26 
27 template <class Scalar, class LO, class GO, class Node>
31  return mv_;
32 }
33 
34 template <class Scalar, class LO, class GO, class Node>
35 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
36 BlockMultiVector<Scalar, LO, GO, Node>::
37  getBlockMultiVectorFromSrcDistObject(const Tpetra::SrcDistObject& src) {
38  typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
39  const BMV* src_bmv = dynamic_cast<const BMV*>(&src);
40  TEUCHOS_TEST_FOR_EXCEPTION(
41  src_bmv == nullptr, std::invalid_argument,
42  "Tpetra::"
43  "BlockMultiVector: The source object of an Import or Export to a "
44  "BlockMultiVector, must also be a BlockMultiVector.");
45  return Teuchos::rcp(src_bmv, false); // nonowning RCP
46 }
47 
48 template <class Scalar, class LO, class GO, class Node>
51  const Teuchos::DataAccess copyOrView)
52  : dist_object_type(in)
53  , meshMap_(in.meshMap_)
54  , pointMap_(in.pointMap_)
55  , mv_(in.mv_, copyOrView)
56  , blockSize_(in.blockSize_) {}
57 
58 template <class Scalar, class LO, class GO, class Node>
60  BlockMultiVector(const map_type& meshMap,
61  const LO blockSize,
62  const LO numVecs)
63  : dist_object_type(Teuchos::rcp(new map_type(meshMap)))
64  , // shallow copy
65  meshMap_(meshMap)
66  , pointMap_(makePointMapRCP(meshMap, blockSize))
67  , mv_(pointMap_, numVecs)
68  , // nonowning RCP is OK, since pointMap_ won't go away
69  blockSize_(blockSize) {}
70 
71 template <class Scalar, class LO, class GO, class Node>
73  BlockMultiVector(const map_type& meshMap,
74  const map_type& pointMap,
75  const LO blockSize,
76  const LO numVecs)
77  : dist_object_type(Teuchos::rcp(new map_type(meshMap)))
78  , // shallow copy
79  meshMap_(meshMap)
80  , pointMap_(new map_type(pointMap))
81  , mv_(pointMap_, numVecs)
82  , blockSize_(blockSize) {}
83 
84 template <class Scalar, class LO, class GO, class Node>
87  const map_type& meshMap,
88  const LO blockSize)
89  : dist_object_type(Teuchos::rcp(new map_type(meshMap)))
90  , // shallow copy
91  meshMap_(meshMap)
92  , blockSize_(blockSize) {
93  using Teuchos::RCP;
94 
95  if (X_mv.getCopyOrView() == Teuchos::View) {
96  // The input MultiVector has view semantics, so assignment just
97  // does a shallow copy.
98  mv_ = X_mv;
99  } else if (X_mv.getCopyOrView() == Teuchos::Copy) {
100  // The input MultiVector has copy semantics. We can't change
101  // that, but we can make a view of the input MultiVector and
102  // change the view to have view semantics. (That sounds silly;
103  // shouldn't views always have view semantics? but remember that
104  // "view semantics" just governs the default behavior of the copy
105  // constructor and assignment operator.)
106  RCP<const mv_type> X_view_const;
107  const size_t numCols = X_mv.getNumVectors();
108  if (numCols == 0) {
109  Teuchos::Array<size_t> cols(0); // view will have zero columns
110  X_view_const = X_mv.subView(cols());
111  } else { // Range1D is an inclusive range
112  X_view_const = X_mv.subView(Teuchos::Range1D(0, numCols - 1));
113  }
114  TEUCHOS_TEST_FOR_EXCEPTION(
115  X_view_const.is_null(), std::logic_error,
116  "Tpetra::"
117  "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
118  "should never happen. Please report this bug to the Tpetra developers.");
119 
120  // It's perfectly OK to cast away const here. Those view methods
121  // should be marked const anyway, because views can't change the
122  // allocation (just the entries themselves).
123  RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type>(X_view_const);
124  TEUCHOS_TEST_FOR_EXCEPTION(
125  X_view->getCopyOrView() != Teuchos::View, std::logic_error,
126  "Tpetra::"
127  "BlockMultiVector constructor: We just set a MultiVector "
128  "to have view semantics, but it claims that it doesn't have view "
129  "semantics. This should never happen. "
130  "Please report this bug to the Tpetra developers.");
131  mv_ = *X_view; // MultiVector::operator= does a shallow copy here
132  }
133 
134  // At this point, mv_ has been assigned, so we can ignore X_mv.
135  Teuchos::RCP<const map_type> pointMap = mv_.getMap();
136  pointMap_ = pointMap;
137 }
138 
139 template <class Scalar, class LO, class GO, class Node>
142  const map_type& newMeshMap,
143  const map_type& newPointMap,
144  const size_t offset)
145  : dist_object_type(Teuchos::rcp(new map_type(newMeshMap)))
146  , // shallow copy
147  meshMap_(newMeshMap)
148  , pointMap_(new map_type(newPointMap))
149  , mv_(X.mv_, newPointMap, offset * X.getBlockSize())
150  , // MV "offset view" constructor
151  blockSize_(X.getBlockSize()) {}
152 
153 template <class Scalar, class LO, class GO, class Node>
156  const map_type& newMeshMap,
157  const size_t offset)
158  : dist_object_type(Teuchos::rcp(new map_type(newMeshMap)))
159  , // shallow copy
160  meshMap_(newMeshMap)
161  , pointMap_(makePointMapRCP(newMeshMap, X.getBlockSize()))
162  , mv_(X.mv_, pointMap_, offset * X.getBlockSize())
163  , // MV "offset view" constructor
164  blockSize_(X.getBlockSize()) {}
165 
166 template <class Scalar, class LO, class GO, class Node>
169  : dist_object_type(Teuchos::null)
170  , blockSize_(0) {}
171 
172 template <class Scalar, class LO, class GO, class Node>
175  makePointMap(const map_type& meshMap, const LO blockSize) {
176  typedef Tpetra::global_size_t GST;
177  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
178 
179  const GST gblNumMeshMapInds =
180  static_cast<GST>(meshMap.getGlobalNumElements());
181  const size_t lclNumMeshMapIndices =
182  static_cast<size_t>(meshMap.getLocalNumElements());
183  const GST gblNumPointMapInds =
184  gblNumMeshMapInds * static_cast<GST>(blockSize);
185  const size_t lclNumPointMapInds =
186  lclNumMeshMapIndices * static_cast<size_t>(blockSize);
187  const GO indexBase = meshMap.getIndexBase();
188 
189  if (meshMap.isContiguous()) {
190  return map_type(gblNumPointMapInds, lclNumPointMapInds, indexBase,
191  meshMap.getComm());
192  } else {
193  // "Hilbert's Hotel" trick: multiply each process' GIDs by
194  // blockSize, and fill in. That ensures correctness even if the
195  // mesh Map is overlapping.
196  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList();
197  const size_type lclNumMeshGblInds = lclMeshGblInds.size();
198  Teuchos::Array<GO> lclPointGblInds(lclNumPointMapInds);
199  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
200  const GO meshGid = lclMeshGblInds[g];
201  const GO pointGidStart = indexBase +
202  (meshGid - indexBase) * static_cast<GO>(blockSize);
203  const size_type offset = g * static_cast<size_type>(blockSize);
204  for (LO k = 0; k < blockSize; ++k) {
205  const GO pointGid = pointGidStart + static_cast<GO>(k);
206  lclPointGblInds[offset + static_cast<size_type>(k)] = pointGid;
207  }
208  }
209  return map_type(gblNumPointMapInds, lclPointGblInds(), indexBase,
210  meshMap.getComm());
211  }
212 }
213 
214 template <class Scalar, class LO, class GO, class Node>
215 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::map_type>
217  makePointMapRCP(const map_type& meshMap, const LO blockSize) {
218  typedef Tpetra::global_size_t GST;
219  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
220 
221  const GST gblNumMeshMapInds =
222  static_cast<GST>(meshMap.getGlobalNumElements());
223  const size_t lclNumMeshMapIndices =
224  static_cast<size_t>(meshMap.getLocalNumElements());
225  const GST gblNumPointMapInds =
226  gblNumMeshMapInds * static_cast<GST>(blockSize);
227  const size_t lclNumPointMapInds =
228  lclNumMeshMapIndices * static_cast<size_t>(blockSize);
229  const GO indexBase = meshMap.getIndexBase();
230 
231  if (meshMap.isContiguous()) {
232  return Teuchos::rcp(new map_type(gblNumPointMapInds, lclNumPointMapInds, indexBase,
233  meshMap.getComm()));
234  } else {
235  // "Hilbert's Hotel" trick: multiply each process' GIDs by
236  // blockSize, and fill in. That ensures correctness even if the
237  // mesh Map is overlapping.
238  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList();
239  const size_type lclNumMeshGblInds = lclMeshGblInds.size();
240  Teuchos::Array<GO> lclPointGblInds(lclNumPointMapInds);
241  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
242  const GO meshGid = lclMeshGblInds[g];
243  const GO pointGidStart = indexBase +
244  (meshGid - indexBase) * static_cast<GO>(blockSize);
245  const size_type offset = g * static_cast<size_type>(blockSize);
246  for (LO k = 0; k < blockSize; ++k) {
247  const GO pointGid = pointGidStart + static_cast<GO>(k);
248  lclPointGblInds[offset + static_cast<size_type>(k)] = pointGid;
249  }
250  }
251  return Teuchos::rcp(new map_type(gblNumPointMapInds, lclPointGblInds(), indexBase,
252  meshMap.getComm()));
253  }
254 }
255 
256 template <class Scalar, class LO, class GO, class Node>
258  replaceLocalValuesImpl(const LO localRowIndex,
259  const LO colIndex,
260  const Scalar vals[]) {
261  auto X_dst = getLocalBlockHost(localRowIndex, colIndex, Access::ReadWrite);
262  typename const_little_vec_type::host_mirror_type::const_type X_src(reinterpret_cast<const impl_scalar_type*>(vals),
263  getBlockSize());
264  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
265  using exec_space = typename device_type::execution_space;
266  Kokkos::deep_copy(exec_space(), X_dst, X_src);
267 }
268 
269 template <class Scalar, class LO, class GO, class Node>
271  replaceLocalValues(const LO localRowIndex,
272  const LO colIndex,
273  const Scalar vals[]) {
274  if (!meshMap_.isNodeLocalElement(localRowIndex)) {
275  return false;
276  } else {
277  replaceLocalValuesImpl(localRowIndex, colIndex, vals);
278  return true;
279  }
280 }
281 
282 template <class Scalar, class LO, class GO, class Node>
284  replaceGlobalValues(const GO globalRowIndex,
285  const LO colIndex,
286  const Scalar vals[]) {
287  const LO localRowIndex = meshMap_.getLocalElement(globalRowIndex);
288  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid()) {
289  return false;
290  } else {
291  replaceLocalValuesImpl(localRowIndex, colIndex, vals);
292  return true;
293  }
294 }
295 
296 template <class Scalar, class LO, class GO, class Node>
298  sumIntoLocalValuesImpl(const LO localRowIndex,
299  const LO colIndex,
300  const Scalar vals[]) {
301  auto X_dst = getLocalBlockHost(localRowIndex, colIndex, Access::ReadWrite);
302  typename const_little_vec_type::host_mirror_type::const_type X_src(reinterpret_cast<const impl_scalar_type*>(vals),
303  getBlockSize());
304  AXPY(static_cast<impl_scalar_type>(STS::one()), X_src, X_dst);
305 }
306 
307 template <class Scalar, class LO, class GO, class Node>
309  sumIntoLocalValues(const LO localRowIndex,
310  const LO colIndex,
311  const Scalar vals[]) {
312  if (!meshMap_.isNodeLocalElement(localRowIndex)) {
313  return false;
314  } else {
315  sumIntoLocalValuesImpl(localRowIndex, colIndex, vals);
316  return true;
317  }
318 }
319 
320 template <class Scalar, class LO, class GO, class Node>
322  sumIntoGlobalValues(const GO globalRowIndex,
323  const LO colIndex,
324  const Scalar vals[]) {
325  const LO localRowIndex = meshMap_.getLocalElement(globalRowIndex);
326  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid()) {
327  return false;
328  } else {
329  sumIntoLocalValuesImpl(localRowIndex, colIndex, vals);
330  return true;
331  }
332 }
333 
334 template <class Scalar, class LO, class GO, class Node>
335 typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
337  getLocalBlockHost(const LO localRowIndex,
338  const LO colIndex,
339  const Access::ReadOnlyStruct) const {
340  if (!isValidLocalMeshIndex(localRowIndex)) {
341  return const_little_host_vec_type();
342  } else {
343  const size_t blockSize = getBlockSize();
344  auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
345  LO startRow = localRowIndex * blockSize;
346  LO endRow = startRow + blockSize;
347  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
348  colIndex);
349  }
350 }
351 
352 template <class Scalar, class LO, class GO, class Node>
353 typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
354 BlockMultiVector<Scalar, LO, GO, Node>::
355  getLocalBlockHost(const LO localRowIndex,
356  const LO colIndex,
357  const Access::OverwriteAllStruct) {
358  if (!isValidLocalMeshIndex(localRowIndex)) {
359  return little_host_vec_type();
360  } else {
361  const size_t blockSize = getBlockSize();
362  auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
363  LO startRow = localRowIndex * blockSize;
364  LO endRow = startRow + blockSize;
365  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
366  colIndex);
367  }
368 }
369 
370 template <class Scalar, class LO, class GO, class Node>
371 typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
373  getLocalBlockHost(const LO localRowIndex,
374  const LO colIndex,
375  const Access::ReadWriteStruct) {
376  if (!isValidLocalMeshIndex(localRowIndex)) {
377  return little_host_vec_type();
378  } else {
379  const size_t blockSize = getBlockSize();
380  auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
381  LO startRow = localRowIndex * blockSize;
382  LO endRow = startRow + blockSize;
383  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
384  colIndex);
385  }
386 }
387 
388 template <class Scalar, class LO, class GO, class Node>
389 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
390 BlockMultiVector<Scalar, LO, GO, Node>::
391  getMultiVectorFromSrcDistObject(const Tpetra::SrcDistObject& src) {
392  using Teuchos::rcp;
393  using Teuchos::rcpFromRef;
394 
395  // The source object of an Import or Export must be a
396  // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
397  // them in that order; one must succeed. Note that the target of
398  // the Import or Export calls checkSizes in DistObject's doTransfer.
399  typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
400  const this_BMV_type* srcBlkVec = dynamic_cast<const this_BMV_type*>(&src);
401  if (srcBlkVec == nullptr) {
402  const mv_type* srcMultiVec = dynamic_cast<const mv_type*>(&src);
403  if (srcMultiVec == nullptr) {
404  // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
405  // currently does a shallow copy by default. This is why we
406  // return by RCP, rather than by value.
407  return rcp(new mv_type());
408  } else { // src is a MultiVector
409  return rcp(srcMultiVec, false); // nonowning RCP
410  }
411  } else { // src is a BlockMultiVector
412  return rcpFromRef(srcBlkVec->mv_); // nonowning RCP
413  }
414 }
415 
416 template <class Scalar, class LO, class GO, class Node>
419  return !getMultiVectorFromSrcDistObject(src).is_null();
420 }
421 
422 template <class Scalar, class LO, class GO, class Node>
425  const size_t numSameIDs,
426  const Kokkos::DualView<const local_ordinal_type*,
427  buffer_device_type>& permuteToLIDs,
428  const Kokkos::DualView<const local_ordinal_type*,
429  buffer_device_type>& permuteFromLIDs,
430  const CombineMode CM) {
431  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
432  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
433  "instead, create a point importer using makePointMap function.");
434 }
435 
436 template <class Scalar, class LO, class GO, class Node>
439  const Kokkos::DualView<const local_ordinal_type*,
440  buffer_device_type>& exportLIDs,
441  Kokkos::DualView<packet_type*,
442  buffer_device_type>& exports,
443  Kokkos::DualView<size_t*,
445  numPacketsPerLID,
446  size_t& constantNumPackets) {
447  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
448  "Tpetra::BlockMultiVector::packAndPrepare: Do NOT use this; "
449  "instead, create a point importer using makePointMap function.");
450 }
451 
452 template <class Scalar, class LO, class GO, class Node>
454  unpackAndCombine(const Kokkos::DualView<const local_ordinal_type*,
455  buffer_device_type>& importLIDs,
456  Kokkos::DualView<packet_type*,
458  imports,
459  Kokkos::DualView<size_t*,
461  numPacketsPerLID,
462  const size_t constantNumPackets,
463  const CombineMode combineMode) {
464  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
465  "Tpetra::BlockMultiVector::unpackAndCombine: Do NOT use this; "
466  "instead, create a point importer using makePointMap function.");
467 }
468 
469 template <class Scalar, class LO, class GO, class Node>
471  isValidLocalMeshIndex(const LO meshLocalIndex) const {
472  return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid() &&
473  meshMap_.isNodeLocalElement(meshLocalIndex);
474 }
475 
476 template <class Scalar, class LO, class GO, class Node>
478  putScalar(const Scalar& val) {
479  mv_.putScalar(val);
480 }
481 
482 template <class Scalar, class LO, class GO, class Node>
484  scale(const Scalar& val) {
485  mv_.scale(val);
486 }
487 
488 template <class Scalar, class LO, class GO, class Node>
490  update(const Scalar& alpha,
492  const Scalar& beta) {
493  mv_.update(alpha, X.mv_, beta);
494 }
495 
496 namespace Impl {
497 // Y := alpha * D * X
498 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
499 struct BlockWiseMultiply {
500  typedef typename ViewD::size_type Size;
501 
502  private:
503  typedef typename ViewD::device_type Device;
504  typedef typename ViewD::non_const_value_type ImplScalar;
505  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
506 
507  template <typename View>
508  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
509  typename View::device_type, Unmanaged>;
510  typedef UnmanagedView<ViewY> UnMViewY;
511  typedef UnmanagedView<ViewD> UnMViewD;
512  typedef UnmanagedView<ViewX> UnMViewX;
513 
514  const Size block_size_;
515  Scalar alpha_;
516  UnMViewY Y_;
517  UnMViewD D_;
518  UnMViewX X_;
519 
520  public:
521  BlockWiseMultiply(const Size block_size, const Scalar& alpha,
522  const ViewY& Y, const ViewD& D, const ViewX& X)
523  : block_size_(block_size)
524  , alpha_(alpha)
525  , Y_(Y)
526  , D_(D)
527  , X_(X) {}
528 
529  KOKKOS_INLINE_FUNCTION
530  void operator()(const Size k) const {
531 #if KOKKOS_VERSION >= 40799
532  const auto zero = KokkosKernels::ArithTraits<Scalar>::zero();
533 #else
534  const auto zero = Kokkos::ArithTraits<Scalar>::zero();
535 #endif
536  auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL(), Kokkos::ALL());
537  const auto num_vecs = X_.extent(1);
538  for (Size i = 0; i < num_vecs; ++i) {
539  Kokkos::pair<Size, Size> kslice(k * block_size_, (k + 1) * block_size_);
540  auto X_curBlk = Kokkos::subview(X_, kslice, i);
541  auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
542  // Y_curBlk := alpha * D_curBlk * X_curBlk.
543  // Recall that GEMV does an update (+=) of the last argument.
544  Tpetra::FILL(Y_curBlk, zero);
545  Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
546  }
547  }
548 };
549 
550 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
551 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
552 createBlockWiseMultiply(const int block_size, const Scalar& alpha,
553  const ViewY& Y, const ViewD& D, const ViewX& X) {
554  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
555 }
556 
557 template <typename ViewY,
558  typename Scalar,
559  typename ViewD,
560  typename ViewZ,
561  typename LO = typename ViewY::size_type>
562 class BlockJacobiUpdate {
563  private:
564  typedef typename ViewD::device_type Device;
565  typedef typename ViewD::non_const_value_type ImplScalar;
566  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
567 
568  template <typename ViewType>
569  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
570  typename ViewType::array_layout,
571  typename ViewType::device_type,
572  Unmanaged>;
573  typedef UnmanagedView<ViewY> UnMViewY;
574  typedef UnmanagedView<ViewD> UnMViewD;
575  typedef UnmanagedView<ViewZ> UnMViewZ;
576 
577  const LO blockSize_;
578  UnMViewY Y_;
579  const Scalar alpha_;
580  UnMViewD D_;
581  UnMViewZ Z_;
582  const Scalar beta_;
583 
584  public:
585  BlockJacobiUpdate(const ViewY& Y,
586  const Scalar& alpha,
587  const ViewD& D,
588  const ViewZ& Z,
589  const Scalar& beta)
590  : blockSize_(D.extent(1))
591  ,
592  // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
593  Y_(Y)
594  , alpha_(alpha)
595  , D_(D)
596  , Z_(Z)
597  , beta_(beta) {
598  static_assert(static_cast<int>(ViewY::rank) == 1,
599  "Y must have rank 1.");
600  static_assert(static_cast<int>(ViewD::rank) == 3, "D must have rank 3.");
601  static_assert(static_cast<int>(ViewZ::rank) == 1,
602  "Z must have rank 1.");
603  // static_assert (static_cast<int> (ViewZ::rank) ==
604  // static_cast<int> (ViewY::rank),
605  // "Z must have the same rank as Y.");
606  }
607 
608  KOKKOS_INLINE_FUNCTION void
609  operator()(const LO& k) const {
610  using Kokkos::ALL;
611  using Kokkos::subview;
612  typedef Kokkos::pair<LO, LO> range_type;
613 #if KOKKOS_VERSION >= 40799
614  typedef KokkosKernels::ArithTraits<Scalar> KAT;
615 #else
616  typedef Kokkos::ArithTraits<Scalar> KAT;
617 #endif
618 
619  // We only have to implement the alpha != 0 case.
620 
621  auto D_curBlk = subview(D_, k, ALL(), ALL());
622  const range_type kslice(k * blockSize_, (k + 1) * blockSize_);
623 
624  // Z.update (STS::one (), X, -STS::one ()); // assume this is done
625 
626  auto Z_curBlk = subview(Z_, kslice);
627  auto Y_curBlk = subview(Y_, kslice);
628  // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
629  if (beta_ == KAT::zero()) {
630  Tpetra::FILL(Y_curBlk, KAT::zero());
631  } else if (beta_ != KAT::one()) {
632  Tpetra::SCAL(beta_, Y_curBlk);
633  }
634  Tpetra::GEMV(alpha_, D_curBlk, Z_curBlk, Y_curBlk);
635  }
636 };
637 
638 template <class ViewY,
639  class Scalar,
640  class ViewD,
641  class ViewZ,
642  class LO = typename ViewD::size_type>
643 void blockJacobiUpdate(const ViewY& Y,
644  const Scalar& alpha,
645  const ViewD& D,
646  const ViewZ& Z,
647  const Scalar& beta) {
648  static_assert(Kokkos::is_view<ViewY>::value, "Y must be a Kokkos::View.");
649  static_assert(Kokkos::is_view<ViewD>::value, "D must be a Kokkos::View.");
650  static_assert(Kokkos::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
651  static_assert(static_cast<int>(ViewY::rank) == static_cast<int>(ViewZ::rank),
652  "Y and Z must have the same rank.");
653  static_assert(static_cast<int>(ViewD::rank) == 3, "D must have rank 3.");
654 
655  const auto lclNumMeshRows = D.extent(0);
656 
657 #ifdef HAVE_TPETRA_DEBUG
658  // D.extent(0) is the (local) number of mesh rows.
659  // D.extent(1) is the block size. Thus, their product should be
660  // the local number of point rows, that is, the number of rows in Y.
661  const auto blkSize = D.extent(1);
662  const auto lclNumPtRows = lclNumMeshRows * blkSize;
663  TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(0) != lclNumPtRows, std::invalid_argument,
664  "blockJacobiUpdate: Y.extent(0) = " << Y.extent(0) << " != "
665  "D.extent(0)*D.extent(1) = "
666  << lclNumMeshRows << " * " << blkSize
667  << " = " << lclNumPtRows << ".");
668  TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(0) != Z.extent(0), std::invalid_argument,
669  "blockJacobiUpdate: Y.extent(0) = " << Y.extent(0) << " != "
670  "Z.extent(0) = "
671  << Z.extent(0) << ".");
672  TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(1) != Z.extent(1), std::invalid_argument,
673  "blockJacobiUpdate: Y.extent(1) = " << Y.extent(1) << " != "
674  "Z.extent(1) = "
675  << Z.extent(1) << ".");
676 #endif // HAVE_TPETRA_DEBUG
677 
678  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor(Y, alpha, D, Z, beta);
679  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
680  // lclNumMeshRows must fit in LO, else the Map would not be correct.
681  range_type range(0, static_cast<LO>(lclNumMeshRows));
682  Kokkos::parallel_for(range, functor);
683 }
684 
685 } // namespace Impl
686 
687 template <class Scalar, class LO, class GO, class Node>
689  blockWiseMultiply(const Scalar& alpha,
690  const Kokkos::View<const impl_scalar_type***,
691  device_type, Kokkos::MemoryUnmanaged>& D,
693  using Kokkos::ALL;
694  typedef typename device_type::execution_space exec_space;
695  const LO lclNumMeshRows = meshMap_.getLocalNumElements();
696 
697  if (alpha == STS::zero()) {
698  this->putScalar(STS::zero());
699  } else { // alpha != 0
700  const LO blockSize = this->getBlockSize();
701  const impl_scalar_type alphaImpl = static_cast<impl_scalar_type>(alpha);
702  auto X_lcl = X.mv_.getLocalViewDevice(Access::ReadOnly);
703  auto Y_lcl = this->mv_.getLocalViewDevice(Access::ReadWrite);
704  auto bwm = Impl::createBlockWiseMultiply(blockSize, alphaImpl, Y_lcl, D, X_lcl);
705 
706  // Use an explicit RangePolicy with the desired execution space.
707  // Otherwise, if you just use a number, it runs on the default
708  // execution space. For example, if the default execution space
709  // is Cuda but the current execution space is Serial, using just a
710  // number would incorrectly run with Cuda.
711  Kokkos::RangePolicy<exec_space, LO> range(0, lclNumMeshRows);
712  Kokkos::parallel_for(range, bwm);
713  }
714 }
715 
716 template <class Scalar, class LO, class GO, class Node>
718  blockJacobiUpdate(const Scalar& alpha,
719  const Kokkos::View<const impl_scalar_type***,
720  device_type, Kokkos::MemoryUnmanaged>& D,
723  const Scalar& beta) {
724  using Kokkos::ALL;
725  using Kokkos::subview;
726  typedef impl_scalar_type IST;
727 
728  const IST alphaImpl = static_cast<IST>(alpha);
729  const IST betaImpl = static_cast<IST>(beta);
730  const LO numVecs = mv_.getNumVectors();
731 
732  if (alpha == STS::zero()) { // Y := beta * Y
733  this->scale(beta);
734  } else { // alpha != 0
735  Z.update(STS::one(), X, -STS::one());
736  for (LO j = 0; j < numVecs; ++j) {
737  auto Y_lcl = this->mv_.getLocalViewDevice(Access::ReadWrite);
738  auto Z_lcl = Z.mv_.getLocalViewDevice(Access::ReadWrite);
739  auto Y_lcl_j = subview(Y_lcl, ALL(), j);
740  auto Z_lcl_j = subview(Z_lcl, ALL(), j);
741  Impl::blockJacobiUpdate(Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
742  }
743  }
744 }
745 
746 } // namespace Tpetra
747 
748 //
749 // Explicit instantiation macro
750 //
751 // Must be expanded from within the Tpetra namespace!
752 //
753 #define TPETRA_BLOCKMULTIVECTOR_INSTANT(S, LO, GO, NODE) \
754  template class BlockMultiVector<S, LO, GO, NODE>;
755 
756 #endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
LO local_ordinal_type
The type of local indices.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
Linear algebra kernels for small dense matrices and vectors.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
static Teuchos::RCP< const map_type > makePointMapRCP(const map_type &meshMap, const LO blockSize)
Create and return an owning RCP to the point Map corresponding to the given mesh Map and block size...
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
const mv_type & getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector&#39;s data.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
global_ordinal_type getIndexBase() const
The index base for this Map.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
typename mv_type::device_type device_type
The Kokkos Device type.
CombineMode
Rule for combining data in an Import or Export.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Abstract base class for objects that can be the source of an Import or Export operation.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector&#39;s local data on device. This requires that th...
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization used for communication buffers.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.