Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_BlockMultiVector_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
11 #define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
12 
13 #include "Tpetra_BlockMultiVector_decl.hpp"
15 #include "Tpetra_BlockView.hpp"
16 #include "Teuchos_OrdinalTraits.hpp"
17 
18 
19 namespace Tpetra {
20 
21 template<class Scalar, class LO, class GO, class Node>
22 const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type &
25 {
26  return mv_;
27 }
28 
29 template<class Scalar, class LO, class GO, class Node>
33 {
34  return mv_;
35 }
36 
37 template<class Scalar, class LO, class GO, class Node>
38 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
39 BlockMultiVector<Scalar, LO, GO, Node>::
40 getBlockMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
41 {
42  typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
43  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
44  TEUCHOS_TEST_FOR_EXCEPTION(
45  src_bmv == nullptr, std::invalid_argument, "Tpetra::"
46  "BlockMultiVector: The source object of an Import or Export to a "
47  "BlockMultiVector, must also be a BlockMultiVector.");
48  return Teuchos::rcp (src_bmv, false); // nonowning RCP
49 }
50 
51 template<class Scalar, class LO, class GO, class Node>
54  const Teuchos::DataAccess copyOrView) :
55  dist_object_type (in),
56  meshMap_ (in.meshMap_),
57  pointMap_ (in.pointMap_),
58  mv_ (in.mv_, copyOrView),
59  blockSize_ (in.blockSize_)
60 {}
61 
62 template<class Scalar, class LO, class GO, class Node>
64 BlockMultiVector (const map_type& meshMap,
65  const LO blockSize,
66  const LO numVecs) :
67  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
68  meshMap_ (meshMap),
69  pointMap_ (makePointMapRCP (meshMap, blockSize)),
70  mv_ (pointMap_, numVecs), // nonowning RCP is OK, since pointMap_ won't go away
71  blockSize_ (blockSize)
72 {}
73 
74 template<class Scalar, class LO, class GO, class Node>
76 BlockMultiVector (const map_type& meshMap,
77  const map_type& pointMap,
78  const LO blockSize,
79  const LO numVecs) :
80  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
81  meshMap_ (meshMap),
82  pointMap_ (new map_type(pointMap)),
83  mv_ (pointMap_, numVecs),
84  blockSize_ (blockSize)
85 {}
86 
87 template<class Scalar, class LO, class GO, class Node>
90  const map_type& meshMap,
91  const LO blockSize) :
92  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
93  meshMap_ (meshMap),
94  blockSize_ (blockSize)
95 {
96  using Teuchos::RCP;
97 
98  if (X_mv.getCopyOrView () == Teuchos::View) {
99  // The input MultiVector has view semantics, so assignment just
100  // does a shallow copy.
101  mv_ = X_mv;
102  }
103  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
104  // The input MultiVector has copy semantics. We can't change
105  // that, but we can make a view of the input MultiVector and
106  // change the view to have view semantics. (That sounds silly;
107  // shouldn't views always have view semantics? but remember that
108  // "view semantics" just governs the default behavior of the copy
109  // constructor and assignment operator.)
110  RCP<const mv_type> X_view_const;
111  const size_t numCols = X_mv.getNumVectors ();
112  if (numCols == 0) {
113  Teuchos::Array<size_t> cols (0); // view will have zero columns
114  X_view_const = X_mv.subView (cols ());
115  } else { // Range1D is an inclusive range
116  X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
117  }
118  TEUCHOS_TEST_FOR_EXCEPTION(
119  X_view_const.is_null (), std::logic_error, "Tpetra::"
120  "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
121  "should never happen. Please report this bug to the Tpetra developers.");
122 
123  // It's perfectly OK to cast away const here. Those view methods
124  // should be marked const anyway, because views can't change the
125  // allocation (just the entries themselves).
126  RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
127  TEUCHOS_TEST_FOR_EXCEPTION(
128  X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
129  "BlockMultiVector constructor: We just set a MultiVector "
130  "to have view semantics, but it claims that it doesn't have view "
131  "semantics. This should never happen. "
132  "Please report this bug to the Tpetra developers.");
133  mv_ = *X_view; // MultiVector::operator= does a shallow copy here
134  }
135 
136  // At this point, mv_ has been assigned, so we can ignore X_mv.
137  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
138  pointMap_ = pointMap;
139 
140 }
141 
142 template<class Scalar, class LO, class GO, class Node>
145  const map_type& newMeshMap,
146  const map_type& newPointMap,
147  const size_t offset) :
148  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
149  meshMap_ (newMeshMap),
150  pointMap_ (new map_type(newPointMap)),
151  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
152  blockSize_ (X.getBlockSize ())
153 {}
154 
155 template<class Scalar, class LO, class GO, class Node>
158  const map_type& newMeshMap,
159  const size_t offset) :
160  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
161  meshMap_ (newMeshMap),
162  pointMap_ (makePointMapRCP (newMeshMap, X.getBlockSize ())),
163  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
164  blockSize_ (X.getBlockSize ())
165 {}
166 
167 template<class Scalar, class LO, class GO, class Node>
170  dist_object_type (Teuchos::null),
171  blockSize_ (0)
172 {}
173 
174 template<class Scalar, class LO, class GO, class Node>
177 makePointMap (const map_type& meshMap, const LO blockSize)
178 {
179 typedef Tpetra::global_size_t GST;
180  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
181 
182  const GST gblNumMeshMapInds =
183  static_cast<GST> (meshMap.getGlobalNumElements ());
184  const size_t lclNumMeshMapIndices =
185  static_cast<size_t> (meshMap.getLocalNumElements ());
186  const GST gblNumPointMapInds =
187  gblNumMeshMapInds * static_cast<GST> (blockSize);
188  const size_t lclNumPointMapInds =
189  lclNumMeshMapIndices * static_cast<size_t> (blockSize);
190  const GO indexBase = meshMap.getIndexBase ();
191 
192  if (meshMap.isContiguous ()) {
193  return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
194  meshMap.getComm ());
195  }
196  else {
197  // "Hilbert's Hotel" trick: multiply each process' GIDs by
198  // blockSize, and fill in. That ensures correctness even if the
199  // mesh Map is overlapping.
200  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList ();
201  const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
202  Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
203  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
204  const GO meshGid = lclMeshGblInds[g];
205  const GO pointGidStart = indexBase +
206  (meshGid - indexBase) * static_cast<GO> (blockSize);
207  const size_type offset = g * static_cast<size_type> (blockSize);
208  for (LO k = 0; k < blockSize; ++k) {
209  const GO pointGid = pointGidStart + static_cast<GO> (k);
210  lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
211  }
212  }
213  return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
214  meshMap.getComm ());
215 
216  }
217 }
218 
219 
220 template<class Scalar, class LO, class GO, class Node>
221 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::map_type>
223 makePointMapRCP (const map_type& meshMap, const LO blockSize)
224 {
225 typedef Tpetra::global_size_t GST;
226  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
227 
228  const GST gblNumMeshMapInds =
229  static_cast<GST> (meshMap.getGlobalNumElements ());
230  const size_t lclNumMeshMapIndices =
231  static_cast<size_t> (meshMap.getLocalNumElements ());
232  const GST gblNumPointMapInds =
233  gblNumMeshMapInds * static_cast<GST> (blockSize);
234  const size_t lclNumPointMapInds =
235  lclNumMeshMapIndices * static_cast<size_t> (blockSize);
236  const GO indexBase = meshMap.getIndexBase ();
237 
238  if (meshMap.isContiguous ()) {
239  return Teuchos::rcp(new map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
240  meshMap.getComm ()));
241  }
242  else {
243  // "Hilbert's Hotel" trick: multiply each process' GIDs by
244  // blockSize, and fill in. That ensures correctness even if the
245  // mesh Map is overlapping.
246  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList ();
247  const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
248  Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
249  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
250  const GO meshGid = lclMeshGblInds[g];
251  const GO pointGidStart = indexBase +
252  (meshGid - indexBase) * static_cast<GO> (blockSize);
253  const size_type offset = g * static_cast<size_type> (blockSize);
254  for (LO k = 0; k < blockSize; ++k) {
255  const GO pointGid = pointGidStart + static_cast<GO> (k);
256  lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
257  }
258  }
259  return Teuchos::rcp(new map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
260  meshMap.getComm ()));
261 
262  }
263 }
264 
265 template<class Scalar, class LO, class GO, class Node>
266 void
268 replaceLocalValuesImpl (const LO localRowIndex,
269  const LO colIndex,
270  const Scalar vals[])
271 {
272  auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
273  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
274  getBlockSize ());
275  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
276  using exec_space = typename device_type::execution_space;
277  Kokkos::deep_copy (exec_space(), X_dst, X_src);
278 }
279 
280 
281 template<class Scalar, class LO, class GO, class Node>
282 bool
284 replaceLocalValues (const LO localRowIndex,
285  const LO colIndex,
286  const Scalar vals[])
287 {
288  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
289  return false;
290  } else {
291  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
292  return true;
293  }
294 }
295 
296 template<class Scalar, class LO, class GO, class Node>
297 bool
299 replaceGlobalValues (const GO globalRowIndex,
300  const LO colIndex,
301  const Scalar vals[])
302 {
303  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
304  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
305  return false;
306  } else {
307  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
308  return true;
309  }
310 }
311 
312 template<class Scalar, class LO, class GO, class Node>
313 void
315 sumIntoLocalValuesImpl (const LO localRowIndex,
316  const LO colIndex,
317  const Scalar vals[])
318 {
319  auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
320  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
321  getBlockSize ());
322  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
323 }
324 
325 template<class Scalar, class LO, class GO, class Node>
326 bool
328 sumIntoLocalValues (const LO localRowIndex,
329  const LO colIndex,
330  const Scalar vals[])
331 {
332  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
333  return false;
334  } else {
335  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
336  return true;
337  }
338 }
339 
340 template<class Scalar, class LO, class GO, class Node>
341 bool
343 sumIntoGlobalValues (const GO globalRowIndex,
344  const LO colIndex,
345  const Scalar vals[])
346 {
347  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
348  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
349  return false;
350  } else {
351  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
352  return true;
353  }
354 }
355 
356 
357 template<class Scalar, class LO, class GO, class Node>
358 typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
360 getLocalBlockHost (const LO localRowIndex,
361  const LO colIndex,
362  const Access::ReadOnlyStruct) const
363 {
364  if (!isValidLocalMeshIndex(localRowIndex)) {
365  return const_little_host_vec_type();
366  } else {
367  const size_t blockSize = getBlockSize();
368  auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
369  LO startRow = localRowIndex*blockSize;
370  LO endRow = startRow + blockSize;
371  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
372  colIndex);
373  }
374 }
375 
376 template<class Scalar, class LO, class GO, class Node>
377 typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
378 BlockMultiVector<Scalar, LO, GO, Node>::
379 getLocalBlockHost (const LO localRowIndex,
380  const LO colIndex,
381  const Access::OverwriteAllStruct)
382 {
383  if (!isValidLocalMeshIndex(localRowIndex)) {
384  return little_host_vec_type();
385  } else {
386  const size_t blockSize = getBlockSize();
387  auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
388  LO startRow = localRowIndex*blockSize;
389  LO endRow = startRow + blockSize;
390  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
391  colIndex);
392  }
393 }
394 
395 template<class Scalar, class LO, class GO, class Node>
396 typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
398 getLocalBlockHost (const LO localRowIndex,
399  const LO colIndex,
400  const Access::ReadWriteStruct)
401 {
402  if (!isValidLocalMeshIndex(localRowIndex)) {
403  return little_host_vec_type();
404  } else {
405  const size_t blockSize = getBlockSize();
406  auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
407  LO startRow = localRowIndex*blockSize;
408  LO endRow = startRow + blockSize;
409  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
410  colIndex);
411  }
412 }
413 
414 template<class Scalar, class LO, class GO, class Node>
415 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
416 BlockMultiVector<Scalar, LO, GO, Node>::
417 getMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
418 {
419  using Teuchos::rcp;
420  using Teuchos::rcpFromRef;
421 
422  // The source object of an Import or Export must be a
423  // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
424  // them in that order; one must succeed. Note that the target of
425  // the Import or Export calls checkSizes in DistObject's doTransfer.
426  typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
427  const this_BMV_type* srcBlkVec = dynamic_cast<const this_BMV_type*> (&src);
428  if (srcBlkVec == nullptr) {
429  const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
430  if (srcMultiVec == nullptr) {
431  // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
432  // currently does a shallow copy by default. This is why we
433  // return by RCP, rather than by value.
434  return rcp (new mv_type ());
435  } else { // src is a MultiVector
436  return rcp (srcMultiVec, false); // nonowning RCP
437  }
438  } else { // src is a BlockMultiVector
439  return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
440  }
441 }
442 
443 template<class Scalar, class LO, class GO, class Node>
446 {
447  return ! getMultiVectorFromSrcDistObject (src).is_null ();
448 }
449 
450 template<class Scalar, class LO, class GO, class Node>
453 (const SrcDistObject& src,
454  const size_t numSameIDs,
455  const Kokkos::DualView<const local_ordinal_type*,
456  buffer_device_type>& permuteToLIDs,
457  const Kokkos::DualView<const local_ordinal_type*,
458  buffer_device_type>& permuteFromLIDs,
459  const CombineMode CM)
460 {
461  TEUCHOS_TEST_FOR_EXCEPTION
462  (true, std::logic_error,
463  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
464  "instead, create a point importer using makePointMap function.");
465 }
466 
467 template<class Scalar, class LO, class GO, class Node>
470 (const SrcDistObject& src,
471  const Kokkos::DualView<const local_ordinal_type*,
472  buffer_device_type>& exportLIDs,
473  Kokkos::DualView<packet_type*,
474  buffer_device_type>& exports,
475  Kokkos::DualView<size_t*,
476  buffer_device_type> numPacketsPerLID,
477  size_t& constantNumPackets)
478 {
479  TEUCHOS_TEST_FOR_EXCEPTION
480  (true, std::logic_error,
481  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
482  "instead, create a point importer using makePointMap function.");
483 }
484 
485 template<class Scalar, class LO, class GO, class Node>
488 (const Kokkos::DualView<const local_ordinal_type*,
489  buffer_device_type>& importLIDs,
490  Kokkos::DualView<packet_type*,
491  buffer_device_type> imports,
492  Kokkos::DualView<size_t*,
493  buffer_device_type> numPacketsPerLID,
494  const size_t constantNumPackets,
495  const CombineMode combineMode)
496 {
497  TEUCHOS_TEST_FOR_EXCEPTION
498  (true, std::logic_error,
499  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
500  "instead, create a point importer using makePointMap function.");
501 }
502 
503 template<class Scalar, class LO, class GO, class Node>
505 isValidLocalMeshIndex (const LO meshLocalIndex) const
506 {
507  return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
508  meshMap_.isNodeLocalElement (meshLocalIndex);
509 }
510 
511 template<class Scalar, class LO, class GO, class Node>
513 putScalar (const Scalar& val)
514 {
515  mv_.putScalar (val);
516 }
517 
518 template<class Scalar, class LO, class GO, class Node>
520 scale (const Scalar& val)
521 {
522  mv_.scale (val);
523 }
524 
525 template<class Scalar, class LO, class GO, class Node>
527 update (const Scalar& alpha,
529  const Scalar& beta)
530 {
531  mv_.update (alpha, X.mv_, beta);
532 }
533 
534 namespace Impl {
535 // Y := alpha * D * X
536 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
537 struct BlockWiseMultiply {
538  typedef typename ViewD::size_type Size;
539 
540 private:
541  typedef typename ViewD::device_type Device;
542  typedef typename ViewD::non_const_value_type ImplScalar;
543  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
544 
545  template <typename View>
546  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
547  typename View::device_type, Unmanaged>;
548  typedef UnmanagedView<ViewY> UnMViewY;
549  typedef UnmanagedView<ViewD> UnMViewD;
550  typedef UnmanagedView<ViewX> UnMViewX;
551 
552  const Size block_size_;
553  Scalar alpha_;
554  UnMViewY Y_;
555  UnMViewD D_;
556  UnMViewX X_;
557 
558 public:
559  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
560  const ViewY& Y, const ViewD& D, const ViewX& X)
561  : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
562  {}
563 
564  KOKKOS_INLINE_FUNCTION
565  void operator() (const Size k) const {
566  const auto zero = Kokkos::ArithTraits<Scalar>::zero();
567  auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
568  const auto num_vecs = X_.extent(1);
569  for (Size i = 0; i < num_vecs; ++i) {
570  Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
571  auto X_curBlk = Kokkos::subview(X_, kslice, i);
572  auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
573  // Y_curBlk := alpha * D_curBlk * X_curBlk.
574  // Recall that GEMV does an update (+=) of the last argument.
575  Tpetra::FILL(Y_curBlk, zero);
576  Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
577  }
578  }
579 };
580 
581 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
582 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
583 createBlockWiseMultiply (const int block_size, const Scalar& alpha,
584  const ViewY& Y, const ViewD& D, const ViewX& X) {
585  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
586 }
587 
588 template <typename ViewY,
589  typename Scalar,
590  typename ViewD,
591  typename ViewZ,
592  typename LO = typename ViewY::size_type>
593 class BlockJacobiUpdate {
594 private:
595  typedef typename ViewD::device_type Device;
596  typedef typename ViewD::non_const_value_type ImplScalar;
597  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
598 
599  template <typename ViewType>
600  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
601  typename ViewType::array_layout,
602  typename ViewType::device_type,
603  Unmanaged>;
604  typedef UnmanagedView<ViewY> UnMViewY;
605  typedef UnmanagedView<ViewD> UnMViewD;
606  typedef UnmanagedView<ViewZ> UnMViewZ;
607 
608  const LO blockSize_;
609  UnMViewY Y_;
610  const Scalar alpha_;
611  UnMViewD D_;
612  UnMViewZ Z_;
613  const Scalar beta_;
614 
615 public:
616  BlockJacobiUpdate (const ViewY& Y,
617  const Scalar& alpha,
618  const ViewD& D,
619  const ViewZ& Z,
620  const Scalar& beta) :
621  blockSize_ (D.extent (1)),
622  // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
623  Y_ (Y),
624  alpha_ (alpha),
625  D_ (D),
626  Z_ (Z),
627  beta_ (beta)
628  {
629  static_assert (static_cast<int> (ViewY::rank) == 1,
630  "Y must have rank 1.");
631  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
632  static_assert (static_cast<int> (ViewZ::rank) == 1,
633  "Z must have rank 1.");
634  // static_assert (static_cast<int> (ViewZ::rank) ==
635  // static_cast<int> (ViewY::rank),
636  // "Z must have the same rank as Y.");
637  }
638 
639  KOKKOS_INLINE_FUNCTION void
640  operator() (const LO& k) const
641  {
642  using Kokkos::ALL;
643  using Kokkos::subview;
644  typedef Kokkos::pair<LO, LO> range_type;
645  typedef Kokkos::ArithTraits<Scalar> KAT;
646 
647  // We only have to implement the alpha != 0 case.
648 
649  auto D_curBlk = subview (D_, k, ALL (), ALL ());
650  const range_type kslice (k*blockSize_, (k+1)*blockSize_);
651 
652  // Z.update (STS::one (), X, -STS::one ()); // assume this is done
653 
654  auto Z_curBlk = subview (Z_, kslice);
655  auto Y_curBlk = subview (Y_, kslice);
656  // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
657  if (beta_ == KAT::zero ()) {
658  Tpetra::FILL (Y_curBlk, KAT::zero ());
659  }
660  else if (beta_ != KAT::one ()) {
661  Tpetra::SCAL (beta_, Y_curBlk);
662  }
663  Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
664  }
665 };
666 
667 template<class ViewY,
668  class Scalar,
669  class ViewD,
670  class ViewZ,
671  class LO = typename ViewD::size_type>
672 void
673 blockJacobiUpdate (const ViewY& Y,
674  const Scalar& alpha,
675  const ViewD& D,
676  const ViewZ& Z,
677  const Scalar& beta)
678 {
679  static_assert (Kokkos::is_view<ViewY>::value, "Y must be a Kokkos::View.");
680  static_assert (Kokkos::is_view<ViewD>::value, "D must be a Kokkos::View.");
681  static_assert (Kokkos::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
682  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
683  "Y and Z must have the same rank.");
684  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
685 
686  const auto lclNumMeshRows = D.extent (0);
687 
688 #ifdef HAVE_TPETRA_DEBUG
689  // D.extent(0) is the (local) number of mesh rows.
690  // D.extent(1) is the block size. Thus, their product should be
691  // the local number of point rows, that is, the number of rows in Y.
692  const auto blkSize = D.extent (1);
693  const auto lclNumPtRows = lclNumMeshRows * blkSize;
694  TEUCHOS_TEST_FOR_EXCEPTION
695  (Y.extent (0) != lclNumPtRows, std::invalid_argument,
696  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
697  "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
698  << " = " << lclNumPtRows << ".");
699  TEUCHOS_TEST_FOR_EXCEPTION
700  (Y.extent (0) != Z.extent (0), std::invalid_argument,
701  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
702  "Z.extent(0) = " << Z.extent (0) << ".");
703  TEUCHOS_TEST_FOR_EXCEPTION
704  (Y.extent (1) != Z.extent (1), std::invalid_argument,
705  "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
706  "Z.extent(1) = " << Z.extent (1) << ".");
707 #endif // HAVE_TPETRA_DEBUG
708 
709  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
710  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
711  // lclNumMeshRows must fit in LO, else the Map would not be correct.
712  range_type range (0, static_cast<LO> (lclNumMeshRows));
713  Kokkos::parallel_for (range, functor);
714 }
715 
716 } // namespace Impl
717 
718 template<class Scalar, class LO, class GO, class Node>
720 blockWiseMultiply (const Scalar& alpha,
721  const Kokkos::View<const impl_scalar_type***,
722  device_type, Kokkos::MemoryUnmanaged>& D,
724 {
725  using Kokkos::ALL;
726  typedef typename device_type::execution_space exec_space;
727  const LO lclNumMeshRows = meshMap_.getLocalNumElements ();
728 
729  if (alpha == STS::zero ()) {
730  this->putScalar (STS::zero ());
731  }
732  else { // alpha != 0
733  const LO blockSize = this->getBlockSize ();
734  const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
735  auto X_lcl = X.mv_.getLocalViewDevice (Access::ReadOnly);
736  auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
737  auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
738 
739  // Use an explicit RangePolicy with the desired execution space.
740  // Otherwise, if you just use a number, it runs on the default
741  // execution space. For example, if the default execution space
742  // is Cuda but the current execution space is Serial, using just a
743  // number would incorrectly run with Cuda.
744  Kokkos::RangePolicy<exec_space, LO> range (0, lclNumMeshRows);
745  Kokkos::parallel_for (range, bwm);
746  }
747 }
748 
749 template<class Scalar, class LO, class GO, class Node>
751 blockJacobiUpdate (const Scalar& alpha,
752  const Kokkos::View<const impl_scalar_type***,
753  device_type, Kokkos::MemoryUnmanaged>& D,
756  const Scalar& beta)
757 {
758  using Kokkos::ALL;
759  using Kokkos::subview;
760  typedef impl_scalar_type IST;
761 
762  const IST alphaImpl = static_cast<IST> (alpha);
763  const IST betaImpl = static_cast<IST> (beta);
764  const LO numVecs = mv_.getNumVectors ();
765 
766  if (alpha == STS::zero ()) { // Y := beta * Y
767  this->scale (beta);
768  }
769  else { // alpha != 0
770  Z.update (STS::one (), X, -STS::one ());
771  for (LO j = 0; j < numVecs; ++j) {
772  auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
773  auto Z_lcl = Z.mv_.getLocalViewDevice (Access::ReadWrite);
774  auto Y_lcl_j = subview (Y_lcl, ALL (), j);
775  auto Z_lcl_j = subview (Z_lcl, ALL (), j);
776  Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
777  }
778  }
779 }
780 
781 } // namespace Tpetra
782 
783 //
784 // Explicit instantiation macro
785 //
786 // Must be expanded from within the Tpetra namespace!
787 //
788 #define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
789  template class BlockMultiVector< S, LO, GO, NODE >;
790 
791 #endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
LO local_ordinal_type
The type of local indices.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
Linear algebra kernels for small dense matrices and vectors.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
static Teuchos::RCP< const map_type > makePointMapRCP(const map_type &meshMap, const LO blockSize)
Create and return an owning RCP to the point Map corresponding to the given mesh Map and block size...
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
const mv_type & getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector&#39;s data.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
global_ordinal_type getIndexBase() const
The index base for this Map.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
typename mv_type::device_type device_type
The Kokkos Device type.
CombineMode
Rule for combining data in an Import or Export.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Abstract base class for objects that can be the source of an Import or Export operation.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector&#39;s local data on device. This requires that th...
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization used for communication buffers.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.