Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_BlockMultiVector_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
43 #define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
44 
46 #include "Tpetra_BlockView.hpp"
47 #include "Teuchos_OrdinalTraits.hpp"
48 
49 namespace { // anonymous
50 
62  template<class MultiVectorType>
63  struct RawHostPtrFromMultiVector {
64  typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;
65 
66  static impl_scalar_type* getRawPtr (MultiVectorType& X) {
67  // NOTE (mfh 09 Jun 2016) This does NOT sync to host, or mark
68  // host as modified. This is on purpose, because we don't want
69  // the BlockMultiVector sync'd to host unnecessarily.
70  // Otherwise, all the MultiVector and BlockMultiVector kernels
71  // would run on host instead of device. See Github Issue #428.
72  auto X_view_host = X.template getLocalView<typename MultiVectorType::dual_view_type::t_host::device_type> ();
73  impl_scalar_type* X_raw = X_view_host.data ();
74  return X_raw;
75  }
76  };
77 
90  template<class S, class LO, class GO, class N>
92  getRawHostPtrFromMultiVector (Tpetra::MultiVector<S, LO, GO, N>& X) {
94  return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
95  }
96 
97 } // namespace (anonymous)
98 
99 namespace Tpetra {
100 
101 template<class Scalar, class LO, class GO, class Node>
102 typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type
105 {
106  return mv_;
107 }
108 
109 template<class Scalar, class LO, class GO, class Node>
110 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
113 {
115  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
116  TEUCHOS_TEST_FOR_EXCEPTION(
117  src_bmv == nullptr, std::invalid_argument, "Tpetra::"
118  "BlockMultiVector: The source object of an Import or Export to a "
119  "BlockMultiVector, must also be a BlockMultiVector.");
120  return Teuchos::rcp (src_bmv, false); // nonowning RCP
121 }
122 
123 template<class Scalar, class LO, class GO, class Node>
126  const Teuchos::DataAccess copyOrView) :
127  dist_object_type (in),
128  meshMap_ (in.meshMap_),
129  pointMap_ (in.pointMap_),
130  mv_ (in.mv_, copyOrView),
131  mvData_ (getRawHostPtrFromMultiVector (mv_)),
132  blockSize_ (in.blockSize_)
133 {}
134 
135 template<class Scalar, class LO, class GO, class Node>
137 BlockMultiVector (const map_type& meshMap,
138  const LO blockSize,
139  const LO numVecs) :
140  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
141  meshMap_ (meshMap),
142  pointMap_ (makePointMap (meshMap, blockSize)),
143  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
144  mvData_ (getRawHostPtrFromMultiVector (mv_)),
145  blockSize_ (blockSize)
146 {}
147 
148 template<class Scalar, class LO, class GO, class Node>
150 BlockMultiVector (const map_type& meshMap,
151  const map_type& pointMap,
152  const LO blockSize,
153  const LO numVecs) :
154  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
155  meshMap_ (meshMap),
156  pointMap_ (pointMap),
157  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
158  mvData_ (getRawHostPtrFromMultiVector (mv_)),
159  blockSize_ (blockSize)
160 {}
161 
162 template<class Scalar, class LO, class GO, class Node>
165  const map_type& meshMap,
166  const LO blockSize) :
167  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
168  meshMap_ (meshMap),
169  mvData_ (nullptr), // just for now
170  blockSize_ (blockSize)
171 {
172  using Teuchos::RCP;
173 
174  if (X_mv.getCopyOrView () == Teuchos::View) {
175  // The input MultiVector has view semantics, so assignment just
176  // does a shallow copy.
177  mv_ = X_mv;
178  }
179  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
180  // The input MultiVector has copy semantics. We can't change
181  // that, but we can make a view of the input MultiVector and
182  // change the view to have view semantics. (That sounds silly;
183  // shouldn't views always have view semantics? but remember that
184  // "view semantics" just governs the default behavior of the copy
185  // constructor and assignment operator.)
186  RCP<const mv_type> X_view_const;
187  const size_t numCols = X_mv.getNumVectors ();
188  if (numCols == 0) {
189  Teuchos::Array<size_t> cols (0); // view will have zero columns
190  X_view_const = X_mv.subView (cols ());
191  } else { // Range1D is an inclusive range
192  X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
193  }
194  TEUCHOS_TEST_FOR_EXCEPTION(
195  X_view_const.is_null (), std::logic_error, "Tpetra::"
196  "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
197  "should never happen. Please report this bug to the Tpetra developers.");
198 
199  // It's perfectly OK to cast away const here. Those view methods
200  // should be marked const anyway, because views can't change the
201  // allocation (just the entries themselves).
202  RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
203  TEUCHOS_TEST_FOR_EXCEPTION(
204  X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
205  "BlockMultiVector constructor: We just set a MultiVector "
206  "to have view semantics, but it claims that it doesn't have view "
207  "semantics. This should never happen. "
208  "Please report this bug to the Tpetra developers.");
209  mv_ = *X_view; // MultiVector::operator= does a shallow copy here
210  }
211 
212  // At this point, mv_ has been assigned, so we can ignore X_mv.
213  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
214  if (! pointMap.is_null ()) {
215  pointMap_ = *pointMap; // Map::operator= also does a shallow copy
216  }
217  mvData_ = getRawHostPtrFromMultiVector (mv_);
218 }
219 
220 template<class Scalar, class LO, class GO, class Node>
223  const map_type& newMeshMap,
224  const map_type& newPointMap,
225  const size_t offset) :
226  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
227  meshMap_ (newMeshMap),
228  pointMap_ (newPointMap),
229  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
230  mvData_ (getRawHostPtrFromMultiVector (mv_)),
231  blockSize_ (X.getBlockSize ())
232 {}
233 
234 template<class Scalar, class LO, class GO, class Node>
237  const map_type& newMeshMap,
238  const size_t offset) :
239  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
240  meshMap_ (newMeshMap),
241  pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
242  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
243  mvData_ (getRawHostPtrFromMultiVector (mv_)),
244  blockSize_ (X.getBlockSize ())
245 {}
246 
247 template<class Scalar, class LO, class GO, class Node>
250  dist_object_type (Teuchos::null),
251  mvData_ (nullptr),
252  blockSize_ (0)
253 {}
254 
255 template<class Scalar, class LO, class GO, class Node>
258 makePointMap (const map_type& meshMap, const LO blockSize)
259 {
260  typedef Tpetra::global_size_t GST;
261  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
262 
263  const GST gblNumMeshMapInds =
264  static_cast<GST> (meshMap.getGlobalNumElements ());
265  const size_t lclNumMeshMapIndices =
266  static_cast<size_t> (meshMap.getNodeNumElements ());
267  const GST gblNumPointMapInds =
268  gblNumMeshMapInds * static_cast<GST> (blockSize);
269  const size_t lclNumPointMapInds =
270  lclNumMeshMapIndices * static_cast<size_t> (blockSize);
271  const GO indexBase = meshMap.getIndexBase ();
272 
273  if (meshMap.isContiguous ()) {
274  return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
275  meshMap.getComm ());
276  }
277  else {
278  // "Hilbert's Hotel" trick: multiply each process' GIDs by
279  // blockSize, and fill in. That ensures correctness even if the
280  // mesh Map is overlapping.
281  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getNodeElementList ();
282  const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
283  Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
284  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
285  const GO meshGid = lclMeshGblInds[g];
286  const GO pointGidStart = indexBase +
287  (meshGid - indexBase) * static_cast<GO> (blockSize);
288  const size_type offset = g * static_cast<size_type> (blockSize);
289  for (LO k = 0; k < blockSize; ++k) {
290  const GO pointGid = pointGidStart + static_cast<GO> (k);
291  lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
292  }
293  }
294  return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
295  meshMap.getComm ());
296  }
297 }
298 
299 
300 template<class Scalar, class LO, class GO, class Node>
301 void
303 replaceLocalValuesImpl (const LO localRowIndex,
304  const LO colIndex,
305  const Scalar vals[]) const
306 {
307  auto X_dst = getLocalBlock (localRowIndex, colIndex);
308  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
309  getBlockSize ());
310  Kokkos::deep_copy (X_dst, X_src);
311 }
312 
313 
314 template<class Scalar, class LO, class GO, class Node>
315 bool
317 replaceLocalValues (const LO localRowIndex,
318  const LO colIndex,
319  const Scalar vals[]) const
320 {
321  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
322  return false;
323  } else {
324  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
325  return true;
326  }
327 }
328 
329 template<class Scalar, class LO, class GO, class Node>
330 bool
332 replaceGlobalValues (const GO globalRowIndex,
333  const LO colIndex,
334  const Scalar vals[]) const
335 {
336  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
337  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
338  return false;
339  } else {
340  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
341  return true;
342  }
343 }
344 
345 template<class Scalar, class LO, class GO, class Node>
346 void
348 sumIntoLocalValuesImpl (const LO localRowIndex,
349  const LO colIndex,
350  const Scalar vals[]) const
351 {
352  auto X_dst = getLocalBlock (localRowIndex, colIndex);
353  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
354  getBlockSize ());
355  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
356 }
357 
358 template<class Scalar, class LO, class GO, class Node>
359 bool
361 sumIntoLocalValues (const LO localRowIndex,
362  const LO colIndex,
363  const Scalar vals[]) const
364 {
365  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
366  return false;
367  } else {
368  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
369  return true;
370  }
371 }
372 
373 template<class Scalar, class LO, class GO, class Node>
374 bool
376 sumIntoGlobalValues (const GO globalRowIndex,
377  const LO colIndex,
378  const Scalar vals[]) const
379 {
380  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
381  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
382  return false;
383  } else {
384  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
385  return true;
386  }
387 }
388 
389 template<class Scalar, class LO, class GO, class Node>
390 bool
392 getLocalRowView (const LO localRowIndex, const LO colIndex, Scalar*& vals) const
393 {
394  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
395  return false;
396  } else {
397  auto X_ij = getLocalBlock (localRowIndex, colIndex);
398  vals = reinterpret_cast<Scalar*> (X_ij.data ());
399  return true;
400  }
401 }
402 
403 template<class Scalar, class LO, class GO, class Node>
404 bool
406 getGlobalRowView (const GO globalRowIndex, const LO colIndex, Scalar*& vals) const
407 {
408  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
409  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
410  return false;
411  } else {
412  auto X_ij = getLocalBlock (localRowIndex, colIndex);
413  vals = reinterpret_cast<Scalar*> (X_ij.data ());
414  return true;
415  }
416 }
417 
418 template<class Scalar, class LO, class GO, class Node>
421 getLocalBlock (const LO localRowIndex,
422  const LO colIndex) const
423 {
424  // NOTE (mfh 07 Jul 2016) It should be correct to add the
425  // commented-out test below. However, I've conservatively commented
426  // it out, since users might not realize that they need to have
427  // things sync'd correctly.
428 
429 // #ifdef HAVE_TPETRA_DEBUG
430 // TEUCHOS_TEST_FOR_EXCEPTION
431 // (mv_.need_sync_host (), std::runtime_error,
432 // "Tpetra::BlockMultiVector::getLocalBlock: This method "
433 // "accesses host data, but the object is not in sync on host." );
434 // #endif // HAVE_TPETRA_DEBUG
435 
436  if (! isValidLocalMeshIndex (localRowIndex)) {
437  return typename little_vec_type::HostMirror ();
438  } else {
439  const size_t blockSize = getBlockSize ();
440  const size_t offset = colIndex * this->getStrideY () +
441  localRowIndex * blockSize;
442  impl_scalar_type* blockRaw = this->getRawPtr () + offset;
443  return typename little_vec_type::HostMirror (blockRaw, blockSize);
444  }
445 }
446 
447 template<class Scalar, class LO, class GO, class Node>
448 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
451 {
452  using Teuchos::rcp;
453  using Teuchos::rcpFromRef;
454 
455  // The source object of an Import or Export must be a
456  // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
457  // them in that order; one must succeed. Note that the target of
458  // the Import or Export calls checkSizes in DistObject's doTransfer.
459  typedef BlockMultiVector<Scalar, LO, GO, Node> this_type;
460  const this_type* srcBlkVec = dynamic_cast<const this_type*> (&src);
461  if (srcBlkVec == nullptr) {
462  const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
463  if (srcMultiVec == nullptr) {
464  // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
465  // currently does a shallow copy by default. This is why we
466  // return by RCP, rather than by value.
467  return rcp (new mv_type ());
468  } else { // src is a MultiVector
469  return rcp (srcMultiVec, false); // nonowning RCP
470  }
471  } else { // src is a BlockMultiVector
472  return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
473  }
474 }
475 
476 template<class Scalar, class LO, class GO, class Node>
479 {
480  return ! getMultiVectorFromSrcDistObject (src).is_null ();
481 }
482 
483 template<class Scalar, class LO, class GO, class Node>
485 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
486 copyAndPermuteNew
487 #else // TPETRA_ENABLE_DEPRECATED_CODE
488 copyAndPermute
489 #endif // TPETRA_ENABLE_DEPRECATED_CODE
490 (const SrcDistObject& src,
491  const size_t numSameIDs,
492  const Kokkos::DualView<const local_ordinal_type*,
493  buffer_device_type>& permuteToLIDs,
494  const Kokkos::DualView<const local_ordinal_type*,
495  buffer_device_type>& permuteFromLIDs)
496 {
497  TEUCHOS_TEST_FOR_EXCEPTION
498  (true, std::logic_error,
499  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
500  "instead, create a point importer using makePointMap function.");
501 }
502 
503 template<class Scalar, class LO, class GO, class Node>
504 void BlockMultiVector<Scalar, LO, GO, Node>::
505 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
506 packAndPrepareNew
507 #else // TPETRA_ENABLE_DEPRECATED_CODE
508 packAndPrepare
509 #endif // TPETRA_ENABLE_DEPRECATED_CODE
510 (const SrcDistObject& src,
511  const Kokkos::DualView<const local_ordinal_type*,
512  buffer_device_type>& exportLIDs,
513  Kokkos::DualView<packet_type*,
514  buffer_device_type>& exports,
515  Kokkos::DualView<size_t*,
516  buffer_device_type> numPacketsPerLID,
517  size_t& constantNumPackets,
518  Distributor& distor)
519 {
520  TEUCHOS_TEST_FOR_EXCEPTION
521  (true, std::logic_error,
522  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
523  "instead, create a point importer using makePointMap function.");
524 }
525 
526 template<class Scalar, class LO, class GO, class Node>
527 void BlockMultiVector<Scalar, LO, GO, Node>::
528 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
529 unpackAndCombineNew
530 #else // TPETRA_ENABLE_DEPRECATED_CODE
531 unpackAndCombine
532 #endif // TPETRA_ENABLE_DEPRECATED_CODE
533 (const Kokkos::DualView<const local_ordinal_type*,
534  buffer_device_type>& importLIDs,
535  Kokkos::DualView<packet_type*,
536  buffer_device_type> imports,
537  Kokkos::DualView<size_t*,
538  buffer_device_type> numPacketsPerLID,
539  const size_t constantNumPackets,
540  Distributor& distor,
541  const CombineMode combineMode)
542 {
543  TEUCHOS_TEST_FOR_EXCEPTION
544  (true, std::logic_error,
545  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
546  "instead, create a point importer using makePointMap function.");
547 }
548 
549 template<class Scalar, class LO, class GO, class Node>
551 isValidLocalMeshIndex (const LO meshLocalIndex) const
552 {
553  return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
554  meshMap_.isNodeLocalElement (meshLocalIndex);
555 }
556 
557 template<class Scalar, class LO, class GO, class Node>
559 putScalar (const Scalar& val)
560 {
561  mv_.putScalar (val);
562 }
563 
564 template<class Scalar, class LO, class GO, class Node>
566 scale (const Scalar& val)
567 {
568  mv_.scale (val);
569 }
570 
571 template<class Scalar, class LO, class GO, class Node>
573 update (const Scalar& alpha,
575  const Scalar& beta)
576 {
577  mv_.update (alpha, X.mv_, beta);
578 }
579 
580 namespace Impl {
581 // Y := alpha * D * X
582 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
583 struct BlockWiseMultiply {
584  typedef typename ViewD::size_type Size;
585 
586 private:
587  typedef typename ViewD::device_type Device;
588  typedef typename ViewD::non_const_value_type ImplScalar;
589  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
590 
591  template <typename View>
592  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
593  typename View::device_type, Unmanaged>;
594  typedef UnmanagedView<ViewY> UnMViewY;
595  typedef UnmanagedView<ViewD> UnMViewD;
596  typedef UnmanagedView<ViewX> UnMViewX;
597 
598  const Size block_size_;
599  Scalar alpha_;
600  UnMViewY Y_;
601  UnMViewD D_;
602  UnMViewX X_;
603 
604 public:
605  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
606  const ViewY& Y, const ViewD& D, const ViewX& X)
607  : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
608  {}
609 
610  KOKKOS_INLINE_FUNCTION
611  void operator() (const Size k) const {
612  const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
613  auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
614  const auto num_vecs = X_.extent(1);
615  for (Size i = 0; i < num_vecs; ++i) {
616  Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
617  auto X_curBlk = Kokkos::subview(X_, kslice, i);
618  auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
619  // Y_curBlk := alpha * D_curBlk * X_curBlk.
620  // Recall that GEMV does an update (+=) of the last argument.
621  Tpetra::FILL(Y_curBlk, zero);
622  Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
623  }
624  }
625 };
626 
627 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
628 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
629 createBlockWiseMultiply (const int block_size, const Scalar& alpha,
630  const ViewY& Y, const ViewD& D, const ViewX& X) {
631  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
632 }
633 
634 template <typename ViewY,
635  typename Scalar,
636  typename ViewD,
637  typename ViewZ,
638  typename LO = typename ViewY::size_type>
639 class BlockJacobiUpdate {
640 private:
641  typedef typename ViewD::device_type Device;
642  typedef typename ViewD::non_const_value_type ImplScalar;
643  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
644 
645  template <typename ViewType>
646  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
647  typename ViewType::array_layout,
648  typename ViewType::device_type,
649  Unmanaged>;
650  typedef UnmanagedView<ViewY> UnMViewY;
651  typedef UnmanagedView<ViewD> UnMViewD;
652  typedef UnmanagedView<ViewZ> UnMViewZ;
653 
654  const LO blockSize_;
655  UnMViewY Y_;
656  const Scalar alpha_;
657  UnMViewD D_;
658  UnMViewZ Z_;
659  const Scalar beta_;
660 
661 public:
662  BlockJacobiUpdate (const ViewY& Y,
663  const Scalar& alpha,
664  const ViewD& D,
665  const ViewZ& Z,
666  const Scalar& beta) :
667  blockSize_ (D.extent (1)),
668  // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
669  Y_ (Y),
670  alpha_ (alpha),
671  D_ (D),
672  Z_ (Z),
673  beta_ (beta)
674  {
675  static_assert (static_cast<int> (ViewY::rank) == 1,
676  "Y must have rank 1.");
677  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
678  static_assert (static_cast<int> (ViewZ::rank) == 1,
679  "Z must have rank 1.");
680  // static_assert (static_cast<int> (ViewZ::rank) ==
681  // static_cast<int> (ViewY::rank),
682  // "Z must have the same rank as Y.");
683  }
684 
685  KOKKOS_INLINE_FUNCTION void
686  operator() (const LO& k) const
687  {
688  using Kokkos::ALL;
689  using Kokkos::subview;
690  typedef Kokkos::pair<LO, LO> range_type;
691  typedef Kokkos::Details::ArithTraits<Scalar> KAT;
692 
693  // We only have to implement the alpha != 0 case.
694 
695  auto D_curBlk = subview (D_, k, ALL (), ALL ());
696  const range_type kslice (k*blockSize_, (k+1)*blockSize_);
697 
698  // Z.update (STS::one (), X, -STS::one ()); // assume this is done
699 
700  auto Z_curBlk = subview (Z_, kslice);
701  auto Y_curBlk = subview (Y_, kslice);
702  // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
703  if (beta_ == KAT::zero ()) {
704  Tpetra::FILL (Y_curBlk, KAT::zero ());
705  }
706  else if (beta_ != KAT::one ()) {
707  Tpetra::SCAL (beta_, Y_curBlk);
708  }
709  Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
710  }
711 };
712 
713 template<class ViewY,
714  class Scalar,
715  class ViewD,
716  class ViewZ,
717  class LO = typename ViewD::size_type>
718 void
719 blockJacobiUpdate (const ViewY& Y,
720  const Scalar& alpha,
721  const ViewD& D,
722  const ViewZ& Z,
723  const Scalar& beta)
724 {
725  static_assert (Kokkos::Impl::is_view<ViewY>::value, "Y must be a Kokkos::View.");
726  static_assert (Kokkos::Impl::is_view<ViewD>::value, "D must be a Kokkos::View.");
727  static_assert (Kokkos::Impl::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
728  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
729  "Y and Z must have the same rank.");
730  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
731 
732  const auto lclNumMeshRows = D.extent (0);
733 
734 #ifdef HAVE_TPETRA_DEBUG
735  // D.extent(0) is the (local) number of mesh rows.
736  // D.extent(1) is the block size. Thus, their product should be
737  // the local number of point rows, that is, the number of rows in Y.
738  const auto blkSize = D.extent (1);
739  const auto lclNumPtRows = lclNumMeshRows * blkSize;
740  TEUCHOS_TEST_FOR_EXCEPTION
741  (Y.extent (0) != lclNumPtRows, std::invalid_argument,
742  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
743  "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
744  << " = " << lclNumPtRows << ".");
745  TEUCHOS_TEST_FOR_EXCEPTION
746  (Y.extent (0) != Z.extent (0), std::invalid_argument,
747  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
748  "Z.extent(0) = " << Z.extent (0) << ".");
749  TEUCHOS_TEST_FOR_EXCEPTION
750  (Y.extent (1) != Z.extent (1), std::invalid_argument,
751  "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
752  "Z.extent(1) = " << Z.extent (1) << ".");
753 #endif // HAVE_TPETRA_DEBUG
754 
755  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
756  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
757  // lclNumMeshRows must fit in LO, else the Map would not be correct.
758  range_type range (0, static_cast<LO> (lclNumMeshRows));
759  Kokkos::parallel_for (range, functor);
760 }
761 
762 } // namespace Impl
763 
764 template<class Scalar, class LO, class GO, class Node>
766 blockWiseMultiply (const Scalar& alpha,
767  const Kokkos::View<const impl_scalar_type***,
768  device_type, Kokkos::MemoryUnmanaged>& D,
770 {
771  using Kokkos::ALL;
772  typedef typename device_type::execution_space execution_space;
773  typedef typename device_type::memory_space memory_space;
774  const LO lclNumMeshRows = meshMap_.getNodeNumElements ();
775 
776  if (alpha == STS::zero ()) {
777  this->putScalar (STS::zero ());
778  }
779  else { // alpha != 0
780  const LO blockSize = this->getBlockSize ();
781  const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
782  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
783  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
784  auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
785 
786  // Use an explicit RangePolicy with the desired execution space.
787  // Otherwise, if you just use a number, it runs on the default
788  // execution space. For example, if the default execution space
789  // is Cuda but the current execution space is Serial, using just a
790  // number would incorrectly run with Cuda.
791  Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
792  Kokkos::parallel_for (range, bwm);
793  }
794 }
795 
796 template<class Scalar, class LO, class GO, class Node>
798 blockJacobiUpdate (const Scalar& alpha,
799  const Kokkos::View<const impl_scalar_type***,
800  device_type, Kokkos::MemoryUnmanaged>& D,
803  const Scalar& beta)
804 {
805  using Kokkos::ALL;
806  using Kokkos::subview;
807  typedef typename device_type::memory_space memory_space;
808  typedef impl_scalar_type IST;
809 
810  const IST alphaImpl = static_cast<IST> (alpha);
811  const IST betaImpl = static_cast<IST> (beta);
812  const LO numVecs = mv_.getNumVectors ();
813 
814  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
815  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
816  auto Z_lcl = Z.mv_.template getLocalView<memory_space> ();
817 
818  if (alpha == STS::zero ()) { // Y := beta * Y
819  this->scale (beta);
820  }
821  else { // alpha != 0
822  Z.update (STS::one (), X, -STS::one ());
823  for (LO j = 0; j < numVecs; ++j) {
824  auto X_lcl_j = subview (X_lcl, ALL (), j);
825  auto Y_lcl_j = subview (Y_lcl, ALL (), j);
826  auto Z_lcl_j = subview (Z_lcl, ALL (), j);
827  Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
828  }
829  }
830 }
831 
832 } // namespace Tpetra
833 
834 //
835 // Explicit instantiation macro
836 //
837 // Must be expanded from within the Tpetra namespace!
838 //
839 #define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
840  template class BlockMultiVector< S, LO, GO, NODE >;
841 
842 #endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using local row and column indices.
bool getLocalRowView(const LO localRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a local index. ...
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
size_t getNumVectors() const
Number of columns in the multivector.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
One or more distributed dense vectors.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
Linear algebra kernels for small dense matrices and vectors.
GlobalOrdinal getIndexBase() const
The index base for this Map.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
int local_ordinal_type
Default value of Scalar template parameter.
MultiVector for multiple degrees of freedom per mesh point.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
typename device_type::execution_space execution_space
The Kokkos execution space.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a global index.
typename mv_type::device_type device_type
The Kokkos Device type.
CombineMode
Rule for combining data in an Import or Export.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Abstract base class for objects that can be the source of an Import or Export operation.
bool getGlobalRowView(const GO globalRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a global index.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using a global index.
Teuchos::ArrayView< const GlobalOrdinal > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector&#39;s data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a local index.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.