Tpetra_BlockMultiVector_def.hpp
// @HEADER
// ***********************************************************************
//
// Tpetra: Templated Linear Algebra Services Package
// Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ************************************************************************
// @HEADER

#ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
#define TPETRA_BLOCKMULTIVECTOR_DEF_HPP

#include "Tpetra_BlockMultiVector_decl.hpp"
#include "Tpetra_BlockView.hpp"
#include "Teuchos_OrdinalTraits.hpp"

namespace { // anonymous

  // Implementation detail: get a raw host pointer to a
  // Tpetra::MultiVector's local data.
  template<class MultiVectorType>
  struct RawHostPtrFromMultiVector {
    typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;

    static impl_scalar_type* getRawPtr (MultiVectorType& X) {
      // NOTE (mfh 09 Jun 2016) This does NOT sync to host, or mark
      // host as modified.  This is on purpose, because we don't want
      // the BlockMultiVector sync'd to host unnecessarily.
      // Otherwise, all the MultiVector and BlockMultiVector kernels
      // would run on host instead of device.  See Github Issue #428.
      auto X_view_host = X.template getLocalView<typename MultiVectorType::dual_view_type::t_host::device_type> ();
      impl_scalar_type* X_raw = X_view_host.data ();
      return X_raw;
    }
  };

  template<class S, class LO, class GO, class N>
  typename Tpetra::MultiVector<S, LO, GO, N>::impl_scalar_type*
  getRawHostPtrFromMultiVector (Tpetra::MultiVector<S, LO, GO, N>& X) {
    typedef Tpetra::MultiVector<S, LO, GO, N> MV;
    return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
  }

} // namespace (anonymous)

namespace Tpetra {

template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type
BlockMultiVector<Scalar, LO, GO, Node>::
getMultiVectorView () const
{
  return mv_;
}

template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
BlockMultiVector<Scalar, LO, GO, Node>::
getBlockMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
{
  typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
  TEUCHOS_TEST_FOR_EXCEPTION(
    src_bmv == nullptr, std::invalid_argument, "Tpetra::"
    "BlockMultiVector: The source object of an Import or Export to a "
    "BlockMultiVector must also be a BlockMultiVector.");
  return Teuchos::rcp (src_bmv, false); // nonowning RCP
}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const BlockMultiVector<Scalar, LO, GO, Node>& in,
                  const Teuchos::DataAccess copyOrView) :
  dist_object_type (in),
  meshMap_ (in.meshMap_),
  pointMap_ (in.pointMap_),
  mv_ (in.mv_, copyOrView),
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
  blockSize_ (in.blockSize_)
{}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const map_type& meshMap,
                  const LO blockSize,
                  const LO numVecs) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  pointMap_ (makePointMap (meshMap, blockSize)),
  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
  blockSize_ (blockSize)
{}
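
// Usage sketch (editorial example, not part of the original source;
// the name meshMap stands for a previously built mesh Map):
//
//   // Two degrees of freedom per mesh point, three columns.
//   Tpetra::BlockMultiVector<double> X (meshMap, 2, 3);
//   X.putScalar (0.0); // fill all entries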

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const map_type& meshMap,
                  const map_type& pointMap,
                  const LO blockSize,
                  const LO numVecs) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  pointMap_ (pointMap),
  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
  blockSize_ (blockSize)
{}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const mv_type& X_mv,
                  const map_type& meshMap,
                  const LO blockSize) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  mvData_ (nullptr), // just for now
  blockSize_ (blockSize)
{
  using Teuchos::RCP;

  if (X_mv.getCopyOrView () == Teuchos::View) {
    // The input MultiVector has view semantics, so assignment just
    // does a shallow copy.
    mv_ = X_mv;
  }
  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
    // The input MultiVector has copy semantics.  We can't change
    // that, but we can make a view of the input MultiVector and
    // change the view to have view semantics.  (That sounds silly;
    // shouldn't views always have view semantics?  But remember that
    // "view semantics" just governs the default behavior of the copy
    // constructor and assignment operator.)
    RCP<const mv_type> X_view_const;
    const size_t numCols = X_mv.getNumVectors ();
    if (numCols == 0) {
      Teuchos::Array<size_t> cols (0); // view will have zero columns
      X_view_const = X_mv.subView (cols ());
    } else { // Range1D is an inclusive range
      X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
    }
    TEUCHOS_TEST_FOR_EXCEPTION(
      X_view_const.is_null (), std::logic_error, "Tpetra::"
      "BlockMultiVector constructor: X_mv.subView(...) returned null.  This "
      "should never happen.  Please report this bug to the Tpetra developers.");

    // It's perfectly OK to cast away const here.  Those view methods
    // should be marked const anyway, because views can't change the
    // allocation (just the entries themselves).
    RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
    TEUCHOS_TEST_FOR_EXCEPTION(
      X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
      "BlockMultiVector constructor: We just set a MultiVector "
      "to have view semantics, but it claims that it doesn't have view "
      "semantics.  This should never happen.  "
      "Please report this bug to the Tpetra developers.");
    mv_ = *X_view; // MultiVector::operator= does a shallow copy here
  }

  // At this point, mv_ has been assigned, so we can ignore X_mv.
  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
  if (! pointMap.is_null ()) {
    pointMap_ = *pointMap; // Map::operator= also does a shallow copy
  }
  mvData_ = getRawHostPtrFromMultiVector (mv_);
}
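
// Usage sketch (editorial example, not part of the original source):
// wrap an existing point MultiVector in block form.  This assumes
// X_mv's Map matches makePointMap (meshMap, blockSize); the names
// X_mv, meshMap, and blockSize are hypothetical.
//
//   Tpetra::BlockMultiVector<double> X_blk (X_mv, meshMap, blockSize);
//   // X_blk views X_mv's data; no deep copy of the entries occurs.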

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const BlockMultiVector<Scalar, LO, GO, Node>& X,
                  const map_type& newMeshMap,
                  const map_type& newPointMap,
                  const size_t offset) :
  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
  meshMap_ (newMeshMap),
  pointMap_ (newPointMap),
  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
  blockSize_ (X.getBlockSize ())
{}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const BlockMultiVector<Scalar, LO, GO, Node>& X,
                  const map_type& newMeshMap,
                  const size_t offset) :
  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
  meshMap_ (newMeshMap),
  pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
  blockSize_ (X.getBlockSize ())
{}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector () :
  dist_object_type (Teuchos::null),
  mvData_ (nullptr),
  blockSize_ (0)
{}

template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::map_type
BlockMultiVector<Scalar, LO, GO, Node>::
makePointMap (const map_type& meshMap, const LO blockSize)
{
  typedef Tpetra::global_size_t GST;
  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;

  const GST gblNumMeshMapInds =
    static_cast<GST> (meshMap.getGlobalNumElements ());
  const size_t lclNumMeshMapInds =
    static_cast<size_t> (meshMap.getNodeNumElements ());
  const GST gblNumPointMapInds =
    gblNumMeshMapInds * static_cast<GST> (blockSize);
  const size_t lclNumPointMapInds =
    lclNumMeshMapInds * static_cast<size_t> (blockSize);
  const GO indexBase = meshMap.getIndexBase ();

  if (meshMap.isContiguous ()) {
    return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
                     meshMap.getComm ());
  }
  else {
    // "Hilbert's Hotel" trick: multiply each process' global indices
    // by blockSize, and fill in the gaps.  That ensures correctness
    // even if the mesh Map is overlapping.  See the worked example
    // after this function.
    Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getNodeElementList ();
    const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
    Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
    for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
      const GO meshGid = lclMeshGblInds[g];
      const GO pointGidStart = indexBase +
        (meshGid - indexBase) * static_cast<GO> (blockSize);
      const size_type offset = g * static_cast<size_type> (blockSize);
      for (LO k = 0; k < blockSize; ++k) {
        const GO pointGid = pointGidStart + static_cast<GO> (k);
        lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
      }
    }
    return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
                     meshMap.getComm ());
  }
}
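
// Worked example of the noncontiguous ("Hilbert's Hotel") branch
// above (editorial note): suppose a process owns the mesh global
// indices {0, 5, 7}, with indexBase = 0 and blockSize = 2.  Each mesh
// index g yields the point indices g*2 and g*2 + 1, so the process
// owns the point global indices {0, 1, 10, 11, 14, 15}.  A nonzero
// indexBase only shifts the arithmetic:
//   pointGidStart = indexBase + (meshGid - indexBase) * blockSize.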

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
replaceLocalValuesImpl (const LO localRowIndex,
                        const LO colIndex,
                        const Scalar vals[]) const
{
  auto X_dst = getLocalBlock (localRowIndex, colIndex);
  typename const_little_vec_type::HostMirror::const_type X_src
    (reinterpret_cast<const impl_scalar_type*> (vals), getBlockSize ());
  Kokkos::deep_copy (X_dst, X_src);
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
replaceLocalValues (const LO localRowIndex,
                    const LO colIndex,
                    const Scalar vals[]) const
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    replaceLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}
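
// Usage sketch (editorial example, not part of the original source;
// X is a hypothetical BlockMultiVector<double> with block size 3):
//
//   const double vals[3] = {1.0, 2.0, 3.0};
//   const bool ok = X.replaceLocalValues (0, 0, vals);
//   // ok is false if local mesh row 0 is not owned by this process.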

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
replaceGlobalValues (const GO globalRowIndex,
                     const LO colIndex,
                     const Scalar vals[]) const
{
  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
    return false;
  } else {
    replaceLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
sumIntoLocalValuesImpl (const LO localRowIndex,
                        const LO colIndex,
                        const Scalar vals[]) const
{
  auto X_dst = getLocalBlock (localRowIndex, colIndex);
  typename const_little_vec_type::HostMirror::const_type X_src
    (reinterpret_cast<const impl_scalar_type*> (vals), getBlockSize ());
  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
sumIntoLocalValues (const LO localRowIndex,
                    const LO colIndex,
                    const Scalar vals[]) const
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
sumIntoGlobalValues (const GO globalRowIndex,
                     const LO colIndex,
                     const Scalar vals[]) const
{
  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
    return false;
  } else {
    sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
getLocalRowView (const LO localRowIndex, const LO colIndex, Scalar*& vals) const
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    auto X_ij = getLocalBlock (localRowIndex, colIndex);
    vals = reinterpret_cast<Scalar*> (X_ij.data ());
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
getGlobalRowView (const GO globalRowIndex, const LO colIndex, Scalar*& vals) const
{
  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
    return false;
  } else {
    auto X_ij = getLocalBlock (localRowIndex, colIndex);
    vals = reinterpret_cast<Scalar*> (X_ij.data ());
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::little_vec_type::HostMirror
BlockMultiVector<Scalar, LO, GO, Node>::
getLocalBlock (const LO localRowIndex,
               const LO colIndex) const
{
  // NOTE (mfh 07 Jul 2016) It should be correct to add the
  // commented-out test below.  However, I've conservatively commented
  // it out, since users might not realize that they need to have
  // things sync'd correctly.

// #ifdef HAVE_TPETRA_DEBUG
//   TEUCHOS_TEST_FOR_EXCEPTION
//     (mv_.need_sync_host (), std::runtime_error,
//      "Tpetra::BlockMultiVector::getLocalBlock: This method "
//      "accesses host data, but the object is not in sync on host.");
// #endif // HAVE_TPETRA_DEBUG

  if (! isValidLocalMeshIndex (localRowIndex)) {
    return typename little_vec_type::HostMirror ();
  } else {
    const size_t blockSize = getBlockSize ();
    const size_t offset = colIndex * this->getStrideY () +
      localRowIndex * blockSize;
    impl_scalar_type* blockRaw = this->getRawPtr () + offset;
    return typename little_vec_type::HostMirror (blockRaw, blockSize);
  }
}
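
// Layout note (editorial): mv_'s host data are stored column-major
// with column stride getStrideY(), so the block for (localRowIndex,
// colIndex) occupies blockSize consecutive entries starting at offset
//   colIndex * getStrideY() + localRowIndex * blockSize.
// For example, with blockSize = 2, the block at local row 3 of
// column 1 starts at getStrideY() + 6.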

template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
BlockMultiVector<Scalar, LO, GO, Node>::
getMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
{
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;

  // The source object of an Import or Export must be a
  // BlockMultiVector or MultiVector (a Vector is a MultiVector).  Try
  // them in that order; one must succeed.  Note that the target of
  // the Import or Export calls checkSizes in DistObject's doTransfer.
  typedef BlockMultiVector<Scalar, LO, GO, Node> this_type;
  const this_type* srcBlkVec = dynamic_cast<const this_type*> (&src);
  if (srcBlkVec == nullptr) {
    const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
    if (srcMultiVec == nullptr) {
      // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
      // currently does a shallow copy by default.  This is why we
      // return by RCP, rather than by value.
      return rcp (new mv_type ());
    } else { // src is a MultiVector
      return rcp (srcMultiVec, false); // nonowning RCP
    }
  } else { // src is a BlockMultiVector
    return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
  }
}

template<class Scalar, class LO, class GO, class Node>
bool BlockMultiVector<Scalar, LO, GO, Node>::
checkSizes (const Tpetra::SrcDistObject& src)
{
  return ! getMultiVectorFromSrcDistObject (src).is_null ();
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
copyAndPermute
(const SrcDistObject& src,
 const size_t numSameIDs,
 const Kokkos::DualView<const local_ordinal_type*,
   buffer_device_type>& permuteToLIDs,
 const Kokkos::DualView<const local_ordinal_type*,
   buffer_device_type>& permuteFromLIDs)
{
  TEUCHOS_TEST_FOR_EXCEPTION
    (true, std::logic_error,
     "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this method; "
     "instead, create a point importer using the makePointMap function.");
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
packAndPrepare
(const SrcDistObject& src,
 const Kokkos::DualView<const local_ordinal_type*,
   buffer_device_type>& exportLIDs,
 Kokkos::DualView<packet_type*,
   buffer_device_type>& exports,
 Kokkos::DualView<size_t*,
   buffer_device_type> numPacketsPerLID,
 size_t& constantNumPackets,
 Distributor& distor)
{
  TEUCHOS_TEST_FOR_EXCEPTION
    (true, std::logic_error,
     "Tpetra::BlockMultiVector::packAndPrepare: Do NOT use this method; "
     "instead, create a point importer using the makePointMap function.");
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
unpackAndCombine
(const Kokkos::DualView<const local_ordinal_type*,
   buffer_device_type>& importLIDs,
 Kokkos::DualView<packet_type*,
   buffer_device_type> imports,
 Kokkos::DualView<size_t*,
   buffer_device_type> numPacketsPerLID,
 const size_t constantNumPackets,
 Distributor& distor,
 const CombineMode combineMode)
{
  TEUCHOS_TEST_FOR_EXCEPTION
    (true, std::logic_error,
     "Tpetra::BlockMultiVector::unpackAndCombine: Do NOT use this method; "
     "instead, create a point importer using the makePointMap function.");
}

template<class Scalar, class LO, class GO, class Node>
bool BlockMultiVector<Scalar, LO, GO, Node>::
isValidLocalMeshIndex (const LO meshLocalIndex) const
{
  return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
    meshMap_.isNodeLocalElement (meshLocalIndex);
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
putScalar (const Scalar& val)
{
  mv_.putScalar (val);
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
scale (const Scalar& val)
{
  mv_.scale (val);
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
update (const Scalar& alpha,
        const BlockMultiVector<Scalar, LO, GO, Node>& X,
        const Scalar& beta)
{
  mv_.update (alpha, X.mv_, beta);
}

namespace Impl {
// Y := alpha * D * X
template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
struct BlockWiseMultiply {
  typedef typename ViewD::size_type Size;

private:
  typedef typename ViewD::device_type Device;
  typedef typename ViewD::non_const_value_type ImplScalar;
  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;

  template <typename View>
  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
                                     typename View::device_type, Unmanaged>;
  typedef UnmanagedView<ViewY> UnMViewY;
  typedef UnmanagedView<ViewD> UnMViewD;
  typedef UnmanagedView<ViewX> UnMViewX;

  const Size block_size_;
  Scalar alpha_;
  UnMViewY Y_;
  UnMViewD D_;
  UnMViewX X_;

public:
  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
                     const ViewY& Y, const ViewD& D, const ViewX& X)
    : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
  {}

  KOKKOS_INLINE_FUNCTION
  void operator() (const Size k) const {
    const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
    auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
    const auto num_vecs = X_.extent(1);
    for (Size i = 0; i < num_vecs; ++i) {
      Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
      auto X_curBlk = Kokkos::subview(X_, kslice, i);
      auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
      // Y_curBlk := alpha * D_curBlk * X_curBlk.
      // Recall that GEMV does an update (+=) of the last argument.
      Tpetra::FILL(Y_curBlk, zero);
      Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
    }
  }
};
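
// Per-block math (editorial note): for mesh row k with block size b,
// the operator() above computes, for each column i,
//   Y[k*b : (k+1)*b, i] := alpha * D(k, :, :) * X[k*b : (k+1)*b, i];
// that is, D stores one dense b-by-b block per mesh row, applied
// independently to the corresponding slice of each column of X.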

template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
createBlockWiseMultiply (const int block_size, const Scalar& alpha,
                         const ViewY& Y, const ViewD& D, const ViewX& X) {
  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
}

template <typename ViewY,
          typename Scalar,
          typename ViewD,
          typename ViewZ,
          typename LO = typename ViewY::size_type>
class BlockJacobiUpdate {
private:
  typedef typename ViewD::device_type Device;
  typedef typename ViewD::non_const_value_type ImplScalar;
  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;

  template <typename ViewType>
  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
                                     typename ViewType::array_layout,
                                     typename ViewType::device_type,
                                     Unmanaged>;
  typedef UnmanagedView<ViewY> UnMViewY;
  typedef UnmanagedView<ViewD> UnMViewD;
  typedef UnmanagedView<ViewZ> UnMViewZ;

  const LO blockSize_;
  UnMViewY Y_;
  const Scalar alpha_;
  UnMViewD D_;
  UnMViewZ Z_;
  const Scalar beta_;

public:
  BlockJacobiUpdate (const ViewY& Y,
                     const Scalar& alpha,
                     const ViewD& D,
                     const ViewZ& Z,
                     const Scalar& beta) :
    blockSize_ (D.extent (1)),
    Y_ (Y),
    alpha_ (alpha),
    D_ (D),
    Z_ (Z),
    beta_ (beta)
  {
    static_assert (static_cast<int> (ViewY::rank) == 1,
                   "Y must have rank 1.");
    static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
    static_assert (static_cast<int> (ViewZ::rank) == 1,
                   "Z must have rank 1.");
  }

  KOKKOS_INLINE_FUNCTION void
  operator() (const LO& k) const
  {
    using Kokkos::ALL;
    using Kokkos::subview;
    typedef Kokkos::pair<LO, LO> range_type;
    typedef Kokkos::Details::ArithTraits<Scalar> KAT;

    // We only have to implement the alpha != 0 case.

    auto D_curBlk = subview (D_, k, ALL (), ALL ());
    const range_type kslice (k*blockSize_, (k+1)*blockSize_);

    // Z.update (STS::one (), X, -STS::one ()); // assume this is done

    auto Z_curBlk = subview (Z_, kslice);
    auto Y_curBlk = subview (Y_, kslice);
    // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
    if (beta_ == KAT::zero ()) {
      Tpetra::FILL (Y_curBlk, KAT::zero ());
    }
    else if (beta_ != KAT::one ()) {
      Tpetra::SCAL (beta_, Y_curBlk);
    }
    Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
  }
};

template<class ViewY,
         class Scalar,
         class ViewD,
         class ViewZ,
         class LO = typename ViewD::size_type>
void
blockJacobiUpdate (const ViewY& Y,
                   const Scalar& alpha,
                   const ViewD& D,
                   const ViewZ& Z,
                   const Scalar& beta)
{
  static_assert (Kokkos::Impl::is_view<ViewY>::value, "Y must be a Kokkos::View.");
  static_assert (Kokkos::Impl::is_view<ViewD>::value, "D must be a Kokkos::View.");
  static_assert (Kokkos::Impl::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
                 "Y and Z must have the same rank.");
  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");

  const auto lclNumMeshRows = D.extent (0);

#ifdef HAVE_TPETRA_DEBUG
  // D.extent(0) is the (local) number of mesh rows, and D.extent(1)
  // is the block size.  Thus, their product should be the local
  // number of point rows, that is, the number of rows in Y.
  const auto blkSize = D.extent (1);
  const auto lclNumPtRows = lclNumMeshRows * blkSize;
  TEUCHOS_TEST_FOR_EXCEPTION
    (Y.extent (0) != lclNumPtRows, std::invalid_argument,
     "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
     "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
     << " = " << lclNumPtRows << ".");
  TEUCHOS_TEST_FOR_EXCEPTION
    (Y.extent (0) != Z.extent (0), std::invalid_argument,
     "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
     "Z.extent(0) = " << Z.extent (0) << ".");
  TEUCHOS_TEST_FOR_EXCEPTION
    (Y.extent (1) != Z.extent (1), std::invalid_argument,
     "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
     "Z.extent(1) = " << Z.extent (1) << ".");
#endif // HAVE_TPETRA_DEBUG

  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
  // lclNumMeshRows must fit in LO, else the Map would not be correct.
  range_type range (0, static_cast<LO> (lclNumMeshRows));
  Kokkos::parallel_for (range, functor);
}

} // namespace Impl

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
blockWiseMultiply (const Scalar& alpha,
                   const Kokkos::View<const impl_scalar_type***,
                     device_type, Kokkos::MemoryUnmanaged>& D,
                   const BlockMultiVector<Scalar, LO, GO, Node>& X)
{
  using Kokkos::ALL;
  typedef typename device_type::execution_space execution_space;
  typedef typename device_type::memory_space memory_space;
  const LO lclNumMeshRows = meshMap_.getNodeNumElements ();

  if (alpha == STS::zero ()) {
    this->putScalar (STS::zero ());
  }
  else { // alpha != 0
    const LO blockSize = this->getBlockSize ();
    const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
    auto X_lcl = X.mv_.template getLocalView<memory_space> ();
    auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
    auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);

    // Use an explicit RangePolicy with the desired execution space.
    // Otherwise, if you just use a number, it runs on the default
    // execution space.  For example, if the default execution space
    // is Cuda but the current execution space is Serial, using just a
    // number would incorrectly run with Cuda.
    Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
    Kokkos::parallel_for (range, bwm);
  }
}
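
// Usage sketch (editorial example, not part of the original source;
// Y, X, and D are hypothetical, with D viewing device memory of
// extent (lclNumMeshRows, blockSize, blockSize)):
//
//   Y.blockWiseMultiply (2.0, D, X); // Y := 2 * D * X, blockwise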

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
blockJacobiUpdate (const Scalar& alpha,
                   const Kokkos::View<const impl_scalar_type***,
                     device_type, Kokkos::MemoryUnmanaged>& D,
                   const BlockMultiVector<Scalar, LO, GO, Node>& X,
                   BlockMultiVector<Scalar, LO, GO, Node>& Z,
                   const Scalar& beta)
{
  using Kokkos::ALL;
  using Kokkos::subview;
  typedef typename device_type::memory_space memory_space;
  typedef impl_scalar_type IST;

  const IST alphaImpl = static_cast<IST> (alpha);
  const IST betaImpl = static_cast<IST> (beta);
  const LO numVecs = mv_.getNumVectors ();

  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
  auto Z_lcl = Z.mv_.template getLocalView<memory_space> ();

  if (alpha == STS::zero ()) { // Y := beta * Y
    this->scale (beta);
  }
  else { // alpha != 0
    Z.update (STS::one (), X, -STS::one ()); // Z := X - Z
    for (LO j = 0; j < numVecs; ++j) {
      auto X_lcl_j = subview (X_lcl, ALL (), j);
      auto Y_lcl_j = subview (Y_lcl, ALL (), j);
      auto Z_lcl_j = subview (Z_lcl, ALL (), j);
      Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
    }
  }
}
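
// Math note (editorial): blockwise, this computes
//   Y := beta * Y + alpha * D * (X - Z),
// e.g. one damped block Jacobi sweep when D holds the inverses of the
// diagonal blocks, X is the right-hand side, and Z holds A times the
// current solution.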

} // namespace Tpetra

//
// Explicit instantiation macro
//
// Must be expanded from within the Tpetra namespace!
//
#define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
  template class BlockMultiVector< S, LO, GO, NODE >;
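
// Example expansion (editorial, with hypothetical concrete types):
// expanding TPETRA_BLOCKMULTIVECTOR_INSTANT(double, int, long long, NodeType)
// inside namespace Tpetra produces
//   template class BlockMultiVector< double, int, long long, NodeType >;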

#endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP