Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_BlockMultiVector_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
41 #define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
42 
44 #include "Tpetra_BlockView.hpp"
45 #include "Teuchos_OrdinalTraits.hpp"
46 
47 
48 namespace Tpetra {
49 
50 template<class Scalar, class LO, class GO, class Node>
51 typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type
54 {
55  return mv_;
56 }
57 
58 template<class Scalar, class LO, class GO, class Node>
59 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
62 {
64  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
65  TEUCHOS_TEST_FOR_EXCEPTION(
66  src_bmv == nullptr, std::invalid_argument, "Tpetra::"
67  "BlockMultiVector: The source object of an Import or Export to a "
68  "BlockMultiVector, must also be a BlockMultiVector.");
69  return Teuchos::rcp (src_bmv, false); // nonowning RCP
70 }
71 
72 template<class Scalar, class LO, class GO, class Node>
75  const Teuchos::DataAccess copyOrView) :
76  dist_object_type (in),
77  meshMap_ (in.meshMap_),
78  pointMap_ (in.pointMap_),
79  mv_ (in.mv_, copyOrView),
80  blockSize_ (in.blockSize_)
81 {}
82 
83 template<class Scalar, class LO, class GO, class Node>
85 BlockMultiVector (const map_type& meshMap,
86  const LO blockSize,
87  const LO numVecs) :
88  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
89  meshMap_ (meshMap),
90  pointMap_ (makePointMapRCP (meshMap, blockSize)),
91  mv_ (pointMap_, numVecs), // nonowning RCP is OK, since pointMap_ won't go away
92  blockSize_ (blockSize)
93 {}
94 
95 template<class Scalar, class LO, class GO, class Node>
97 BlockMultiVector (const map_type& meshMap,
98  const map_type& pointMap,
99  const LO blockSize,
100  const LO numVecs) :
101  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
102  meshMap_ (meshMap),
103  pointMap_ (new map_type(pointMap)),
104  mv_ (pointMap_, numVecs),
105  blockSize_ (blockSize)
106 {}
107 
108 template<class Scalar, class LO, class GO, class Node>
111  const map_type& meshMap,
112  const LO blockSize) :
113  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
114  meshMap_ (meshMap),
115  blockSize_ (blockSize)
116 {
117  using Teuchos::RCP;
118 
119  if (X_mv.getCopyOrView () == Teuchos::View) {
120  // The input MultiVector has view semantics, so assignment just
121  // does a shallow copy.
122  mv_ = X_mv;
123  }
124  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
125  // The input MultiVector has copy semantics. We can't change
126  // that, but we can make a view of the input MultiVector and
127  // change the view to have view semantics. (That sounds silly;
128  // shouldn't views always have view semantics? but remember that
129  // "view semantics" just governs the default behavior of the copy
130  // constructor and assignment operator.)
131  RCP<const mv_type> X_view_const;
132  const size_t numCols = X_mv.getNumVectors ();
133  if (numCols == 0) {
134  Teuchos::Array<size_t> cols (0); // view will have zero columns
135  X_view_const = X_mv.subView (cols ());
136  } else { // Range1D is an inclusive range
137  X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
138  }
139  TEUCHOS_TEST_FOR_EXCEPTION(
140  X_view_const.is_null (), std::logic_error, "Tpetra::"
141  "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
142  "should never happen. Please report this bug to the Tpetra developers.");
143 
144  // It's perfectly OK to cast away const here. Those view methods
145  // should be marked const anyway, because views can't change the
146  // allocation (just the entries themselves).
147  RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
148  TEUCHOS_TEST_FOR_EXCEPTION(
149  X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
150  "BlockMultiVector constructor: We just set a MultiVector "
151  "to have view semantics, but it claims that it doesn't have view "
152  "semantics. This should never happen. "
153  "Please report this bug to the Tpetra developers.");
154  mv_ = *X_view; // MultiVector::operator= does a shallow copy here
155  }
156 
157  // At this point, mv_ has been assigned, so we can ignore X_mv.
158  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
159  pointMap_ = pointMap;
160 
161 }
162 
163 template<class Scalar, class LO, class GO, class Node>
166  const map_type& newMeshMap,
167  const map_type& newPointMap,
168  const size_t offset) :
169  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
170  meshMap_ (newMeshMap),
171  pointMap_ (new map_type(newPointMap)),
172  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
173  blockSize_ (X.getBlockSize ())
174 {}
175 
176 template<class Scalar, class LO, class GO, class Node>
179  const map_type& newMeshMap,
180  const size_t offset) :
181  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
182  meshMap_ (newMeshMap),
183  pointMap_ (makePointMapRCP (newMeshMap, X.getBlockSize ())),
184  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
185  blockSize_ (X.getBlockSize ())
186 {}
187 
188 template<class Scalar, class LO, class GO, class Node>
191  dist_object_type (Teuchos::null),
192  blockSize_ (0)
193 {}
194 
195 template<class Scalar, class LO, class GO, class Node>
198 makePointMap (const map_type& meshMap, const LO blockSize)
199 {
200 typedef Tpetra::global_size_t GST;
201  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
202 
203  const GST gblNumMeshMapInds =
204  static_cast<GST> (meshMap.getGlobalNumElements ());
205  const size_t lclNumMeshMapIndices =
206  static_cast<size_t> (meshMap.getLocalNumElements ());
207  const GST gblNumPointMapInds =
208  gblNumMeshMapInds * static_cast<GST> (blockSize);
209  const size_t lclNumPointMapInds =
210  lclNumMeshMapIndices * static_cast<size_t> (blockSize);
211  const GO indexBase = meshMap.getIndexBase ();
212 
213  if (meshMap.isContiguous ()) {
214  return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
215  meshMap.getComm ());
216  }
217  else {
218  // "Hilbert's Hotel" trick: multiply each process' GIDs by
219  // blockSize, and fill in. That ensures correctness even if the
220  // mesh Map is overlapping.
221  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList ();
222  const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
223  Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
224  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
225  const GO meshGid = lclMeshGblInds[g];
226  const GO pointGidStart = indexBase +
227  (meshGid - indexBase) * static_cast<GO> (blockSize);
228  const size_type offset = g * static_cast<size_type> (blockSize);
229  for (LO k = 0; k < blockSize; ++k) {
230  const GO pointGid = pointGidStart + static_cast<GO> (k);
231  lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
232  }
233  }
234  return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
235  meshMap.getComm ());
236 
237  }
238 }
239 
240 
241 template<class Scalar, class LO, class GO, class Node>
242 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::map_type>
244 makePointMapRCP (const map_type& meshMap, const LO blockSize)
245 {
246 typedef Tpetra::global_size_t GST;
247  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
248 
249  const GST gblNumMeshMapInds =
250  static_cast<GST> (meshMap.getGlobalNumElements ());
251  const size_t lclNumMeshMapIndices =
252  static_cast<size_t> (meshMap.getLocalNumElements ());
253  const GST gblNumPointMapInds =
254  gblNumMeshMapInds * static_cast<GST> (blockSize);
255  const size_t lclNumPointMapInds =
256  lclNumMeshMapIndices * static_cast<size_t> (blockSize);
257  const GO indexBase = meshMap.getIndexBase ();
258 
259  if (meshMap.isContiguous ()) {
260  return Teuchos::rcp(new map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
261  meshMap.getComm ()));
262  }
263  else {
264  // "Hilbert's Hotel" trick: multiply each process' GIDs by
265  // blockSize, and fill in. That ensures correctness even if the
266  // mesh Map is overlapping.
267  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList ();
268  const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
269  Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
270  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
271  const GO meshGid = lclMeshGblInds[g];
272  const GO pointGidStart = indexBase +
273  (meshGid - indexBase) * static_cast<GO> (blockSize);
274  const size_type offset = g * static_cast<size_type> (blockSize);
275  for (LO k = 0; k < blockSize; ++k) {
276  const GO pointGid = pointGidStart + static_cast<GO> (k);
277  lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
278  }
279  }
280  return Teuchos::rcp(new map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
281  meshMap.getComm ()));
282 
283  }
284 }
285 
286 template<class Scalar, class LO, class GO, class Node>
287 void
289 replaceLocalValuesImpl (const LO localRowIndex,
290  const LO colIndex,
291  const Scalar vals[])
292 {
293  auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
294  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
295  getBlockSize ());
296  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
297  using exec_space = typename device_type::execution_space;
298  Kokkos::deep_copy (exec_space(), X_dst, X_src);
299 }
300 
301 
302 template<class Scalar, class LO, class GO, class Node>
303 bool
305 replaceLocalValues (const LO localRowIndex,
306  const LO colIndex,
307  const Scalar vals[])
308 {
309  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
310  return false;
311  } else {
312  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
313  return true;
314  }
315 }
316 
317 template<class Scalar, class LO, class GO, class Node>
318 bool
320 replaceGlobalValues (const GO globalRowIndex,
321  const LO colIndex,
322  const Scalar vals[])
323 {
324  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
325  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
326  return false;
327  } else {
328  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
329  return true;
330  }
331 }
332 
333 template<class Scalar, class LO, class GO, class Node>
334 void
336 sumIntoLocalValuesImpl (const LO localRowIndex,
337  const LO colIndex,
338  const Scalar vals[])
339 {
340  auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
341  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
342  getBlockSize ());
343  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
344 }
345 
346 template<class Scalar, class LO, class GO, class Node>
347 bool
349 sumIntoLocalValues (const LO localRowIndex,
350  const LO colIndex,
351  const Scalar vals[])
352 {
353  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
354  return false;
355  } else {
356  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
357  return true;
358  }
359 }
360 
361 template<class Scalar, class LO, class GO, class Node>
362 bool
364 sumIntoGlobalValues (const GO globalRowIndex,
365  const LO colIndex,
366  const Scalar vals[])
367 {
368  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
369  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
370  return false;
371  } else {
372  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
373  return true;
374  }
375 }
376 
377 
378 template<class Scalar, class LO, class GO, class Node>
379 typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
381 getLocalBlockHost (const LO localRowIndex,
382  const LO colIndex,
383  const Access::ReadOnlyStruct) const
384 {
385  if (!isValidLocalMeshIndex(localRowIndex)) {
386  return const_little_host_vec_type();
387  } else {
388  const size_t blockSize = getBlockSize();
389  auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
390  LO startRow = localRowIndex*blockSize;
391  LO endRow = startRow + blockSize;
392  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
393  colIndex);
394  }
395 }
396 
397 template<class Scalar, class LO, class GO, class Node>
398 typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
399 BlockMultiVector<Scalar, LO, GO, Node>::
400 getLocalBlockHost (const LO localRowIndex,
401  const LO colIndex,
402  const Access::OverwriteAllStruct)
403 {
404  if (!isValidLocalMeshIndex(localRowIndex)) {
405  return little_host_vec_type();
406  } else {
407  const size_t blockSize = getBlockSize();
408  auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
409  LO startRow = localRowIndex*blockSize;
410  LO endRow = startRow + blockSize;
411  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
412  colIndex);
413  }
414 }
415 
416 template<class Scalar, class LO, class GO, class Node>
417 typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
419 getLocalBlockHost (const LO localRowIndex,
420  const LO colIndex,
421  const Access::ReadWriteStruct)
422 {
423  if (!isValidLocalMeshIndex(localRowIndex)) {
424  return little_host_vec_type();
425  } else {
426  const size_t blockSize = getBlockSize();
427  auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
428  LO startRow = localRowIndex*blockSize;
429  LO endRow = startRow + blockSize;
430  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
431  colIndex);
432  }
433 }
434 
435 template<class Scalar, class LO, class GO, class Node>
436 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
437 BlockMultiVector<Scalar, LO, GO, Node>::
438 getMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
439 {
440  using Teuchos::rcp;
441  using Teuchos::rcpFromRef;
442 
443  // The source object of an Import or Export must be a
444  // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
445  // them in that order; one must succeed. Note that the target of
446  // the Import or Export calls checkSizes in DistObject's doTransfer.
447  typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
448  const this_BMV_type* srcBlkVec = dynamic_cast<const this_BMV_type*> (&src);
449  if (srcBlkVec == nullptr) {
450  const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
451  if (srcMultiVec == nullptr) {
452  // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
453  // currently does a shallow copy by default. This is why we
454  // return by RCP, rather than by value.
455  return rcp (new mv_type ());
456  } else { // src is a MultiVector
457  return rcp (srcMultiVec, false); // nonowning RCP
458  }
459  } else { // src is a BlockMultiVector
460  return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
461  }
462 }
463 
464 template<class Scalar, class LO, class GO, class Node>
467 {
468  return ! getMultiVectorFromSrcDistObject (src).is_null ();
469 }
470 
471 template<class Scalar, class LO, class GO, class Node>
474 (const SrcDistObject& src,
475  const size_t numSameIDs,
476  const Kokkos::DualView<const local_ordinal_type*,
477  buffer_device_type>& permuteToLIDs,
478  const Kokkos::DualView<const local_ordinal_type*,
479  buffer_device_type>& permuteFromLIDs,
480  const CombineMode CM)
481 {
482  TEUCHOS_TEST_FOR_EXCEPTION
483  (true, std::logic_error,
484  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
485  "instead, create a point importer using makePointMap function.");
486 }
487 
488 template<class Scalar, class LO, class GO, class Node>
491 (const SrcDistObject& src,
492  const Kokkos::DualView<const local_ordinal_type*,
493  buffer_device_type>& exportLIDs,
494  Kokkos::DualView<packet_type*,
495  buffer_device_type>& exports,
496  Kokkos::DualView<size_t*,
497  buffer_device_type> numPacketsPerLID,
498  size_t& constantNumPackets)
499 {
500  TEUCHOS_TEST_FOR_EXCEPTION
501  (true, std::logic_error,
502  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
503  "instead, create a point importer using makePointMap function.");
504 }
505 
506 template<class Scalar, class LO, class GO, class Node>
509 (const Kokkos::DualView<const local_ordinal_type*,
510  buffer_device_type>& importLIDs,
511  Kokkos::DualView<packet_type*,
512  buffer_device_type> imports,
513  Kokkos::DualView<size_t*,
514  buffer_device_type> numPacketsPerLID,
515  const size_t constantNumPackets,
516  const CombineMode combineMode)
517 {
518  TEUCHOS_TEST_FOR_EXCEPTION
519  (true, std::logic_error,
520  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
521  "instead, create a point importer using makePointMap function.");
522 }
523 
524 template<class Scalar, class LO, class GO, class Node>
526 isValidLocalMeshIndex (const LO meshLocalIndex) const
527 {
528  return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
529  meshMap_.isNodeLocalElement (meshLocalIndex);
530 }
531 
532 template<class Scalar, class LO, class GO, class Node>
534 putScalar (const Scalar& val)
535 {
536  mv_.putScalar (val);
537 }
538 
539 template<class Scalar, class LO, class GO, class Node>
541 scale (const Scalar& val)
542 {
543  mv_.scale (val);
544 }
545 
546 template<class Scalar, class LO, class GO, class Node>
548 update (const Scalar& alpha,
550  const Scalar& beta)
551 {
552  mv_.update (alpha, X.mv_, beta);
553 }
554 
555 namespace Impl {
556 // Y := alpha * D * X
557 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
558 struct BlockWiseMultiply {
559  typedef typename ViewD::size_type Size;
560 
561 private:
562  typedef typename ViewD::device_type Device;
563  typedef typename ViewD::non_const_value_type ImplScalar;
564  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
565 
566  template <typename View>
567  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
568  typename View::device_type, Unmanaged>;
569  typedef UnmanagedView<ViewY> UnMViewY;
570  typedef UnmanagedView<ViewD> UnMViewD;
571  typedef UnmanagedView<ViewX> UnMViewX;
572 
573  const Size block_size_;
574  Scalar alpha_;
575  UnMViewY Y_;
576  UnMViewD D_;
577  UnMViewX X_;
578 
579 public:
580  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
581  const ViewY& Y, const ViewD& D, const ViewX& X)
582  : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
583  {}
584 
585  KOKKOS_INLINE_FUNCTION
586  void operator() (const Size k) const {
587  const auto zero = Kokkos::ArithTraits<Scalar>::zero();
588  auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
589  const auto num_vecs = X_.extent(1);
590  for (Size i = 0; i < num_vecs; ++i) {
591  Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
592  auto X_curBlk = Kokkos::subview(X_, kslice, i);
593  auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
594  // Y_curBlk := alpha * D_curBlk * X_curBlk.
595  // Recall that GEMV does an update (+=) of the last argument.
596  Tpetra::FILL(Y_curBlk, zero);
597  Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
598  }
599  }
600 };
601 
602 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
603 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
604 createBlockWiseMultiply (const int block_size, const Scalar& alpha,
605  const ViewY& Y, const ViewD& D, const ViewX& X) {
606  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
607 }
608 
609 template <typename ViewY,
610  typename Scalar,
611  typename ViewD,
612  typename ViewZ,
613  typename LO = typename ViewY::size_type>
614 class BlockJacobiUpdate {
615 private:
616  typedef typename ViewD::device_type Device;
617  typedef typename ViewD::non_const_value_type ImplScalar;
618  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
619 
620  template <typename ViewType>
621  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
622  typename ViewType::array_layout,
623  typename ViewType::device_type,
624  Unmanaged>;
625  typedef UnmanagedView<ViewY> UnMViewY;
626  typedef UnmanagedView<ViewD> UnMViewD;
627  typedef UnmanagedView<ViewZ> UnMViewZ;
628 
629  const LO blockSize_;
630  UnMViewY Y_;
631  const Scalar alpha_;
632  UnMViewD D_;
633  UnMViewZ Z_;
634  const Scalar beta_;
635 
636 public:
637  BlockJacobiUpdate (const ViewY& Y,
638  const Scalar& alpha,
639  const ViewD& D,
640  const ViewZ& Z,
641  const Scalar& beta) :
642  blockSize_ (D.extent (1)),
643  // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
644  Y_ (Y),
645  alpha_ (alpha),
646  D_ (D),
647  Z_ (Z),
648  beta_ (beta)
649  {
650  static_assert (static_cast<int> (ViewY::rank) == 1,
651  "Y must have rank 1.");
652  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
653  static_assert (static_cast<int> (ViewZ::rank) == 1,
654  "Z must have rank 1.");
655  // static_assert (static_cast<int> (ViewZ::rank) ==
656  // static_cast<int> (ViewY::rank),
657  // "Z must have the same rank as Y.");
658  }
659 
660  KOKKOS_INLINE_FUNCTION void
661  operator() (const LO& k) const
662  {
663  using Kokkos::ALL;
664  using Kokkos::subview;
665  typedef Kokkos::pair<LO, LO> range_type;
666  typedef Kokkos::ArithTraits<Scalar> KAT;
667 
668  // We only have to implement the alpha != 0 case.
669 
670  auto D_curBlk = subview (D_, k, ALL (), ALL ());
671  const range_type kslice (k*blockSize_, (k+1)*blockSize_);
672 
673  // Z.update (STS::one (), X, -STS::one ()); // assume this is done
674 
675  auto Z_curBlk = subview (Z_, kslice);
676  auto Y_curBlk = subview (Y_, kslice);
677  // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
678  if (beta_ == KAT::zero ()) {
679  Tpetra::FILL (Y_curBlk, KAT::zero ());
680  }
681  else if (beta_ != KAT::one ()) {
682  Tpetra::SCAL (beta_, Y_curBlk);
683  }
684  Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
685  }
686 };
687 
688 template<class ViewY,
689  class Scalar,
690  class ViewD,
691  class ViewZ,
692  class LO = typename ViewD::size_type>
693 void
694 blockJacobiUpdate (const ViewY& Y,
695  const Scalar& alpha,
696  const ViewD& D,
697  const ViewZ& Z,
698  const Scalar& beta)
699 {
700  static_assert (Kokkos::is_view<ViewY>::value, "Y must be a Kokkos::View.");
701  static_assert (Kokkos::is_view<ViewD>::value, "D must be a Kokkos::View.");
702  static_assert (Kokkos::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
703  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
704  "Y and Z must have the same rank.");
705  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
706 
707  const auto lclNumMeshRows = D.extent (0);
708 
709 #ifdef HAVE_TPETRA_DEBUG
710  // D.extent(0) is the (local) number of mesh rows.
711  // D.extent(1) is the block size. Thus, their product should be
712  // the local number of point rows, that is, the number of rows in Y.
713  const auto blkSize = D.extent (1);
714  const auto lclNumPtRows = lclNumMeshRows * blkSize;
715  TEUCHOS_TEST_FOR_EXCEPTION
716  (Y.extent (0) != lclNumPtRows, std::invalid_argument,
717  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
718  "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
719  << " = " << lclNumPtRows << ".");
720  TEUCHOS_TEST_FOR_EXCEPTION
721  (Y.extent (0) != Z.extent (0), std::invalid_argument,
722  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
723  "Z.extent(0) = " << Z.extent (0) << ".");
724  TEUCHOS_TEST_FOR_EXCEPTION
725  (Y.extent (1) != Z.extent (1), std::invalid_argument,
726  "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
727  "Z.extent(1) = " << Z.extent (1) << ".");
728 #endif // HAVE_TPETRA_DEBUG
729 
730  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
731  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
732  // lclNumMeshRows must fit in LO, else the Map would not be correct.
733  range_type range (0, static_cast<LO> (lclNumMeshRows));
734  Kokkos::parallel_for (range, functor);
735 }
736 
737 } // namespace Impl
738 
739 template<class Scalar, class LO, class GO, class Node>
741 blockWiseMultiply (const Scalar& alpha,
742  const Kokkos::View<const impl_scalar_type***,
743  device_type, Kokkos::MemoryUnmanaged>& D,
745 {
746  using Kokkos::ALL;
747  typedef typename device_type::execution_space exec_space;
748  const LO lclNumMeshRows = meshMap_.getLocalNumElements ();
749 
750  if (alpha == STS::zero ()) {
751  this->putScalar (STS::zero ());
752  }
753  else { // alpha != 0
754  const LO blockSize = this->getBlockSize ();
755  const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
756  auto X_lcl = X.mv_.getLocalViewDevice (Access::ReadOnly);
757  auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
758  auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
759 
760  // Use an explicit RangePolicy with the desired execution space.
761  // Otherwise, if you just use a number, it runs on the default
762  // execution space. For example, if the default execution space
763  // is Cuda but the current execution space is Serial, using just a
764  // number would incorrectly run with Cuda.
765  Kokkos::RangePolicy<exec_space, LO> range (0, lclNumMeshRows);
766  Kokkos::parallel_for (range, bwm);
767  }
768 }
769 
770 template<class Scalar, class LO, class GO, class Node>
772 blockJacobiUpdate (const Scalar& alpha,
773  const Kokkos::View<const impl_scalar_type***,
774  device_type, Kokkos::MemoryUnmanaged>& D,
777  const Scalar& beta)
778 {
779  using Kokkos::ALL;
780  using Kokkos::subview;
781  typedef impl_scalar_type IST;
782 
783  const IST alphaImpl = static_cast<IST> (alpha);
784  const IST betaImpl = static_cast<IST> (beta);
785  const LO numVecs = mv_.getNumVectors ();
786 
787  if (alpha == STS::zero ()) { // Y := beta * Y
788  this->scale (beta);
789  }
790  else { // alpha != 0
791  Z.update (STS::one (), X, -STS::one ());
792  for (LO j = 0; j < numVecs; ++j) {
793  auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
794  auto Z_lcl = Z.mv_.getLocalViewDevice (Access::ReadWrite);
795  auto Y_lcl_j = subview (Y_lcl, ALL (), j);
796  auto Z_lcl_j = subview (Z_lcl, ALL (), j);
797  Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
798  }
799  }
800 }
801 
802 } // namespace Tpetra
803 
804 //
805 // Explicit instantiation macro
806 //
807 // Must be expanded from within the Tpetra namespace!
808 //
809 #define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
810  template class BlockMultiVector< S, LO, GO, NODE >;
811 
812 #endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
LO local_ordinal_type
The type of local indices.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
Linear algebra kernels for small dense matrices and vectors.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
static Teuchos::RCP< const map_type > makePointMapRCP(const map_type &meshMap, const LO blockSize)
Create and return an owning RCP to the point Map corresponding to the given mesh Map and block size...
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
global_ordinal_type getIndexBase() const
The index base for this Map.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
typename mv_type::device_type device_type
The Kokkos Device type.
CombineMode
Rule for combining data in an Import or Export.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Abstract base class for objects that can be the source of an Import or Export operation.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector&#39;s local data on device. This requires that th...
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector&#39;s data.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization used for communication buffers.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.