Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_BlockMultiVector_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
41 #define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
42 
44 #include "Tpetra_BlockView.hpp"
45 #include "Teuchos_OrdinalTraits.hpp"
46 
47 
48 namespace Tpetra {
49 
50 template<class Scalar, class LO, class GO, class Node>
51 typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type
54 {
55  return mv_;
56 }
57 
58 template<class Scalar, class LO, class GO, class Node>
59 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
62 {
64  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
65  TEUCHOS_TEST_FOR_EXCEPTION(
66  src_bmv == nullptr, std::invalid_argument, "Tpetra::"
67  "BlockMultiVector: The source object of an Import or Export to a "
68  "BlockMultiVector, must also be a BlockMultiVector.");
69  return Teuchos::rcp (src_bmv, false); // nonowning RCP
70 }
71 
72 template<class Scalar, class LO, class GO, class Node>
75  const Teuchos::DataAccess copyOrView) :
76  dist_object_type (in),
77  meshMap_ (in.meshMap_),
78  pointMap_ (in.pointMap_),
79  mv_ (in.mv_, copyOrView),
80  blockSize_ (in.blockSize_)
81 {}
82 
83 template<class Scalar, class LO, class GO, class Node>
85 BlockMultiVector (const map_type& meshMap,
86  const LO blockSize,
87  const LO numVecs) :
88  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
89  meshMap_ (meshMap),
90  pointMap_ (makePointMap (meshMap, blockSize)),
91  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
92  blockSize_ (blockSize)
93 {}
94 
95 template<class Scalar, class LO, class GO, class Node>
97 BlockMultiVector (const map_type& meshMap,
98  const map_type& pointMap,
99  const LO blockSize,
100  const LO numVecs) :
101  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
102  meshMap_ (meshMap),
103  pointMap_ (pointMap),
104  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
105  blockSize_ (blockSize)
106 {}
107 
108 template<class Scalar, class LO, class GO, class Node>
111  const map_type& meshMap,
112  const LO blockSize) :
113  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
114  meshMap_ (meshMap),
115  blockSize_ (blockSize)
116 {
117  using Teuchos::RCP;
118 
119  if (X_mv.getCopyOrView () == Teuchos::View) {
120  // The input MultiVector has view semantics, so assignment just
121  // does a shallow copy.
122  mv_ = X_mv;
123  }
124  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
125  // The input MultiVector has copy semantics. We can't change
126  // that, but we can make a view of the input MultiVector and
127  // change the view to have view semantics. (That sounds silly;
128  // shouldn't views always have view semantics? but remember that
129  // "view semantics" just governs the default behavior of the copy
130  // constructor and assignment operator.)
131  RCP<const mv_type> X_view_const;
132  const size_t numCols = X_mv.getNumVectors ();
133  if (numCols == 0) {
134  Teuchos::Array<size_t> cols (0); // view will have zero columns
135  X_view_const = X_mv.subView (cols ());
136  } else { // Range1D is an inclusive range
137  X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
138  }
139  TEUCHOS_TEST_FOR_EXCEPTION(
140  X_view_const.is_null (), std::logic_error, "Tpetra::"
141  "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
142  "should never happen. Please report this bug to the Tpetra developers.");
143 
144  // It's perfectly OK to cast away const here. Those view methods
145  // should be marked const anyway, because views can't change the
146  // allocation (just the entries themselves).
147  RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
148  TEUCHOS_TEST_FOR_EXCEPTION(
149  X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
150  "BlockMultiVector constructor: We just set a MultiVector "
151  "to have view semantics, but it claims that it doesn't have view "
152  "semantics. This should never happen. "
153  "Please report this bug to the Tpetra developers.");
154  mv_ = *X_view; // MultiVector::operator= does a shallow copy here
155  }
156 
157  // At this point, mv_ has been assigned, so we can ignore X_mv.
158  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
159  if (! pointMap.is_null ()) {
160  pointMap_ = *pointMap; // Map::operator= also does a shallow copy
161  }
162 }
163 
164 template<class Scalar, class LO, class GO, class Node>
167  const map_type& newMeshMap,
168  const map_type& newPointMap,
169  const size_t offset) :
170  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
171  meshMap_ (newMeshMap),
172  pointMap_ (newPointMap),
173  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
174  blockSize_ (X.getBlockSize ())
175 {}
176 
177 template<class Scalar, class LO, class GO, class Node>
180  const map_type& newMeshMap,
181  const size_t offset) :
182  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
183  meshMap_ (newMeshMap),
184  pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
185  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
186  blockSize_ (X.getBlockSize ())
187 {}
188 
189 template<class Scalar, class LO, class GO, class Node>
192  dist_object_type (Teuchos::null),
193  blockSize_ (0)
194 {}
195 
196 template<class Scalar, class LO, class GO, class Node>
199 makePointMap (const map_type& meshMap, const LO blockSize)
200 {
201  typedef Tpetra::global_size_t GST;
202  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
203 
204  const GST gblNumMeshMapInds =
205  static_cast<GST> (meshMap.getGlobalNumElements ());
206  const size_t lclNumMeshMapIndices =
207  static_cast<size_t> (meshMap.getLocalNumElements ());
208  const GST gblNumPointMapInds =
209  gblNumMeshMapInds * static_cast<GST> (blockSize);
210  const size_t lclNumPointMapInds =
211  lclNumMeshMapIndices * static_cast<size_t> (blockSize);
212  const GO indexBase = meshMap.getIndexBase ();
213 
214  if (meshMap.isContiguous ()) {
215  return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
216  meshMap.getComm ());
217  }
218  else {
219  // "Hilbert's Hotel" trick: multiply each process' GIDs by
220  // blockSize, and fill in. That ensures correctness even if the
221  // mesh Map is overlapping.
222  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList ();
223  const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
224  Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
225  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
226  const GO meshGid = lclMeshGblInds[g];
227  const GO pointGidStart = indexBase +
228  (meshGid - indexBase) * static_cast<GO> (blockSize);
229  const size_type offset = g * static_cast<size_type> (blockSize);
230  for (LO k = 0; k < blockSize; ++k) {
231  const GO pointGid = pointGidStart + static_cast<GO> (k);
232  lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
233  }
234  }
235  return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
236  meshMap.getComm ());
237  }
238 }
239 
240 
241 template<class Scalar, class LO, class GO, class Node>
242 void
244 replaceLocalValuesImpl (const LO localRowIndex,
245  const LO colIndex,
246  const Scalar vals[])
247 {
248  auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
249  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
250  getBlockSize ());
251  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
252  using exec_space = typename device_type::execution_space;
253  Kokkos::deep_copy (exec_space(), X_dst, X_src);
254 }
255 
256 
257 template<class Scalar, class LO, class GO, class Node>
258 bool
260 replaceLocalValues (const LO localRowIndex,
261  const LO colIndex,
262  const Scalar vals[])
263 {
264  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
265  return false;
266  } else {
267  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
268  return true;
269  }
270 }
271 
272 template<class Scalar, class LO, class GO, class Node>
273 bool
275 replaceGlobalValues (const GO globalRowIndex,
276  const LO colIndex,
277  const Scalar vals[])
278 {
279  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
280  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
281  return false;
282  } else {
283  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
284  return true;
285  }
286 }
287 
288 template<class Scalar, class LO, class GO, class Node>
289 void
291 sumIntoLocalValuesImpl (const LO localRowIndex,
292  const LO colIndex,
293  const Scalar vals[])
294 {
295  auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
296  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
297  getBlockSize ());
298  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
299 }
300 
301 template<class Scalar, class LO, class GO, class Node>
302 bool
304 sumIntoLocalValues (const LO localRowIndex,
305  const LO colIndex,
306  const Scalar vals[])
307 {
308  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
309  return false;
310  } else {
311  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
312  return true;
313  }
314 }
315 
316 template<class Scalar, class LO, class GO, class Node>
317 bool
319 sumIntoGlobalValues (const GO globalRowIndex,
320  const LO colIndex,
321  const Scalar vals[])
322 {
323  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
324  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
325  return false;
326  } else {
327  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
328  return true;
329  }
330 }
331 
332 
333 template<class Scalar, class LO, class GO, class Node>
334 typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
336 getLocalBlockHost (const LO localRowIndex,
337  const LO colIndex,
338  const Access::ReadOnlyStruct) const
339 {
340  if (!isValidLocalMeshIndex(localRowIndex)) {
341  return const_little_host_vec_type();
342  } else {
343  const size_t blockSize = getBlockSize();
344  auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
345  LO startRow = localRowIndex*blockSize;
346  LO endRow = startRow + blockSize;
347  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
348  colIndex);
349  }
350 }
351 
352 template<class Scalar, class LO, class GO, class Node>
353 typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
354 BlockMultiVector<Scalar, LO, GO, Node>::
355 getLocalBlockHost (const LO localRowIndex,
356  const LO colIndex,
357  const Access::OverwriteAllStruct)
358 {
359  if (!isValidLocalMeshIndex(localRowIndex)) {
360  return little_host_vec_type();
361  } else {
362  const size_t blockSize = getBlockSize();
363  auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
364  LO startRow = localRowIndex*blockSize;
365  LO endRow = startRow + blockSize;
366  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
367  colIndex);
368  }
369 }
370 
371 template<class Scalar, class LO, class GO, class Node>
372 typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
374 getLocalBlockHost (const LO localRowIndex,
375  const LO colIndex,
376  const Access::ReadWriteStruct)
377 {
378  if (!isValidLocalMeshIndex(localRowIndex)) {
379  return little_host_vec_type();
380  } else {
381  const size_t blockSize = getBlockSize();
382  auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
383  LO startRow = localRowIndex*blockSize;
384  LO endRow = startRow + blockSize;
385  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
386  colIndex);
387  }
388 }
389 
390 template<class Scalar, class LO, class GO, class Node>
391 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
392 BlockMultiVector<Scalar, LO, GO, Node>::
393 getMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
394 {
395  using Teuchos::rcp;
396  using Teuchos::rcpFromRef;
397 
398  // The source object of an Import or Export must be a
399  // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
400  // them in that order; one must succeed. Note that the target of
401  // the Import or Export calls checkSizes in DistObject's doTransfer.
402  typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
403  const this_BMV_type* srcBlkVec = dynamic_cast<const this_BMV_type*> (&src);
404  if (srcBlkVec == nullptr) {
405  const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
406  if (srcMultiVec == nullptr) {
407  // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
408  // currently does a shallow copy by default. This is why we
409  // return by RCP, rather than by value.
410  return rcp (new mv_type ());
411  } else { // src is a MultiVector
412  return rcp (srcMultiVec, false); // nonowning RCP
413  }
414  } else { // src is a BlockMultiVector
415  return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
416  }
417 }
418 
419 template<class Scalar, class LO, class GO, class Node>
422 {
423  return ! getMultiVectorFromSrcDistObject (src).is_null ();
424 }
425 
426 template<class Scalar, class LO, class GO, class Node>
429 (const SrcDistObject& src,
430  const size_t numSameIDs,
431  const Kokkos::DualView<const local_ordinal_type*,
432  buffer_device_type>& permuteToLIDs,
433  const Kokkos::DualView<const local_ordinal_type*,
434  buffer_device_type>& permuteFromLIDs,
435  const CombineMode CM)
436 {
437  TEUCHOS_TEST_FOR_EXCEPTION
438  (true, std::logic_error,
439  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
440  "instead, create a point importer using makePointMap function.");
441 }
442 
443 template<class Scalar, class LO, class GO, class Node>
444 void BlockMultiVector<Scalar, LO, GO, Node>::
445 packAndPrepare
446 (const SrcDistObject& src,
447  const Kokkos::DualView<const local_ordinal_type*,
448  buffer_device_type>& exportLIDs,
449  Kokkos::DualView<packet_type*,
450  buffer_device_type>& exports,
451  Kokkos::DualView<size_t*,
452  buffer_device_type> numPacketsPerLID,
453  size_t& constantNumPackets)
454 {
455  TEUCHOS_TEST_FOR_EXCEPTION
456  (true, std::logic_error,
457  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
458  "instead, create a point importer using makePointMap function.");
459 }
460 
461 template<class Scalar, class LO, class GO, class Node>
462 void BlockMultiVector<Scalar, LO, GO, Node>::
463 unpackAndCombine
464 (const Kokkos::DualView<const local_ordinal_type*,
465  buffer_device_type>& importLIDs,
466  Kokkos::DualView<packet_type*,
467  buffer_device_type> imports,
468  Kokkos::DualView<size_t*,
469  buffer_device_type> numPacketsPerLID,
470  const size_t constantNumPackets,
471  const CombineMode combineMode)
472 {
473  TEUCHOS_TEST_FOR_EXCEPTION
474  (true, std::logic_error,
475  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
476  "instead, create a point importer using makePointMap function.");
477 }
478 
479 template<class Scalar, class LO, class GO, class Node>
481 isValidLocalMeshIndex (const LO meshLocalIndex) const
482 {
483  return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
484  meshMap_.isNodeLocalElement (meshLocalIndex);
485 }
486 
487 template<class Scalar, class LO, class GO, class Node>
489 putScalar (const Scalar& val)
490 {
491  mv_.putScalar (val);
492 }
493 
494 template<class Scalar, class LO, class GO, class Node>
496 scale (const Scalar& val)
497 {
498  mv_.scale (val);
499 }
500 
501 template<class Scalar, class LO, class GO, class Node>
503 update (const Scalar& alpha,
505  const Scalar& beta)
506 {
507  mv_.update (alpha, X.mv_, beta);
508 }
509 
510 namespace Impl {
511 // Y := alpha * D * X
512 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
513 struct BlockWiseMultiply {
514  typedef typename ViewD::size_type Size;
515 
516 private:
517  typedef typename ViewD::device_type Device;
518  typedef typename ViewD::non_const_value_type ImplScalar;
519  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
520 
521  template <typename View>
522  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
523  typename View::device_type, Unmanaged>;
524  typedef UnmanagedView<ViewY> UnMViewY;
525  typedef UnmanagedView<ViewD> UnMViewD;
526  typedef UnmanagedView<ViewX> UnMViewX;
527 
528  const Size block_size_;
529  Scalar alpha_;
530  UnMViewY Y_;
531  UnMViewD D_;
532  UnMViewX X_;
533 
534 public:
535  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
536  const ViewY& Y, const ViewD& D, const ViewX& X)
537  : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
538  {}
539 
540  KOKKOS_INLINE_FUNCTION
541  void operator() (const Size k) const {
542  const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
543  auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
544  const auto num_vecs = X_.extent(1);
545  for (Size i = 0; i < num_vecs; ++i) {
546  Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
547  auto X_curBlk = Kokkos::subview(X_, kslice, i);
548  auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
549  // Y_curBlk := alpha * D_curBlk * X_curBlk.
550  // Recall that GEMV does an update (+=) of the last argument.
551  Tpetra::FILL(Y_curBlk, zero);
552  Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
553  }
554  }
555 };
556 
557 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
558 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
559 createBlockWiseMultiply (const int block_size, const Scalar& alpha,
560  const ViewY& Y, const ViewD& D, const ViewX& X) {
561  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
562 }
563 
564 template <typename ViewY,
565  typename Scalar,
566  typename ViewD,
567  typename ViewZ,
568  typename LO = typename ViewY::size_type>
569 class BlockJacobiUpdate {
570 private:
571  typedef typename ViewD::device_type Device;
572  typedef typename ViewD::non_const_value_type ImplScalar;
573  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
574 
575  template <typename ViewType>
576  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
577  typename ViewType::array_layout,
578  typename ViewType::device_type,
579  Unmanaged>;
580  typedef UnmanagedView<ViewY> UnMViewY;
581  typedef UnmanagedView<ViewD> UnMViewD;
582  typedef UnmanagedView<ViewZ> UnMViewZ;
583 
584  const LO blockSize_;
585  UnMViewY Y_;
586  const Scalar alpha_;
587  UnMViewD D_;
588  UnMViewZ Z_;
589  const Scalar beta_;
590 
591 public:
592  BlockJacobiUpdate (const ViewY& Y,
593  const Scalar& alpha,
594  const ViewD& D,
595  const ViewZ& Z,
596  const Scalar& beta) :
597  blockSize_ (D.extent (1)),
598  // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
599  Y_ (Y),
600  alpha_ (alpha),
601  D_ (D),
602  Z_ (Z),
603  beta_ (beta)
604  {
605  static_assert (static_cast<int> (ViewY::rank) == 1,
606  "Y must have rank 1.");
607  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
608  static_assert (static_cast<int> (ViewZ::rank) == 1,
609  "Z must have rank 1.");
610  // static_assert (static_cast<int> (ViewZ::rank) ==
611  // static_cast<int> (ViewY::rank),
612  // "Z must have the same rank as Y.");
613  }
614 
615  KOKKOS_INLINE_FUNCTION void
616  operator() (const LO& k) const
617  {
618  using Kokkos::ALL;
619  using Kokkos::subview;
620  typedef Kokkos::pair<LO, LO> range_type;
621  typedef Kokkos::Details::ArithTraits<Scalar> KAT;
622 
623  // We only have to implement the alpha != 0 case.
624 
625  auto D_curBlk = subview (D_, k, ALL (), ALL ());
626  const range_type kslice (k*blockSize_, (k+1)*blockSize_);
627 
628  // Z.update (STS::one (), X, -STS::one ()); // assume this is done
629 
630  auto Z_curBlk = subview (Z_, kslice);
631  auto Y_curBlk = subview (Y_, kslice);
632  // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
633  if (beta_ == KAT::zero ()) {
634  Tpetra::FILL (Y_curBlk, KAT::zero ());
635  }
636  else if (beta_ != KAT::one ()) {
637  Tpetra::SCAL (beta_, Y_curBlk);
638  }
639  Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
640  }
641 };
642 
643 template<class ViewY,
644  class Scalar,
645  class ViewD,
646  class ViewZ,
647  class LO = typename ViewD::size_type>
648 void
649 blockJacobiUpdate (const ViewY& Y,
650  const Scalar& alpha,
651  const ViewD& D,
652  const ViewZ& Z,
653  const Scalar& beta)
654 {
655  static_assert (Kokkos::is_view<ViewY>::value, "Y must be a Kokkos::View.");
656  static_assert (Kokkos::is_view<ViewD>::value, "D must be a Kokkos::View.");
657  static_assert (Kokkos::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
658  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
659  "Y and Z must have the same rank.");
660  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
661 
662  const auto lclNumMeshRows = D.extent (0);
663 
664 #ifdef HAVE_TPETRA_DEBUG
665  // D.extent(0) is the (local) number of mesh rows.
666  // D.extent(1) is the block size. Thus, their product should be
667  // the local number of point rows, that is, the number of rows in Y.
668  const auto blkSize = D.extent (1);
669  const auto lclNumPtRows = lclNumMeshRows * blkSize;
670  TEUCHOS_TEST_FOR_EXCEPTION
671  (Y.extent (0) != lclNumPtRows, std::invalid_argument,
672  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
673  "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
674  << " = " << lclNumPtRows << ".");
675  TEUCHOS_TEST_FOR_EXCEPTION
676  (Y.extent (0) != Z.extent (0), std::invalid_argument,
677  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
678  "Z.extent(0) = " << Z.extent (0) << ".");
679  TEUCHOS_TEST_FOR_EXCEPTION
680  (Y.extent (1) != Z.extent (1), std::invalid_argument,
681  "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
682  "Z.extent(1) = " << Z.extent (1) << ".");
683 #endif // HAVE_TPETRA_DEBUG
684 
685  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
686  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
687  // lclNumMeshRows must fit in LO, else the Map would not be correct.
688  range_type range (0, static_cast<LO> (lclNumMeshRows));
689  Kokkos::parallel_for (range, functor);
690 }
691 
692 } // namespace Impl
693 
694 template<class Scalar, class LO, class GO, class Node>
696 blockWiseMultiply (const Scalar& alpha,
697  const Kokkos::View<const impl_scalar_type***,
698  device_type, Kokkos::MemoryUnmanaged>& D,
700 {
701  using Kokkos::ALL;
702  typedef typename device_type::execution_space exec_space;
703  const LO lclNumMeshRows = meshMap_.getLocalNumElements ();
704 
705  if (alpha == STS::zero ()) {
706  this->putScalar (STS::zero ());
707  }
708  else { // alpha != 0
709  const LO blockSize = this->getBlockSize ();
710  const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
711  auto X_lcl = X.mv_.getLocalViewDevice (Access::ReadOnly);
712  auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
713  auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
714 
715  // Use an explicit RangePolicy with the desired execution space.
716  // Otherwise, if you just use a number, it runs on the default
717  // execution space. For example, if the default execution space
718  // is Cuda but the current execution space is Serial, using just a
719  // number would incorrectly run with Cuda.
720  Kokkos::RangePolicy<exec_space, LO> range (0, lclNumMeshRows);
721  Kokkos::parallel_for (range, bwm);
722  }
723 }
724 
725 template<class Scalar, class LO, class GO, class Node>
727 blockJacobiUpdate (const Scalar& alpha,
728  const Kokkos::View<const impl_scalar_type***,
729  device_type, Kokkos::MemoryUnmanaged>& D,
732  const Scalar& beta)
733 {
734  using Kokkos::ALL;
735  using Kokkos::subview;
736  typedef impl_scalar_type IST;
737 
738  const IST alphaImpl = static_cast<IST> (alpha);
739  const IST betaImpl = static_cast<IST> (beta);
740  const LO numVecs = mv_.getNumVectors ();
741 
742  if (alpha == STS::zero ()) { // Y := beta * Y
743  this->scale (beta);
744  }
745  else { // alpha != 0
746  Z.update (STS::one (), X, -STS::one ());
747  for (LO j = 0; j < numVecs; ++j) {
748  auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
749  auto Z_lcl = Z.mv_.getLocalViewDevice (Access::ReadWrite);
750  auto Y_lcl_j = subview (Y_lcl, ALL (), j);
751  auto Z_lcl_j = subview (Z_lcl, ALL (), j);
752  Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
753  }
754  }
755 }
756 
757 } // namespace Tpetra
758 
759 //
760 // Explicit instantiation macro
761 //
762 // Must be expanded from within the Tpetra namespace!
763 //
764 #define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
765  template class BlockMultiVector< S, LO, GO, NODE >;
766 
767 #endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
Linear algebra kernels for small dense matrices and vectors.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
MultiVector for multiple degrees of freedom per mesh point.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
global_ordinal_type getIndexBase() const
The index base for this Map.
typename mv_type::device_type device_type
The Kokkos Device type.
CombineMode
Rule for combining data in an Import or Export.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Abstract base class for objects that can be the source of an Import or Export operation.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector&#39;s local data on device. This requires that th...
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector&#39;s data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.