Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_packCrsMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
42 
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
54 #include <memory>
55 #include <sstream>
56 #include <stdexcept>
57 #include <string>
58 
81 
82 namespace Tpetra {
83 
84 #ifndef DOXYGEN_SHOULD_SKIP_THIS
85 // Forward declaration of Distributor
86 class Distributor;
87 #endif // DOXYGEN_SHOULD_SKIP_THIS
88 
89 //
90 // Users must never rely on anything in the Details namespace.
91 //
92 namespace Details {
93 
94 namespace PackCrsMatrixImpl {
102 template<class OutputOffsetsViewType,
103  class CountsViewType,
104  class InputOffsetsViewType,
105  class InputLocalRowIndicesViewType,
106  class InputLocalRowPidsViewType,
107  const bool debug =
108 #ifdef HAVE_TPETRA_DEBUG
109  true
110 #else
111  false
112 #endif // HAVE_TPETRA_DEBUG
113  >
115 public:
116  typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
117  typedef typename CountsViewType::non_const_value_type count_type;
118  typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
119  typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
120  typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
121  // output Views drive where execution happens.
122  typedef typename OutputOffsetsViewType::device_type device_type;
123  static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
124  typename device_type::execution_space>::value,
125  "OutputOffsetsViewType and CountsViewType must have the same execution space.");
126  static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
127  "OutputOffsetsViewType must be a Kokkos::View.");
128  static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
129  "OutputOffsetsViewType must be a nonconst Kokkos::View.");
130  static_assert (std::is_integral<output_offset_type>::value,
131  "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
132  static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
133  "CountsViewType must be a Kokkos::View.");
134  static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
135  "CountsViewType must be a nonconst Kokkos::View.");
136  static_assert (std::is_integral<count_type>::value,
137  "The type of each entry of CountsViewType must be a built-in integer type.");
138  static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
139  "InputOffsetsViewType must be a Kokkos::View.");
140  static_assert (std::is_integral<input_offset_type>::value,
141  "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
142  static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
143  "InputLocalRowIndicesViewType must be a Kokkos::View.");
144  static_assert (std::is_integral<local_row_index_type>::value,
145  "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
146 
147  NumPacketsAndOffsetsFunctor (const OutputOffsetsViewType& outputOffsets,
148  const CountsViewType& counts,
149  const InputOffsetsViewType& rowOffsets,
150  const InputLocalRowIndicesViewType& lclRowInds,
151  const InputLocalRowPidsViewType& lclRowPids,
152  const count_type sizeOfLclCount,
153  const count_type sizeOfGblColInd,
154  const count_type sizeOfPid,
155  const count_type sizeOfValue) :
156  outputOffsets_ (outputOffsets),
157  counts_ (counts),
158  rowOffsets_ (rowOffsets),
159  lclRowInds_ (lclRowInds),
160  lclRowPids_ (lclRowPids),
161  sizeOfLclCount_ (sizeOfLclCount),
162  sizeOfGblColInd_ (sizeOfGblColInd),
163  sizeOfPid_ (sizeOfPid),
164  sizeOfValue_ (sizeOfValue),
165  error_ ("error") // don't forget this, or you'll get segfaults!
166  {
167  if (debug) {
168  const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
169 
170  if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
171  std::ostringstream os;
172  os << "lclRowInds.extent(0) = " << numRowsToPack
173  << " != counts.extent(0) = " << counts_.extent (0)
174  << ".";
175  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
176  }
177  if (static_cast<size_t> (numRowsToPack + 1) !=
178  static_cast<size_t> (outputOffsets_.extent (0))) {
179  std::ostringstream os;
180  os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
181  << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
182  << ".";
183  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
184  }
185  }
186  }
187 
188  KOKKOS_INLINE_FUNCTION void
189  operator() (const local_row_index_type& curInd,
190  output_offset_type& update,
191  const bool final) const
192  {
193  if (debug) {
194  if (curInd < static_cast<local_row_index_type> (0)) {
195  error_ () = 1;
196  return;
197  }
198  }
199 
200  if (final) {
201  if (debug) {
202  if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
203  error_ () = 2;
204  return;
205  }
206  }
207  outputOffsets_(curInd) = update;
208  }
209 
210  if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
211  const auto lclRow = lclRowInds_(curInd);
212  if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
213  static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
214  error_ () = 3;
215  return;
216  }
217  // count_type could differ from the type of each row offset.
218  // For example, row offsets might each be 64 bits, but if their
219  // difference always fits in 32 bits, we may then safely use a
220  // 32-bit count_type.
221  const count_type count =
222  static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
223 
224  // We pack first the number of entries in the row, then that
225  // many global column indices, then that many pids (if any),
226  // then that many values. However, if the number of entries in
227  // the row is zero, we pack nothing.
228  const count_type numBytes = (count == 0) ?
229  static_cast<count_type> (0) :
230  sizeOfLclCount_ + count * (sizeOfGblColInd_ +
231  (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
232  sizeOfValue_);
233 
234  if (final) {
235  counts_(curInd) = numBytes;
236  }
237  update += numBytes;
238  }
239  }
240 
241  // mfh 31 May 2017: Don't need init or join. If you have join, MUST
242  // have join both with and without volatile! Otherwise intrawarp
243  // joins are really slow on GPUs.
244 
246  int getError () const {
247  auto error_h = Kokkos::create_mirror_view (error_);
248  Kokkos::deep_copy (error_h, error_);
249  return error_h ();
250  }
251 
252 private:
253  OutputOffsetsViewType outputOffsets_;
254  CountsViewType counts_;
255  typename InputOffsetsViewType::const_type rowOffsets_;
256  typename InputLocalRowIndicesViewType::const_type lclRowInds_;
257  typename InputLocalRowPidsViewType::const_type lclRowPids_;
258  count_type sizeOfLclCount_;
259  count_type sizeOfGblColInd_;
260  count_type sizeOfPid_;
261  count_type sizeOfValue_;
262  Kokkos::View<int, device_type> error_;
263 };
264 
274 template<class OutputOffsetsViewType,
275  class CountsViewType,
276  class InputOffsetsViewType,
277  class InputLocalRowIndicesViewType,
278  class InputLocalRowPidsViewType>
279 typename CountsViewType::non_const_value_type
280 computeNumPacketsAndOffsets (const OutputOffsetsViewType& outputOffsets,
281  const CountsViewType& counts,
282  const InputOffsetsViewType& rowOffsets,
283  const InputLocalRowIndicesViewType& lclRowInds,
284  const InputLocalRowPidsViewType& lclRowPids,
285  const typename CountsViewType::non_const_value_type sizeOfLclCount,
286  const typename CountsViewType::non_const_value_type sizeOfGblColInd,
287  const typename CountsViewType::non_const_value_type sizeOfPid,
288  const typename CountsViewType::non_const_value_type sizeOfValue)
289 {
290  typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
291  CountsViewType, typename InputOffsetsViewType::const_type,
292  typename InputLocalRowIndicesViewType::const_type,
293  typename InputLocalRowPidsViewType::const_type> functor_type;
294  typedef typename CountsViewType::non_const_value_type count_type;
295  typedef typename OutputOffsetsViewType::size_type size_type;
296  typedef typename OutputOffsetsViewType::execution_space execution_space;
297  typedef typename functor_type::local_row_index_type LO;
298  typedef Kokkos::RangePolicy<execution_space, LO> range_type;
299  const char prefix[] = "computeNumPacketsAndOffsets: ";
300 
301  count_type count = 0;
302  const count_type numRowsToPack = lclRowInds.extent (0);
303 
304  if (numRowsToPack == 0) {
305  return count;
306  }
307  else {
308  TEUCHOS_TEST_FOR_EXCEPTION
309  (rowOffsets.extent (0) <= static_cast<size_type> (1),
310  std::invalid_argument, prefix << "There is at least one row to pack, "
311  "but the matrix has no rows. lclRowInds.extent(0) = " <<
312  numRowsToPack << ", but rowOffsets.extent(0) = " <<
313  rowOffsets.extent (0) << " <= 1.");
314  TEUCHOS_TEST_FOR_EXCEPTION
315  (outputOffsets.extent (0) !=
316  static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
317  prefix << "Output dimension does not match number of rows to pack. "
318  << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
319  << " != lclRowInds.extent(0) + 1 = "
320  << static_cast<size_type> (numRowsToPack + 1) << ".");
321  TEUCHOS_TEST_FOR_EXCEPTION
322  (counts.extent (0) != numRowsToPack, std::invalid_argument,
323  prefix << "counts.extent(0) = " << counts.extent (0)
324  << " != numRowsToPack = " << numRowsToPack << ".");
325 
326  functor_type f (outputOffsets, counts, rowOffsets,
327  lclRowInds, lclRowPids, sizeOfLclCount,
328  sizeOfGblColInd, sizeOfPid, sizeOfValue);
329  Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
330 
331  // At least in debug mode, this functor checks for errors.
332  const int errCode = f.getError ();
333  TEUCHOS_TEST_FOR_EXCEPTION
334  (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
335  << errCode << " != 0.");
336 
337 #if 0
338  size_t total = 0;
339  for (LO k = 0; k < numRowsToPack; ++k) {
340  total += counts[k];
341  }
342  if (outputOffsets(numRowsToPack) != total) {
343  if (errStr.get () == NULL) {
344  errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
345  }
346  std::ostringstream& os = *errStr;
347  os << prefix
348  << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
349  << outputOffsets(numRowsToPack) << " != sum of counts = "
350  << total << "." << std::endl;
351  if (numRowsToPack != 0) {
352  // Only print the array if it's not too long.
353  if (numRowsToPack < static_cast<LO> (10)) {
354  os << "outputOffsets: [";
355  for (LO i = 0; i <= numRowsToPack; ++i) {
356  os << outputOffsets(i);
357  if (static_cast<LO> (i + 1) <= numRowsToPack) {
358  os << ",";
359  }
360  }
361  os << "]" << std::endl;
362  os << "counts: [";
363  for (LO i = 0; i < numRowsToPack; ++i) {
364  os << counts(i);
365  if (static_cast<LO> (i + 1) < numRowsToPack) {
366  os << ",";
367  }
368  }
369  os << "]" << std::endl;
370  }
371  else {
372  os << "outputOffsets(" << (numRowsToPack-1) << ") = "
373  << outputOffsets(numRowsToPack-1) << "." << std::endl;
374  }
375  }
376  count = outputOffsets(numRowsToPack);
377  return {false, errStr};
378  }
379 #endif // HAVE_TPETRA_DEBUG
380 
381  // Get last entry of outputOffsets, which is the sum of the entries
382  // of counts. Don't assume UVM.
383  using Tpetra::Details::getEntryOnHost;
384  return static_cast<count_type> (getEntryOnHost (outputOffsets,
385  numRowsToPack));
386  }
387 }
388 
404 template<class ST, class ColumnMap, class BufferDeviceType>
405 KOKKOS_FUNCTION
406 Kokkos::pair<int, size_t>
407 packCrsMatrixRow (const ColumnMap& col_map,
408  const Kokkos::View<char*, BufferDeviceType>& exports,
409  const typename PackTraits<typename ColumnMap::local_ordinal_type>::input_array_type& lids_in,
410  const typename PackTraits<int>::input_array_type& pids_in,
411  const typename PackTraits<ST>::input_array_type& vals_in,
412  const size_t offset,
413  const size_t num_ent,
414  const size_t num_bytes_per_value,
415  const bool pack_pids)
416 {
417  using Kokkos::subview;
418  using LO = typename ColumnMap::local_ordinal_type;
419  using GO = typename ColumnMap::global_ordinal_type;
420  using return_type = Kokkos::pair<int, size_t>;
421 
422  if (num_ent == 0) {
423  // Empty rows always take zero bytes, to ensure sparsity.
424  return return_type (0, 0);
425  }
426 
427  const LO num_ent_LO = static_cast<LO> (num_ent); // packValueCount wants this
428  const size_t num_ent_beg = offset;
429  const size_t num_ent_len = PackTraits<LO>::packValueCount (num_ent_LO);
430 
431  const size_t gids_beg = num_ent_beg + num_ent_len;
432  const size_t gids_len = num_ent * PackTraits<GO>::packValueCount (GO (0));
433 
434  const size_t pids_beg = gids_beg + gids_len;
435  const size_t pids_len = pack_pids ?
436  num_ent * PackTraits<int>::packValueCount (int (0)) :
437  static_cast<size_t> (0);
438 
439  const size_t vals_beg = gids_beg + gids_len + pids_len;
440  const size_t vals_len = num_ent * num_bytes_per_value;
441 
442  char* const num_ent_out = exports.data () + num_ent_beg;
443  char* const gids_out = exports.data () + gids_beg;
444  char* const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
445  char* const vals_out = exports.data () + vals_beg;
446 
447  size_t num_bytes_out = 0;
448  int error_code = 0;
449  num_bytes_out += PackTraits<LO>::packValue (num_ent_out, num_ent_LO);
450 
451  {
452  // Copy column indices one at a time, so that we don't need
453  // temporary storage.
454  for (size_t k = 0; k < num_ent; ++k) {
455  const LO lid = lids_in[k];
456  const GO gid = col_map.getGlobalElement (lid);
457  num_bytes_out += PackTraits<GO>::packValue (gids_out, k, gid);
458  }
459  // Copy PIDs one at a time, so that we don't need temporary storage.
460  if (pack_pids) {
461  for (size_t k = 0; k < num_ent; ++k) {
462  const LO lid = lids_in[k];
463  const int pid = pids_in[lid];
464  num_bytes_out += PackTraits<int>::packValue (pids_out, k, pid);
465  }
466  }
467  const auto p =
468  PackTraits<ST>::packArray (vals_out, vals_in.data (), num_ent);
469  error_code += p.first;
470  num_bytes_out += p.second;
471  }
472 
473  if (error_code != 0) {
474  return return_type (10, num_bytes_out);
475  }
476 
477  const size_t expected_num_bytes =
478  num_ent_len + gids_len + pids_len + vals_len;
479  if (num_bytes_out != expected_num_bytes) {
480  return return_type (11, num_bytes_out);
481  }
482  return return_type (0, num_bytes_out);
483 }
484 
485 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
486 struct PackCrsMatrixFunctor {
487  typedef LocalMatrix local_matrix_type;
488  typedef LocalMap local_map_type;
489  typedef typename local_matrix_type::value_type ST;
490  typedef typename local_map_type::local_ordinal_type LO;
491  typedef typename local_map_type::global_ordinal_type GO;
492  typedef typename local_matrix_type::device_type DT;
493 
494  typedef Kokkos::View<const size_t*, BufferDeviceType>
495  num_packets_per_lid_view_type;
496  typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
497  typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
498  using export_lids_view_type = typename PackTraits<LO>::input_array_type;
499  using source_pids_view_type = typename PackTraits<int>::input_array_type;
500 
501  typedef typename num_packets_per_lid_view_type::non_const_value_type
502  count_type;
503  typedef typename offsets_view_type::non_const_value_type
504  offset_type;
505  typedef Kokkos::pair<int, LO> value_type;
506 
507  static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
508  "local_map_type::local_ordinal_type and "
509  "local_matrix_type::ordinal_type must be the same.");
510 
511  local_matrix_type local_matrix;
512  local_map_type local_col_map;
513  exports_view_type exports;
514  num_packets_per_lid_view_type num_packets_per_lid;
515  export_lids_view_type export_lids;
516  source_pids_view_type source_pids;
517  offsets_view_type offsets;
518  size_t num_bytes_per_value;
519  bool pack_pids;
520 
521  PackCrsMatrixFunctor (const local_matrix_type& local_matrix_in,
522  const local_map_type& local_col_map_in,
523  const exports_view_type& exports_in,
524  const num_packets_per_lid_view_type& num_packets_per_lid_in,
525  const export_lids_view_type& export_lids_in,
526  const source_pids_view_type& source_pids_in,
527  const offsets_view_type& offsets_in,
528  const size_t num_bytes_per_value_in,
529  const bool pack_pids_in) :
530  local_matrix (local_matrix_in),
531  local_col_map (local_col_map_in),
532  exports (exports_in),
533  num_packets_per_lid (num_packets_per_lid_in),
534  export_lids (export_lids_in),
535  source_pids (source_pids_in),
536  offsets (offsets_in),
537  num_bytes_per_value (num_bytes_per_value_in),
538  pack_pids (pack_pids_in)
539  {
540  const LO numRows = local_matrix_in.numRows ();
541  const LO rowMapDim =
542  static_cast<LO> (local_matrix.graph.row_map.extent (0));
543  TEUCHOS_TEST_FOR_EXCEPTION
544  (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
545  std::logic_error, "local_matrix.graph.row_map.extent(0) = "
546  << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
547  }
548 
549  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
550  {
551  using ::Tpetra::Details::OrdinalTraits;
552  dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
553  }
554 
555  KOKKOS_INLINE_FUNCTION void
556  join (volatile value_type& dst, const volatile value_type& src) const
557  {
558  // `dst` should reflect the first (least) bad index and all other
559  // associated error codes and data, so prefer keeping it.
560  if (src.first != 0 && dst.first == 0) {
561  dst = src;
562  }
563  }
564 
565  KOKKOS_INLINE_FUNCTION
566  void operator() (const LO i, value_type& dst) const
567  {
568  const size_t offset = offsets[i];
569  const LO export_lid = export_lids[i];
570  const size_t buf_size = exports.size();
571  const size_t num_bytes = num_packets_per_lid(i);
572  const size_t num_ent =
573  static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
574  - local_matrix.graph.row_map[export_lid]);
575 
576  // Only pack this row's data if it has a nonzero number of
577  // entries. We can do this because receiving processes get the
578  // number of packets, and will know that zero packets means zero
579  // entries.
580  if (num_ent == 0) {
581  return;
582  }
583 
584  if (export_lid >= local_matrix.numRows ()) {
585  if (dst.first != 0) { // keep only the first error
586  dst = Kokkos::make_pair (1, i); // invalid row
587  }
588  return;
589  }
590  else if ((offset > buf_size || offset + num_bytes > buf_size)) {
591  if (dst.first != 0) { // keep only the first error
592  dst = Kokkos::make_pair (2, i); // out of bounds
593  }
594  return;
595  }
596 
597  // We can now pack this row
598 
599  // Since the matrix is locally indexed on the calling process, we
600  // have to use its column Map (which it _must_ have in this case)
601  // to convert to global indices.
602  const auto row_beg = local_matrix.graph.row_map[export_lid];
603  const auto row_end = local_matrix.graph.row_map[export_lid + 1];
604  auto vals_in = subview (local_matrix.values,
605  Kokkos::make_pair (row_beg, row_end));
606  auto lids_in = subview (local_matrix.graph.entries,
607  Kokkos::make_pair (row_beg, row_end));
608  typedef local_map_type LMT;
609  typedef BufferDeviceType BDT;
610  auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
611  source_pids, vals_in, offset,
612  num_ent, num_bytes_per_value,
613  pack_pids);
614  int error_code_this_row = p.first;
615  size_t num_bytes_packed_this_row = p.second;
616  if (error_code_this_row != 0) {
617  if (dst.first != 0) { // keep only the first error
618  dst = Kokkos::make_pair (error_code_this_row, i); // bad pack
619  }
620  }
621  else if (num_bytes_packed_this_row != num_bytes) {
622  if (dst.first != 0) { // keep only the first error
623  dst = Kokkos::make_pair (3, i);
624  }
625  }
626  }
627 };
628 
636 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
637 void
638 do_pack (const LocalMatrix& local_matrix,
639  const LocalMap& local_map,
640  const Kokkos::View<char*, BufferDeviceType>& exports,
641  const typename PackTraits<size_t>::input_array_type& num_packets_per_lid,
642  const typename PackTraits<typename LocalMap::local_ordinal_type>::input_array_type& export_lids,
643  const typename PackTraits<int>::input_array_type& source_pids,
644  const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
645  const size_t num_bytes_per_value,
646  const bool pack_pids)
647 {
648  using LO = typename LocalMap::local_ordinal_type;
649  using DT = typename LocalMatrix::device_type;
650  using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
651  const char prefix[] = "Tpetra::Details::do_pack: ";
652 
653  if (export_lids.extent (0) != 0) {
654  TEUCHOS_TEST_FOR_EXCEPTION
655  (static_cast<size_t> (offsets.extent (0)) !=
656  static_cast<size_t> (export_lids.extent (0) + 1),
657  std::invalid_argument, prefix << "offsets.extent(0) = "
658  << offsets.extent (0) << " != export_lids.extent(0) (= "
659  << export_lids.extent (0) << ") + 1.");
660  TEUCHOS_TEST_FOR_EXCEPTION
661  (export_lids.extent (0) != num_packets_per_lid.extent (0),
662  std::invalid_argument, prefix << "export_lids.extent(0) = " <<
663  export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
664  << num_packets_per_lid.extent (0) << ".");
665  // If exports has nonzero length at this point, then the matrix
666  // has at least one entry to pack. Thus, if packing process
667  // ranks, we had better have at least one process rank to pack.
668  TEUCHOS_TEST_FOR_EXCEPTION
669  (pack_pids && exports.extent (0) != 0 &&
670  source_pids.extent (0) == 0, std::invalid_argument, prefix <<
671  "pack_pids is true, and exports.extent(0) = " <<
672  exports.extent (0) << " != 0, meaning that we need to pack at "
673  "least one matrix entry, but source_pids.extent(0) = 0.");
674  }
675 
676  using pack_functor_type =
677  PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
678  pack_functor_type f (local_matrix, local_map, exports,
679  num_packets_per_lid, export_lids,
680  source_pids, offsets, num_bytes_per_value,
681  pack_pids);
682 
683  typename pack_functor_type::value_type result;
684  range_type range (0, num_packets_per_lid.extent (0));
685  Kokkos::parallel_reduce (range, f, result);
686 
687  if (result.first != 0) {
688  // We can't deep_copy from AnonymousSpace Views, so we can't print
689  // out any information from them in case of error.
690  TEUCHOS_TEST_FOR_EXCEPTION
691  (true, std::runtime_error, prefix << "PackCrsMatrixFunctor "
692  "reported error code " << result.first << " for the first "
693  "bad row " << result.second << ".");
694  }
695 }
696 
726 template<typename ST, typename LO, typename GO, typename NT, typename BufferDeviceType>
727 void
728 packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
729  Kokkos::DualView<char*, BufferDeviceType>& exports,
730  const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
731  const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
732  const Kokkos::View<const int*, typename NT::device_type>& export_pids,
733  size_t& constant_num_packets,
734  const bool pack_pids,
735  Distributor& /* dist */)
736 {
737  ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix(
738  "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
739  "Import/Export"
740  );
741  using Kokkos::View;
742  typedef BufferDeviceType DT;
743  typedef typename DT::execution_space execution_space;
744  typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
745  const char prefix[] = "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
746  constexpr bool debug = false;
747 
748  auto local_matrix = sourceMatrix.getLocalMatrix ();
749  auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
750 
751  // Setting this to zero tells the caller to expect a possibly
752  // different ("nonconstant") number of packets per local index
753  // (i.e., a possibly different number of entries per row).
754  constant_num_packets = 0;
755 
756  const size_t num_export_lids =
757  static_cast<size_t> (export_lids.extent (0));
758  TEUCHOS_TEST_FOR_EXCEPTION
759  (num_export_lids !=
760  static_cast<size_t> (num_packets_per_lid.extent (0)),
761  std::invalid_argument, prefix << "num_export_lids.extent(0) = "
762  << num_export_lids << " != num_packets_per_lid.extent(0) = "
763  << num_packets_per_lid.extent (0) << ".");
764  if (num_export_lids != 0) {
765  TEUCHOS_TEST_FOR_EXCEPTION
766  (num_packets_per_lid.data () == NULL, std::invalid_argument,
767  prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
768  "num_packets_per_lid.data() = "
769  << num_packets_per_lid.data () << " == NULL.");
770  }
771 
772  const size_t num_bytes_per_lid = PackTraits<LO>::packValueCount (LO (0));
773  const size_t num_bytes_per_gid = PackTraits<GO>::packValueCount (GO (0));
774  const size_t num_bytes_per_pid = PackTraits<int>::packValueCount (int (0));
775 
776  size_t num_bytes_per_value = 0;
777  if (PackTraits<ST>::compileTimeSize) {
778  // Assume ST is default constructible; packValueCount wants an instance.
779  num_bytes_per_value = PackTraits<ST>::packValueCount (ST ());
780  }
781  else {
782  // Since the packed data come from the source matrix, we can use
783  // the source matrix to get the number of bytes per Scalar value
784  // stored in the matrix. This assumes that all Scalar values in
785  // the source matrix require the same number of bytes. If the
786  // source matrix has no entries on the calling process, then we
787  // hope that some process does have some idea how big a Scalar
788  // value is. Of course, if no processes have any entries, then no
789  // values should be packed (though this does assume that in our
790  // packing scheme, rows with zero entries take zero bytes).
791  size_t num_bytes_per_value_l = 0;
792  if (local_matrix.values.extent(0) > 0) {
793  const ST& val = local_matrix.values(0);
794  num_bytes_per_value_l = PackTraits<ST>::packValueCount (val);
795  }
796  using Teuchos::reduceAll;
797  reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
798  Teuchos::REDUCE_MAX,
799  num_bytes_per_value_l,
800  Teuchos::outArg (num_bytes_per_value));
801  }
802 
803  if (num_export_lids == 0) {
804  exports = exports_view_type ("exports", 0);
805  return;
806  }
807 
808  // Array of offsets into the pack buffer.
809  Kokkos::View<size_t*, DT> offsets ("offsets", num_export_lids + 1);
810 
811  // Compute number of packets per LID (row to send), as well as
812  // corresponding offsets (the prefix sum of the packet counts).
813  const size_t count =
814  computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
815  local_matrix.graph.row_map, export_lids,
816  export_pids,
817  num_bytes_per_lid, num_bytes_per_gid,
818  num_bytes_per_pid, num_bytes_per_value);
819 
820  // Resize the output pack buffer if needed.
821  if (count > static_cast<size_t> (exports.extent (0))) {
822  exports = exports_view_type ("exports", count);
823  if (debug) {
824  std::ostringstream os;
825  os << "*** exports resized to " << count << std::endl;
826  std::cerr << os.str ();
827  }
828  }
829  if (debug) {
830  std::ostringstream os;
831  os << "*** count: " << count << ", exports.extent(0): "
832  << exports.extent (0) << std::endl;
833  std::cerr << os.str ();
834  }
835 
836  // If exports has nonzero length at this point, then the matrix has
837  // at least one entry to pack. Thus, if packing process ranks, we
838  // had better have at least one process rank to pack.
839  TEUCHOS_TEST_FOR_EXCEPTION
840  (pack_pids && exports.extent (0) != 0 &&
841  export_pids.extent (0) == 0, std::invalid_argument, prefix <<
842  "pack_pids is true, and exports.extent(0) = " <<
843  exports.extent (0) << " != 0, meaning that we need to pack at least "
844  "one matrix entry, but export_pids.extent(0) = 0.");
845 
846  typedef typename std::decay<decltype (local_matrix)>::type
847  local_matrix_type;
848  typedef typename std::decay<decltype (local_col_map)>::type
849  local_map_type;
850 
851  exports.modify_device ();
852  auto exports_d = exports.view_device ();
853  do_pack<local_matrix_type, local_map_type, DT>
854  (local_matrix, local_col_map, exports_d, num_packets_per_lid,
855  export_lids, export_pids, offsets, num_bytes_per_value,
856  pack_pids);
857  // If we got this far, we succeeded.
858 }
859 
860 } // namespace PackCrsMatrixImpl
861 
// Pack specified entries of the given local sparse matrix for
// communication (Teuchos::Array / Teuchos::ArrayView interface).
//
// Wraps the caller's host arrays in Kokkos::Views, calls
// PackCrsMatrixImpl::packCrsMatrix with pack_pids = false, and then
// copies both the packet counts and the packed bytes back into the
// caller's host arrays.
//
//   exports             [out] resized to the packed length, then filled.
//   numPacketsPerLID    [out] filled from the device-side counts.
//   exportLIDs          [in]  copied to device; never copied back.
//   constantNumPackets  forwarded unchanged to the implementation.
//   distor              forwarded unchanged to the implementation.
//
// NOTE(review): the listing line that carries the function name and
// its first parameter (listing line 864) is absent from this listing.
// The signature index at the end of the page gives it as
// packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix, ...).
862 template<typename ST, typename LO, typename GO, typename NT>
863 void
865  Teuchos::Array<char>& exports,
866  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
867  const Teuchos::ArrayView<const LO>& exportLIDs,
868  size_t& constantNumPackets,
869  Distributor& distor)
870 {
871  using local_matrix_type = typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type;
872  using device_type = typename local_matrix_type::device_type;
873  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
874  using host_exec_space = typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
875  using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
876 
877  // Convert all Teuchos::Array to Kokkos::View
878 
879  // This is an output array, so we don't have to copy to device here.
880  // However, we'll have to remember to copy back to host when done.
881  Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
882  create_mirror_view_from_raw_host_array (buffer_device_type (),
883  numPacketsPerLID.getRawPtr (),
884  numPacketsPerLID.size (), false,
885  "num_packets_per_lid");
886  // FIXME (mfh 05 Feb 2019) We should just pass the exportLIDs
887  // DualView through here, instead of recreating a device View from a
888  // host ArrayView that itself came from a DualView.
889  //
890  // This is an input array, so we have to copy to device here.
891  // However, we never need to copy it back to host.
892  Kokkos::View<const LO*, buffer_device_type> export_lids_d =
893  create_mirror_view_from_raw_host_array (buffer_device_type (),
894  exportLIDs.getRawPtr (),
895  exportLIDs.size (), true,
896  "export_lids");
897 
     // This entry point never packs process ranks (pack_pids is false
     // below), so export_pids_d is left default-constructed and
     // exports_dv starts out empty; the implementation fills exports_dv.
898  Kokkos::View<int*, device_type> export_pids_d; // output arg
899  Kokkos::DualView<char*, buffer_device_type> exports_dv; // output arg
900  constexpr bool pack_pids = false;
901  PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
902  sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
903  export_pids_d, constantNumPackets, pack_pids, distor);
904 
905  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
906  // copy them back to host.
     // The host View below wraps the caller's own storage, so the
     // deep_copy writes straight into numPacketsPerLID.
907  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
908  (numPacketsPerLID.getRawPtr (),
909  numPacketsPerLID.size ());
910  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
911 
912  // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
913  // exports_dv above, then we have two host copies for exports_h.
914 
915  // The exports are an output of PackCrsMatrixImpl::packCrsMatrix, so we have
916  // to copy them back to host.
     // Make the caller's buffer exactly as long as the packed buffer
     // before wrapping its raw pointer in a host View.
917  if (static_cast<size_t> (exports.size ()) !=
918  static_cast<size_t> (exports_dv.extent (0))) {
919  exports.resize (exports_dv.extent (0));
920  }
921  Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
922  exports.size ());
     // The implementation marks exports as modified on device and packs
     // through view_device (), so the copy reads d_view (presumably the
     // DualView's device-side view -- confirm against Kokkos docs).
923  Kokkos::deep_copy (exports_h, exports_dv.d_view);
924 }
925 
// Pack specified entries of the given local sparse matrix for
// communication, for the "new" DistObject interface (Kokkos::DualView
// arguments rather than Teuchos arrays).
//
// Takes device views of numPacketsPerLID (written) and exportLIDs
// (read), then forwards to PackCrsMatrixImpl::packCrsMatrix with
// pack_pids = false and a zero-length PID array.
//
//   exports             [out] passed through to the implementation.
//   numPacketsPerLID    [out] its device view is handed to the implementation.
//   exportLIDs          [in]  must not need a device sync (asserted below).
//   constantNumPackets  forwarded unchanged to the implementation.
//   distor              forwarded unchanged to the implementation.
//
// NOTE(review): the listing line that carries the function name
// (listing line 928) is absent from this listing. The signature index
// at the end of the page names this function packCrsMatrixNew.
926 template<typename ST, typename LO, typename GO, typename NT>
927 void
929  const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
930  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
931  const Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
932  const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
933  size_t& constantNumPackets,
934  Distributor& distor
935 )
936 {
937  using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
938  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
939 
940  // Create an empty array of PIDs, since the interface needs it.
941  Kokkos::View<int*, device_type> exportPIDs_d ("exportPIDs", 0);
942  constexpr bool pack_pids = false;
943 
944  // Write-only device access
     // The parameter is a const reference, so the sync-state calls are
     // made on a non-const copy of the DualView.
945  auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
946  numPacketsPerLID_nc.clear_sync_state ();
947  numPacketsPerLID_nc.modify_device ();
     // NOTE(review): the view is taken from the original const DualView,
     // not from numPacketsPerLID_nc. Presumably both refer to the same
     // allocation (DualView copies being shallow) -- confirm.
948  auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
949 
950  // Read-only device access
     // The LIDs are only read here; the assertion requires that the
     // device copy is already up to date instead of syncing it.
951  TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
952  auto exportLIDs_d = exportLIDs.view_device ();
953 
     // Scope guard for profiling: the region covers the packing call below.
954  ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix_new(
955  "Tpetra::Details::packCrsMatrixNew",
956  "Import/Export"
957  );
958  PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
959  sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
960  exportPIDs_d, constantNumPackets, pack_pids, distor);
961 }
962 
// Pack specified entries of the given local sparse matrix for
// communication, with pack_pids = true so that the process ranks in
// sourcePIDs are supplied to the packing implementation.
//
// Wraps the host ArrayViews in Kokkos::Views (copying the two input
// arrays to device), runs the packing implementation, and copies the
// packet counts back into numPacketsPerLID. In verbose mode
// (Tpetra::Details::Behavior::verbose ()), progress and any exception
// are reported on std::cerr; exceptions are always rethrown.
//
//   exports_dv          [out] passed through to the implementation.
//   numPacketsPerLID    [out] filled from the device-side counts.
//   exportLIDs          [in]  copied to device; never copied back.
//   sourcePIDs          [in]  copied to device; never copied back.
//   constantNumPackets  forwarded unchanged to the implementation.
//   distor              forwarded unchanged to the implementation.
//
// NOTE(review): three listing lines are absent from this listing:
// 965 (function name and first parameter), 1023 (head of the call
// that builds export_pids_d) and 1029 (head of the call inside the
// first try block). The signature index at the end of the page gives
// the first as packCrsMatrixWithOwningPIDs (const CrsMatrix<ST, LO,
// GO, NT>& sourceMatrix, ...).
963 template<typename ST, typename LO, typename GO, typename NT>
964 void
966  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports_dv,
967  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
968  const Teuchos::ArrayView<const LO>& exportLIDs,
969  const Teuchos::ArrayView<const int>& sourcePIDs,
970  size_t& constantNumPackets,
971  Distributor& distor)
972 {
973  typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type local_matrix_type;
974  typedef typename DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
975  typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
976  typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
977 
     // NOTE(review): outputDevice is not referenced on any line visible
     // in this listing. It is presumably the first argument of the call
     // on the missing listing line 1023 -- confirm.
978  typename local_matrix_type::device_type outputDevice;
979 
980  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
     // The prefix string is only allocated in verbose mode; every later
     // dereference of prefix is guarded by "if (verbose)".
981  std::unique_ptr<std::string> prefix;
982  if (verbose) {
     // Rank used in the log prefix: -1 flags a null Map, -2 a null
     // communicator.
983  const int myRank = [&] () {
984  auto map = sourceMatrix.getMap ();
985  if (map.get () == nullptr) {
986  return -1;
987  }
988  auto comm = map->getComm ();
989  if (comm.get () == nullptr) {
990  return -2;
991  }
992  return comm->getRank ();
993  } ();
994  std::ostringstream os;
995  os << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs: ";
996  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
997 
     // Each message is assembled in its own stream and written to
     // std::cerr with a single insertion.
998  std::ostringstream os2;
999  os2 << *prefix << "start" << std::endl;
1000  std::cerr << os2.str ();
1001  }
1002 
1003  // Convert all Teuchos::Array to Kokkos::View
1004 
1005  // This is an output array, so we don't have to copy to device here.
1006  // However, we'll have to remember to copy back to host when done.
1007  auto num_packets_per_lid_d =
1008  create_mirror_view_from_raw_host_array (buffer_device_type (),
1009  numPacketsPerLID.getRawPtr (),
1010  numPacketsPerLID.size (), false,
1011  "num_packets_per_lid");
1012 
1013  // This is an input array, so we have to copy to device here.
1014  // However, we never need to copy it back to host.
1015  auto export_lids_d =
1016  create_mirror_view_from_raw_host_array (buffer_device_type (),
1017  exportLIDs.getRawPtr (),
1018  exportLIDs.size (), true,
1019  "export_lids");
1020  // This is an input array, so we have to copy to device here.
1021  // However, we never need to copy it back to host.
1022  auto export_pids_d =
     // NOTE(review): listing line 1023 (the head of this call) is
     // missing here; only its trailing arguments are visible.
1024  sourcePIDs.getRawPtr (),
1025  sourcePIDs.size (), true,
1026  "export_pids");
1027  constexpr bool pack_pids = true;
1028  try {
     // NOTE(review): listing line 1029 (the callee name) is missing
     // here; the log messages below name it as
     // PackCrsMatrixImpl::packCrsMatrix.
1030  (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1031  export_pids_d, constantNumPackets, pack_pids, distor);
1032  }
     // Both handlers only log (in verbose mode) and then rethrow.
1033  catch (std::exception& e) {
1034  if (verbose) {
1035  std::ostringstream os;
1036  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw: "
1037  << e.what () << std::endl;
1038  std::cerr << os.str ();
1039  }
1040  throw;
1041  }
1042  catch (...) {
1043  if (verbose) {
1044  std::ostringstream os;
1045  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw an exception "
1046  "not a subclass of std::exception" << std::endl;
1047  std::cerr << os.str ();
1048  }
1049  throw;
1050  }
1051 
     // The copy-back is skipped entirely when the caller passed a
     // zero-length count array.
1052  if (numPacketsPerLID.size () != 0) {
1053  try {
1054  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix,
1055  // so we have to copy them back to host.
1056  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1057  (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1058  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
1059  }
     // Same policy as above: log in verbose mode, then rethrow.
1060  catch (std::exception& e) {
1061  if (verbose) {
1062  std::ostringstream os;
1063  os << *prefix << "Kokkos::deep_copy threw: " << e.what () << std::endl;
1064  std::cerr << os.str ();
1065  }
1066  throw;
1067  }
1068  catch (...) {
1069  if (verbose) {
1070  std::ostringstream os;
1071  os << *prefix << "Kokkos::deep_copy threw an exception not a subclass "
1072  "of std::exception" << std::endl;
1073  std::cerr << os.str ();
1074  }
1075  throw;
1076  }
1077  }
1078 
1079  if (verbose) {
1080  std::ostringstream os;
1081  os << *prefix << "done" << std::endl;
1082  std::cerr << os.str ();
1083  }
1084 }
1085 
1086 } // namespace Details
1087 } // namespace Tpetra
1088 
// Explicit-instantiation macro. For one (ST, LO, GO, NT) combination
// it emits explicit template instantiations of the three functions
// Details::packCrsMatrix, Details::packCrsMatrixNew and
// Details::packCrsMatrixWithOwningPIDs.
//
// The names are qualified with Details:: only, so the macro is
// presumably meant to be expanded inside namespace Tpetra -- confirm
// against the files that invoke it.
1089 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1090  template void \
1091  Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1092  Teuchos::Array<char>&, \
1093  const Teuchos::ArrayView<size_t>&, \
1094  const Teuchos::ArrayView<const LO>&, \
1095  size_t&, \
1096  Distributor&); \
1097  template void \
1098  Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1099  Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1100  const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1101  const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1102  size_t&, \
1103  Distributor&); \
1104  template void \
1105  Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1106  Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1107  const Teuchos::ArrayView<size_t>&, \
1108  const Teuchos::ArrayView<const LO>&, \
1109  const Teuchos::ArrayView<const int>&, \
1110  size_t&, \
1111  Distributor&);
1112 
1113 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
Declaration of the Tpetra::CrsMatrix class.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Compute the number of packets and offsets for the pack procedure.
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
typename Node::device_type device_type
The Kokkos device type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Base class for distributed Tpetra objects that support data redistribution.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.