Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Details_packCrsMatrix_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
44 
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
55 #include <memory>
56 #include <sstream>
57 #include <stdexcept>
58 #include <string>
59 
82 
83 namespace Tpetra {
84 
85 #ifndef DOXYGEN_SHOULD_SKIP_THIS
86 // Forward declaration of Distributor
87 class Distributor;
88 #endif // DOXYGEN_SHOULD_SKIP_THIS
89 
90 //
91 // Users must never rely on anything in the Details namespace.
92 //
93 namespace Details {
94 
95 namespace PackCrsMatrixImpl {
103 template<class OutputOffsetsViewType,
104  class CountsViewType,
105  class InputOffsetsViewType,
106  class InputLocalRowIndicesViewType,
107  class InputLocalRowPidsViewType,
108  const bool debug =
109 #ifdef HAVE_TPETRA_DEBUG
110  true
111 #else
112  false
113 #endif // HAVE_TPETRA_DEBUG
114  >
116 public:
117  typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
118  typedef typename CountsViewType::non_const_value_type count_type;
119  typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
120  typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
121  typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
122  // output Views drive where execution happens.
123  typedef typename OutputOffsetsViewType::device_type device_type;
124  static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
125  typename device_type::execution_space>::value,
126  "OutputOffsetsViewType and CountsViewType must have the same execution space.");
127  static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
128  "OutputOffsetsViewType must be a Kokkos::View.");
129  static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
130  "OutputOffsetsViewType must be a nonconst Kokkos::View.");
131  static_assert (std::is_integral<output_offset_type>::value,
132  "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
133  static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
134  "CountsViewType must be a Kokkos::View.");
135  static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
136  "CountsViewType must be a nonconst Kokkos::View.");
137  static_assert (std::is_integral<count_type>::value,
138  "The type of each entry of CountsViewType must be a built-in integer type.");
139  static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
140  "InputOffsetsViewType must be a Kokkos::View.");
141  static_assert (std::is_integral<input_offset_type>::value,
142  "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
143  static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
144  "InputLocalRowIndicesViewType must be a Kokkos::View.");
145  static_assert (std::is_integral<local_row_index_type>::value,
146  "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
147 
148  NumPacketsAndOffsetsFunctor (const OutputOffsetsViewType& outputOffsets,
149  const CountsViewType& counts,
150  const InputOffsetsViewType& rowOffsets,
151  const InputLocalRowIndicesViewType& lclRowInds,
152  const InputLocalRowPidsViewType& lclRowPids,
153  const count_type sizeOfLclCount,
154  const count_type sizeOfGblColInd,
155  const count_type sizeOfPid,
156  const count_type sizeOfValue) :
157  outputOffsets_ (outputOffsets),
158  counts_ (counts),
159  rowOffsets_ (rowOffsets),
160  lclRowInds_ (lclRowInds),
161  lclRowPids_ (lclRowPids),
162  sizeOfLclCount_ (sizeOfLclCount),
163  sizeOfGblColInd_ (sizeOfGblColInd),
164  sizeOfPid_ (sizeOfPid),
165  sizeOfValue_ (sizeOfValue),
166  error_ ("error") // don't forget this, or you'll get segfaults!
167  {
168  if (debug) {
169  const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
170 
171  if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
172  std::ostringstream os;
173  os << "lclRowInds.extent(0) = " << numRowsToPack
174  << " != counts.extent(0) = " << counts_.extent (0)
175  << ".";
176  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
177  }
178  if (static_cast<size_t> (numRowsToPack + 1) !=
179  static_cast<size_t> (outputOffsets_.extent (0))) {
180  std::ostringstream os;
181  os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
182  << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
183  << ".";
184  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
185  }
186  }
187  }
188 
189  KOKKOS_INLINE_FUNCTION void
190  operator() (const local_row_index_type& curInd,
191  output_offset_type& update,
192  const bool final) const
193  {
194  if (debug) {
195  if (curInd < static_cast<local_row_index_type> (0)) {
196  error_ () = 1;
197  return;
198  }
199  }
200 
201  if (final) {
202  if (debug) {
203  if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
204  error_ () = 2;
205  return;
206  }
207  }
208  outputOffsets_(curInd) = update;
209  }
210 
211  if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
212  const auto lclRow = lclRowInds_(curInd);
213  if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
214  static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
215  error_ () = 3;
216  return;
217  }
218  // count_type could differ from the type of each row offset.
219  // For example, row offsets might each be 64 bits, but if their
220  // difference always fits in 32 bits, we may then safely use a
221  // 32-bit count_type.
222  const count_type count =
223  static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
224 
225  // We pack first the number of entries in the row, then that
226  // many global column indices, then that many pids (if any),
227  // then that many values. However, if the number of entries in
228  // the row is zero, we pack nothing.
229  const count_type numBytes = (count == 0) ?
230  static_cast<count_type> (0) :
231  sizeOfLclCount_ + count * (sizeOfGblColInd_ +
232  (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
233  sizeOfValue_);
234 
235  if (final) {
236  counts_(curInd) = numBytes;
237  }
238  update += numBytes;
239  }
240  }
241 
242  // mfh 31 May 2017: Don't need init or join. If you have join, MUST
243  // have join both with and without volatile! Otherwise intrawarp
244  // joins are really slow on GPUs.
245 
247  int getError () const {
248  auto error_h = Kokkos::create_mirror_view (error_);
249  Kokkos::deep_copy (error_h, error_);
250  return error_h ();
251  }
252 
253 private:
254  OutputOffsetsViewType outputOffsets_;
255  CountsViewType counts_;
256  typename InputOffsetsViewType::const_type rowOffsets_;
257  typename InputLocalRowIndicesViewType::const_type lclRowInds_;
258  typename InputLocalRowPidsViewType::const_type lclRowPids_;
259  count_type sizeOfLclCount_;
260  count_type sizeOfGblColInd_;
261  count_type sizeOfPid_;
262  count_type sizeOfValue_;
263  Kokkos::View<int, device_type> error_;
264 };
265 
275 template<class OutputOffsetsViewType,
276  class CountsViewType,
277  class InputOffsetsViewType,
278  class InputLocalRowIndicesViewType,
279  class InputLocalRowPidsViewType>
280 typename CountsViewType::non_const_value_type
281 computeNumPacketsAndOffsets (const OutputOffsetsViewType& outputOffsets,
282  const CountsViewType& counts,
283  const InputOffsetsViewType& rowOffsets,
284  const InputLocalRowIndicesViewType& lclRowInds,
285  const InputLocalRowPidsViewType& lclRowPids,
286  const typename CountsViewType::non_const_value_type sizeOfLclCount,
287  const typename CountsViewType::non_const_value_type sizeOfGblColInd,
288  const typename CountsViewType::non_const_value_type sizeOfPid,
289  const typename CountsViewType::non_const_value_type sizeOfValue)
290 {
291  typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
292  CountsViewType, typename InputOffsetsViewType::const_type,
293  typename InputLocalRowIndicesViewType::const_type,
294  typename InputLocalRowPidsViewType::const_type> functor_type;
295  typedef typename CountsViewType::non_const_value_type count_type;
296  typedef typename OutputOffsetsViewType::size_type size_type;
297  typedef typename OutputOffsetsViewType::execution_space execution_space;
298  typedef typename functor_type::local_row_index_type LO;
299  typedef Kokkos::RangePolicy<execution_space, LO> range_type;
300  const char prefix[] = "computeNumPacketsAndOffsets: ";
301 
302  count_type count = 0;
303  const count_type numRowsToPack = lclRowInds.extent (0);
304 
305  if (numRowsToPack == 0) {
306  return count;
307  }
308  else {
309  TEUCHOS_TEST_FOR_EXCEPTION
310  (rowOffsets.extent (0) <= static_cast<size_type> (1),
311  std::invalid_argument, prefix << "There is at least one row to pack, "
312  "but the matrix has no rows. lclRowInds.extent(0) = " <<
313  numRowsToPack << ", but rowOffsets.extent(0) = " <<
314  rowOffsets.extent (0) << " <= 1.");
315  TEUCHOS_TEST_FOR_EXCEPTION
316  (outputOffsets.extent (0) !=
317  static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
318  prefix << "Output dimension does not match number of rows to pack. "
319  << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
320  << " != lclRowInds.extent(0) + 1 = "
321  << static_cast<size_type> (numRowsToPack + 1) << ".");
322  TEUCHOS_TEST_FOR_EXCEPTION
323  (counts.extent (0) != numRowsToPack, std::invalid_argument,
324  prefix << "counts.extent(0) = " << counts.extent (0)
325  << " != numRowsToPack = " << numRowsToPack << ".");
326 
327  functor_type f (outputOffsets, counts, rowOffsets,
328  lclRowInds, lclRowPids, sizeOfLclCount,
329  sizeOfGblColInd, sizeOfPid, sizeOfValue);
330  Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
331 
332  // At least in debug mode, this functor checks for errors.
333  const int errCode = f.getError ();
334  TEUCHOS_TEST_FOR_EXCEPTION
335  (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
336  << errCode << " != 0.");
337 
338 #if 0
339  size_t total = 0;
340  for (LO k = 0; k < numRowsToPack; ++k) {
341  total += counts[k];
342  }
343  if (outputOffsets(numRowsToPack) != total) {
344  if (errStr.get () == NULL) {
345  errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
346  }
347  std::ostringstream& os = *errStr;
348  os << prefix
349  << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
350  << outputOffsets(numRowsToPack) << " != sum of counts = "
351  << total << "." << std::endl;
352  if (numRowsToPack != 0) {
353  // Only print the array if it's not too long.
354  if (numRowsToPack < static_cast<LO> (10)) {
355  os << "outputOffsets: [";
356  for (LO i = 0; i <= numRowsToPack; ++i) {
357  os << outputOffsets(i);
358  if (static_cast<LO> (i + 1) <= numRowsToPack) {
359  os << ",";
360  }
361  }
362  os << "]" << std::endl;
363  os << "counts: [";
364  for (LO i = 0; i < numRowsToPack; ++i) {
365  os << counts(i);
366  if (static_cast<LO> (i + 1) < numRowsToPack) {
367  os << ",";
368  }
369  }
370  os << "]" << std::endl;
371  }
372  else {
373  os << "outputOffsets(" << (numRowsToPack-1) << ") = "
374  << outputOffsets(numRowsToPack-1) << "." << std::endl;
375  }
376  }
377  count = outputOffsets(numRowsToPack);
378  return {false, errStr};
379  }
380 #endif // HAVE_TPETRA_DEBUG
381 
382  // Get last entry of outputOffsets, which is the sum of the entries
383  // of counts. Don't assume UVM.
384  using Tpetra::Details::getEntryOnHost;
385  return static_cast<count_type> (getEntryOnHost (outputOffsets,
386  numRowsToPack));
387  }
388 }
389 
405 template<class ST, class ColumnMap, class BufferDeviceType>
406 KOKKOS_FUNCTION
407 Kokkos::pair<int, size_t>
408 packCrsMatrixRow (const ColumnMap& col_map,
409  const Kokkos::View<char*, BufferDeviceType>& exports,
410  const typename PackTraits<typename ColumnMap::local_ordinal_type, typename ColumnMap::device_type>::input_array_type& lids_in,
411  const typename PackTraits<int, typename ColumnMap::device_type>::input_array_type& pids_in,
412  const typename PackTraits<ST, typename ColumnMap::device_type>::input_array_type& vals_in,
413  const size_t offset,
414  const size_t num_ent,
415  const size_t num_bytes_per_value,
416  const bool pack_pids)
417 {
418  using Kokkos::subview;
419  typedef typename ColumnMap::local_ordinal_type LO;
420  typedef typename ColumnMap::global_ordinal_type GO;
421  typedef BufferDeviceType BDT;
422  typedef Kokkos::pair<int, size_t> return_type;
423 
424  if (num_ent == 0) {
425  // Empty rows always take zero bytes, to ensure sparsity.
426  return return_type (0, 0);
427  }
428 
429  const LO num_ent_LO = static_cast<LO> (num_ent); // packValueCount wants this
430  const size_t num_ent_beg = offset;
431  const size_t num_ent_len = PackTraits<LO, BDT>::packValueCount (num_ent_LO);
432 
433  const size_t gids_beg = num_ent_beg + num_ent_len;
434  const size_t gids_len = num_ent * PackTraits<GO, BDT>::packValueCount (GO (0));
435 
436  const size_t pids_beg = gids_beg + gids_len;
437  const size_t pids_len = pack_pids ?
438  num_ent * PackTraits<int, BDT>::packValueCount (int (0)) :
439  static_cast<size_t> (0);
440 
441  const size_t vals_beg = gids_beg + gids_len + pids_len;
442  const size_t vals_len = num_ent * num_bytes_per_value;
443 
444  char* const num_ent_out = exports.data () + num_ent_beg;
445  char* const gids_out = exports.data () + gids_beg;
446  char* const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
447  char* const vals_out = exports.data () + vals_beg;
448 
449  size_t num_bytes_out = 0;
450  int error_code = 0;
451  num_bytes_out += PackTraits<LO, BDT>::packValue (num_ent_out, num_ent_LO);
452 
453  {
454  // Copy column indices one at a time, so that we don't need
455  // temporary storage.
456  for (size_t k = 0; k < num_ent; ++k) {
457  const LO lid = lids_in[k];
458  const GO gid = col_map.getGlobalElement (lid);
459  num_bytes_out += PackTraits<GO, BDT>::packValue (gids_out, k, gid);
460  }
461  // Copy PIDs one at a time, so that we don't need temporary storage.
462  if (pack_pids) {
463  for (size_t k = 0; k < num_ent; ++k) {
464  const LO lid = lids_in[k];
465  const int pid = pids_in[lid];
466  num_bytes_out += PackTraits<int, BDT>::packValue (pids_out, k, pid);
467  }
468  }
469  const auto p =
470  PackTraits<ST, BDT>::packArray (vals_out, vals_in.data (), num_ent);
471  error_code += p.first;
472  num_bytes_out += p.second;
473  }
474 
475  if (error_code != 0) {
476  return return_type (10, num_bytes_out);
477  }
478 
479  const size_t expected_num_bytes =
480  num_ent_len + gids_len + pids_len + vals_len;
481  if (num_bytes_out != expected_num_bytes) {
482  return return_type (11, num_bytes_out);
483  }
484  return return_type (0, num_bytes_out);
485 }
486 
487 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
488 struct PackCrsMatrixFunctor {
489  typedef LocalMatrix local_matrix_type;
490  typedef LocalMap local_map_type;
491  typedef typename local_matrix_type::value_type ST;
492  typedef typename local_map_type::local_ordinal_type LO;
493  typedef typename local_map_type::global_ordinal_type GO;
494  typedef typename local_matrix_type::device_type DT;
495 
496  typedef Kokkos::View<const size_t*, BufferDeviceType>
497  num_packets_per_lid_view_type;
498  typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
499  typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
501  export_lids_view_type;
503  source_pids_view_type;
504 
505  typedef typename num_packets_per_lid_view_type::non_const_value_type
506  count_type;
507  typedef typename offsets_view_type::non_const_value_type
508  offset_type;
509  typedef Kokkos::pair<int, LO> value_type;
510 
511  static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
512  "local_map_type::local_ordinal_type and "
513  "local_matrix_type::ordinal_type must be the same.");
514 
515  local_matrix_type local_matrix;
516  local_map_type local_col_map;
517  exports_view_type exports;
518  num_packets_per_lid_view_type num_packets_per_lid;
519  export_lids_view_type export_lids;
520  source_pids_view_type source_pids;
521  offsets_view_type offsets;
522  size_t num_bytes_per_value;
523  bool pack_pids;
524 
525  PackCrsMatrixFunctor (const local_matrix_type& local_matrix_in,
526  const local_map_type& local_col_map_in,
527  const exports_view_type& exports_in,
528  const num_packets_per_lid_view_type& num_packets_per_lid_in,
529  const export_lids_view_type& export_lids_in,
530  const source_pids_view_type& source_pids_in,
531  const offsets_view_type& offsets_in,
532  const size_t num_bytes_per_value_in,
533  const bool pack_pids_in) :
534  local_matrix (local_matrix_in),
535  local_col_map (local_col_map_in),
536  exports (exports_in),
537  num_packets_per_lid (num_packets_per_lid_in),
538  export_lids (export_lids_in),
539  source_pids (source_pids_in),
540  offsets (offsets_in),
541  num_bytes_per_value (num_bytes_per_value_in),
542  pack_pids (pack_pids_in)
543  {
544  const LO numRows = local_matrix_in.numRows ();
545  const LO rowMapDim =
546  static_cast<LO> (local_matrix.graph.row_map.extent (0));
547  TEUCHOS_TEST_FOR_EXCEPTION
548  (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
549  std::logic_error, "local_matrix.graph.row_map.extent(0) = "
550  << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
551  }
552 
553  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
554  {
555  using ::Tpetra::Details::OrdinalTraits;
556  dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
557  }
558 
559  KOKKOS_INLINE_FUNCTION void
560  join (volatile value_type& dst, const volatile value_type& src) const
561  {
562  // `dst` should reflect the first (least) bad index and all other
563  // associated error codes and data, so prefer keeping it.
564  if (src.first != 0 && dst.first == 0) {
565  dst = src;
566  }
567  }
568 
569  KOKKOS_INLINE_FUNCTION
570  void operator() (const LO i, value_type& dst) const
571  {
572  const size_t offset = offsets[i];
573  const LO export_lid = export_lids[i];
574  const size_t buf_size = exports.size();
575  const size_t num_bytes = num_packets_per_lid(i);
576  const size_t num_ent =
577  static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
578  - local_matrix.graph.row_map[export_lid]);
579 
580  // Only pack this row's data if it has a nonzero number of
581  // entries. We can do this because receiving processes get the
582  // number of packets, and will know that zero packets means zero
583  // entries.
584  if (num_ent == 0) {
585  return;
586  }
587 
588  if (export_lid >= local_matrix.numRows ()) {
589  if (dst.first != 0) { // keep only the first error
590  dst = Kokkos::make_pair (1, i); // invalid row
591  }
592  return;
593  }
594  else if ((offset > buf_size || offset + num_bytes > buf_size)) {
595  if (dst.first != 0) { // keep only the first error
596  dst = Kokkos::make_pair (2, i); // out of bounds
597  }
598  return;
599  }
600 
601  // We can now pack this row
602 
603  // Since the matrix is locally indexed on the calling process, we
604  // have to use its column Map (which it _must_ have in this case)
605  // to convert to global indices.
606  const auto row_beg = local_matrix.graph.row_map[export_lid];
607  const auto row_end = local_matrix.graph.row_map[export_lid + 1];
608  auto vals_in = subview (local_matrix.values,
609  Kokkos::make_pair (row_beg, row_end));
610  auto lids_in = subview (local_matrix.graph.entries,
611  Kokkos::make_pair (row_beg, row_end));
612  typedef local_map_type LMT;
613  typedef BufferDeviceType BDT;
614  auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
615  source_pids, vals_in, offset,
616  num_ent, num_bytes_per_value,
617  pack_pids);
618  int error_code_this_row = p.first;
619  size_t num_bytes_packed_this_row = p.second;
620  if (error_code_this_row != 0) {
621  if (dst.first != 0) { // keep only the first error
622  dst = Kokkos::make_pair (error_code_this_row, i); // bad pack
623  }
624  }
625  else if (num_bytes_packed_this_row != num_bytes) {
626  if (dst.first != 0) { // keep only the first error
627  dst = Kokkos::make_pair (3, i);
628  }
629  }
630  }
631 };
632 
640 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
641 void
642 do_pack (const LocalMatrix& local_matrix,
643  const LocalMap& local_map,
644  const Kokkos::View<char*, BufferDeviceType>& exports,
645  const typename PackTraits<
646  size_t,
647  BufferDeviceType
648  >::input_array_type& num_packets_per_lid,
649  const typename PackTraits<
650  typename LocalMap::local_ordinal_type,
651  BufferDeviceType
652  >::input_array_type& export_lids,
653  const typename PackTraits<
654  int,
655  typename LocalMatrix::device_type
656  >::input_array_type& source_pids,
657  const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
658  const size_t num_bytes_per_value,
659  const bool pack_pids)
660 {
661  typedef typename LocalMap::local_ordinal_type LO;
662  typedef typename LocalMatrix::device_type DT;
663  typedef Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
664  const char prefix[] = "Tpetra::Details::do_pack: ";
665 
666  if (export_lids.extent (0) != 0) {
667  TEUCHOS_TEST_FOR_EXCEPTION
668  (static_cast<size_t> (offsets.extent (0)) !=
669  static_cast<size_t> (export_lids.extent (0) + 1),
670  std::invalid_argument, prefix << "offsets.extent(0) = "
671  << offsets.extent (0) << " != export_lids.extent(0) (= "
672  << export_lids.extent (0) << ") + 1.");
673  TEUCHOS_TEST_FOR_EXCEPTION
674  (export_lids.extent (0) != num_packets_per_lid.extent (0),
675  std::invalid_argument, prefix << "export_lids.extent(0) = " <<
676  export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
677  << num_packets_per_lid.extent (0) << ".");
678  // If exports has nonzero length at this point, then the matrix
679  // has at least one entry to pack. Thus, if packing process
680  // ranks, we had better have at least one process rank to pack.
681  TEUCHOS_TEST_FOR_EXCEPTION
682  (pack_pids && exports.extent (0) != 0 &&
683  source_pids.extent (0) == 0, std::invalid_argument, prefix <<
684  "pack_pids is true, and exports.extent(0) = " <<
685  exports.extent (0) << " != 0, meaning that we need to pack at "
686  "least one matrix entry, but source_pids.extent(0) = 0.");
687  }
688 
689  typedef PackCrsMatrixFunctor<LocalMatrix, LocalMap,
690  BufferDeviceType> pack_functor_type;
691  pack_functor_type f (local_matrix, local_map, exports,
692  num_packets_per_lid, export_lids,
693  source_pids, offsets, num_bytes_per_value,
694  pack_pids);
695 
696  typename pack_functor_type::value_type result;
697  range_type range (0, num_packets_per_lid.extent (0));
698  Kokkos::parallel_reduce (range, f, result);
699 
700  if (result.first != 0) {
701  std::ostringstream os;
702 
703  if (result.first == 1) { // invalid local row index
704  auto export_lids_h = Kokkos::create_mirror_view (export_lids);
705  Kokkos::deep_copy (export_lids_h, export_lids);
706  const auto firstBadLid = export_lids_h(result.second);
707  os << "First bad export LID: export_lids(i=" << result.second << ") = "
708  << firstBadLid;
709  }
710  else if (result.first == 2) { // invalid offset
711  auto offsets_h = Kokkos::create_mirror_view (offsets);
712  Kokkos::deep_copy (offsets_h, offsets);
713  const auto firstBadOffset = offsets_h(result.second);
714 
715  auto num_packets_per_lid_h =
716  Kokkos::create_mirror_view (num_packets_per_lid);
717  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid);
718  os << "First bad offset: offsets(i=" << result.second << ") = "
719  << firstBadOffset << ", num_packets_per_lid(i) = "
720  << num_packets_per_lid_h(result.second) << ", buf_size = "
721  << exports.size ();
722  }
723 
724  TEUCHOS_TEST_FOR_EXCEPTION
725  (true, std::runtime_error, prefix << "PackCrsMatrixFunctor reported "
726  "error code " << result.first << " for the first bad row "
727  << result.second << ". " << os.str ());
728  }
729 }
730 
760 template<typename ST, typename LO, typename GO, typename NT, typename BufferDeviceType>
761 void
762 packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
763  Kokkos::DualView<char*, BufferDeviceType>& exports,
764  const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
765  const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
766  const Kokkos::View<const int*, typename NT::device_type>& export_pids,
767  size_t& constant_num_packets,
768  const bool pack_pids,
769  Distributor& /* dist */)
770 {
771  using Kokkos::View;
772  typedef BufferDeviceType DT;
773  typedef typename DT::execution_space execution_space;
774  typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
775  const char prefix[] = "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
776  constexpr bool debug = false;
777 
778  auto local_matrix = sourceMatrix.getLocalMatrix ();
779  auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
780 
781  // Setting this to zero tells the caller to expect a possibly
782  // different ("nonconstant") number of packets per local index
783  // (i.e., a possibly different number of entries per row).
784  constant_num_packets = 0;
785 
786  const size_t num_export_lids =
787  static_cast<size_t> (export_lids.extent (0));
788  TEUCHOS_TEST_FOR_EXCEPTION
789  (num_export_lids !=
790  static_cast<size_t> (num_packets_per_lid.extent (0)),
791  std::invalid_argument, prefix << "num_export_lids.extent(0) = "
792  << num_export_lids << " != num_packets_per_lid.extent(0) = "
793  << num_packets_per_lid.extent (0) << ".");
794  if (num_export_lids != 0) {
795  TEUCHOS_TEST_FOR_EXCEPTION
796  (num_packets_per_lid.data () == NULL, std::invalid_argument,
797  prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
798  "num_packets_per_lid.data() = "
799  << num_packets_per_lid.data () << " == NULL.");
800  }
801 
802  const size_t num_bytes_per_lid = PackTraits<LO, DT>::packValueCount (LO (0));
803  const size_t num_bytes_per_gid = PackTraits<GO, DT>::packValueCount (GO (0));
804  const size_t num_bytes_per_pid = PackTraits<int, DT>::packValueCount (int (0));
805 
806  size_t num_bytes_per_value = 0;
807  if (PackTraits<ST, DT>::compileTimeSize) {
808  // Assume ST is default constructible; packValueCount wants an instance.
809  num_bytes_per_value = PackTraits<ST,DT>::packValueCount (ST ());
810  }
811  else {
812  // Since the packed data come from the source matrix, we can use
813  // the source matrix to get the number of bytes per Scalar value
814  // stored in the matrix. This assumes that all Scalar values in
815  // the source matrix require the same number of bytes. If the
816  // source matrix has no entries on the calling process, then we
817  // hope that some process does have some idea how big a Scalar
818  // value is. Of course, if no processes have any entries, then no
819  // values should be packed (though this does assume that in our
820  // packing scheme, rows with zero entries take zero bytes).
821  size_t num_bytes_per_value_l = 0;
822  if (local_matrix.values.extent(0) > 0) {
823  const ST& val = local_matrix.values(0);
824  num_bytes_per_value_l = PackTraits<ST, DT>::packValueCount (val);
825  }
826  using Teuchos::reduceAll;
827  reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
828  Teuchos::REDUCE_MAX,
829  num_bytes_per_value_l,
830  Teuchos::outArg (num_bytes_per_value));
831  }
832 
833  if (num_export_lids == 0) {
834  // FIXME (26 Apr 2016) Fences around (UVM) allocations only
835  // temporarily needed for #227 debugging. Should be able to
836  // remove them after that's fixed.
837  execution_space::fence ();
838  exports = exports_view_type ("exports", 0);
839  execution_space::fence ();
840  return;
841  }
842 
843  // Array of offsets into the pack buffer.
844  Kokkos::View<size_t*, DT> offsets ("offsets", num_export_lids + 1);
845 
846  // Compute number of packets per LID (row to send), as well as
847  // corresponding offsets (the prefix sum of the packet counts).
848  const size_t count =
849  computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
850  local_matrix.graph.row_map, export_lids,
851  export_pids,
852  num_bytes_per_lid, num_bytes_per_gid,
853  num_bytes_per_pid, num_bytes_per_value);
854 
855  // Resize the output pack buffer if needed.
856  if (count > static_cast<size_t> (exports.extent (0))) {
857  // FIXME (26 Apr 2016) Fences around (UVM) allocations only
858  // temporarily needed for #227 debugging. Should be able to
859  // remove them after that's fixed.
860  execution_space::fence ();
861  exports = exports_view_type ("exports", count);
862  if (debug) {
863  std::ostringstream os;
864  os << "*** exports resized to " << count << std::endl;
865  std::cerr << os.str ();
866  }
867  execution_space::fence ();
868  }
869  if (debug) {
870  std::ostringstream os;
871  os << "*** count: " << count << ", exports.extent(0): "
872  << exports.extent (0) << std::endl;
873  std::cerr << os.str ();
874  }
875 
876  // If exports has nonzero length at this point, then the matrix has
877  // at least one entry to pack. Thus, if packing process ranks, we
878  // had better have at least one process rank to pack.
879  TEUCHOS_TEST_FOR_EXCEPTION
880  (pack_pids && exports.extent (0) != 0 &&
881  export_pids.extent (0) == 0, std::invalid_argument, prefix <<
882  "pack_pids is true, and exports.extent(0) = " <<
883  exports.extent (0) << " != 0, meaning that we need to pack at least "
884  "one matrix entry, but export_pids.extent(0) = 0.");
885 
886  typedef typename std::decay<decltype (local_matrix)>::type
887  local_matrix_type;
888  typedef typename std::decay<decltype (local_col_map)>::type
889  local_map_type;
890 
891  exports.modify_device ();
892  auto exports_d = exports.view_device ();
893  do_pack<local_matrix_type, local_map_type, DT>
894  (local_matrix, local_col_map, exports_d, num_packets_per_lid,
895  export_lids, export_pids, offsets, num_bytes_per_value,
896  pack_pids);
897  // If we got this far, we succeeded.
898 }
899 
900 } // namespace PackCrsMatrixImpl
901 
// packCrsMatrix: Teuchos-array front end for packing rows of a CrsMatrix.
//
// Wraps the caller's host arrays in Kokkos Views, calls
// PackCrsMatrixImpl::packCrsMatrix with pack_pids = false and a
// zero-length PID View, and then copies the per-LID packet counts and
// the packed bytes back into the caller's host arrays.
//
// Parameters visible in this listing:
//   exports            [out] resized to exports_dv.extent(0) when the
//                            sizes differ, then overwritten by deep_copy.
//   numPacketsPerLID   [out] receives the counts produced by the Impl call.
//   exportLIDs         [in]  copied into a buffer_device_type View.
//   constantNumPackets       forwarded unchanged to the Impl call.
//   distor                   forwarded unchanged to the Impl call.
//
// NOTE(review): listing line 904 (function name and first parameter)
// is absent from this rendered page. The body refers to sourceMatrix,
// so the first parameter is presumably
// "const CrsMatrix<ST, LO, GO, NT>& sourceMatrix" -- confirm against
// the real header before relying on this.
902 template<typename ST, typename LO, typename GO, typename NT>
903 void
905  Teuchos::Array<char>& exports,
906  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
907  const Teuchos::ArrayView<const LO>& exportLIDs,
908  size_t& constantNumPackets,
909  Distributor& distor)
910 {
911  using local_matrix_type = typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type;
912  using device_type = typename local_matrix_type::device_type;
913  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
914  using host_exec_space = typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
915  using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
916 
917  // Convert all Teuchos::Array to Kokkos::View
918 
919  // This is an output array, so we don't have to copy to device here.
920  // However, we'll have to remember to copy back to host when done.
// NOTE(review): outputDevice is not referenced on any line visible in
// this listing (line 923 is missing); check whether it is still needed.
921  typename local_matrix_type::device_type outputDevice;
// NOTE(review): listing line 923 is missing between the next two
// lines; presumably it is the call to
// create_mirror_view_from_raw_host_array with its device argument, as
// at lines 933-934 -- confirm.
922  auto num_packets_per_lid_d =
924  numPacketsPerLID.getRawPtr (),
925  numPacketsPerLID.size (), false,
926  "num_packets_per_lid");
927  // FIXME (mfh 05 Feb 2019) We should just pass the exportLIDs
928  // DualView through here, instead of recreating a device View from a
929  // host ArrayView that itself came from a DualView.
930  //
931  // This is an input array, so we have to copy to device here.
932  // However, we never need to copy it back to host.
933  auto export_lids_d =
934  create_mirror_view_from_raw_host_array (buffer_device_type (),
935  exportLIDs.getRawPtr (),
936  exportLIDs.size (), true,
937  "export_lids");
938  static_assert (std::is_same<typename decltype (export_lids_d)::device_type,
939  buffer_device_type>::value,
940  "export_lids_d has the wrong device_type.");
941 
942  // Create an empty array of PIDs
// The Impl interface takes a PID View even when pack_pids is false,
// so a zero-length one is passed.
943  Kokkos::View<int*, device_type> export_pids_d ("export_pids", 0);
944 
// Start with a zero-length pack buffer; the Impl call receives it as
// its exports argument.
945  Kokkos::DualView<char*, buffer_device_type> exports_dv ("exports", 0);
946  constexpr bool pack_pids = false;
947  PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
948  sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
949  export_pids_d, constantNumPackets, pack_pids, distor);
950 
951  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
952  // copy them back to host.
// The host View is built over the caller's raw pointer, so the
// deep_copy below writes into numPacketsPerLID itself.
953  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
954  (numPacketsPerLID.getRawPtr (),
955  numPacketsPerLID.size ());
956  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
957 
958  // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
959  // exports_dv above, then we have two host copies for exports_h.
960 
961  // The exports are an output of PackCrsMatrixImpl::packCrsMatrix, so we have
962  // to copy them back to host.
// Make the caller's array exactly as long as the packed buffer before
// wrapping it, so source and destination extents of the copy match.
963  if (static_cast<size_t> (exports.size ()) !=
964  static_cast<size_t> (exports_dv.extent (0))) {
965  exports.resize (exports_dv.extent (0));
966  }
967  Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
968  exports.size ());
// Copies from the device side of the DualView (d_view) into the
// caller's host array.
969  Kokkos::deep_copy (exports_h, exports_dv.d_view);
970 }
971 
// packCrsMatrixNew: DualView front end for packing rows of a CrsMatrix.
//
// Takes the device views of numPacketsPerLID and exportLIDs and
// forwards them, together with the exports DualView, to
// PackCrsMatrixImpl::packCrsMatrix with pack_pids = false and a
// zero-length PID View. Nothing is copied back to host here.
//
// Preconditions visible in the body:
//   exportLIDs must not need a sync to device (TEUCHOS_ASSERT at 998).
//
// NOTE(review): listing lines 974, 976 and 980 are absent from this
// rendered page, so the function name line and the tails of the
// "exports" and "exportLIDs" parameter declarations are not shown.
// Confirm the full signature against the real header.
972 template<typename ST, typename LO, typename GO, typename NT>
973 void
975  Kokkos::DualView<char*,
977  const Kokkos::DualView<size_t*,
978  typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
979  const Kokkos::DualView<const LO*,
981  size_t& constantNumPackets,
982  Distributor& distor)
983 {
984  using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
985  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
986 
987  // Create an empty array of PIDs, since the interface needs it.
988  Kokkos::View<int*, device_type> exportPIDs_d ("exportPIDs", 0);
989  constexpr bool pack_pids = false;
990 
991  // Write-only device access
// numPacketsPerLID arrives as a const reference, so the sync-state
// calls are made on a non-const copy of the DualView.
992  auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
993  numPacketsPerLID_nc.clear_sync_state ();
994  numPacketsPerLID_nc.modify_device ();
// NOTE(review): the device view is taken from numPacketsPerLID rather
// than numPacketsPerLID_nc; presumably the copy shares the same
// underlying views so the two are equivalent -- confirm.
995  auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
996 
997  // Read-only device access
998  TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
999  auto exportLIDs_d = exportLIDs.view_device ();
1000 
1001  PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
1002  sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
1003  exportPIDs_d, constantNumPackets, pack_pids, distor);
1004 }
1005 
// packCrsMatrixWithOwningPIDs: pack rows of a CrsMatrix together with
// process ranks.
//
// Wraps the host arrays numPacketsPerLID, exportLIDs and sourcePIDs in
// Kokkos Views, calls PackCrsMatrixImpl::packCrsMatrix with
// pack_pids = true, and copies the per-LID packet counts back into
// numPacketsPerLID. The packed bytes stay in exports_dv; this function
// does not copy them anywhere else.
//
// When Tpetra::Details::Behavior::verbose() is true, start / done
// messages and any exception text are written to std::cerr with a
// "Proc <rank>: packCrsMatrixWithOwningPIDs: " prefix. Exceptions from
// the Impl call and from the deep_copy are always rethrown.
//
// NOTE(review): listing line 1008 (function name and first parameter)
// is absent from this rendered page. The body refers to sourceMatrix,
// so the first parameter is presumably
// "const CrsMatrix<ST, LO, GO, NT>& sourceMatrix" -- confirm against
// the real header.
1006 template<typename ST, typename LO, typename GO, typename NT>
1007 void
1009  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports_dv,
1010  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
1011  const Teuchos::ArrayView<const LO>& exportLIDs,
1012  const Teuchos::ArrayView<const int>& sourcePIDs,
1013  size_t& constantNumPackets,
1014  Distributor& distor)
1015 {
1016  typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type local_matrix_type;
1017  typedef typename DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
1018  typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
1019  typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
1020 
// NOTE(review): outputDevice is not referenced on any line visible in
// this listing (line 1066 is missing); check whether it is still needed.
1021  typename local_matrix_type::device_type outputDevice;
1022 
1023  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
// prefix is only assigned inside the verbose branch below, and every
// later dereference of it is also guarded by "if (verbose)".
1024  std::unique_ptr<std::string> prefix;
1025  if (verbose) {
// Rank used in the log prefix; -1 and -2 are sentinels for a null Map
// and a null communicator respectively.
1026  const int myRank = [&] () {
1027  auto map = sourceMatrix.getMap ();
1028  if (map.get () == nullptr) {
1029  return -1;
1030  }
1031  auto comm = map->getComm ();
1032  if (comm.get () == nullptr) {
1033  return -2;
1034  }
1035  return comm->getRank ();
1036  } ();
1037  std::ostringstream os;
1038  os << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs: ";
1039  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
1040 
// Each message is built in its own ostringstream and sent to
// std::cerr with a single insertion.
1041  std::ostringstream os2;
1042  os2 << *prefix << "start" << std::endl;
1043  std::cerr << os2.str ();
1044  }
1045 
1046  // Convert all Teuchos::Array to Kokkos::View
1047 
1048  // This is an output array, so we don't have to copy to device here.
1049  // However, we'll have to remember to copy back to host when done.
1050  auto num_packets_per_lid_d =
1051  create_mirror_view_from_raw_host_array (buffer_device_type (),
1052  numPacketsPerLID.getRawPtr (),
1053  numPacketsPerLID.size (), false,
1054  "num_packets_per_lid");
1055 
1056  // This is an input array, so we have to copy to device here.
1057  // However, we never need to copy it back to host.
1058  auto export_lids_d =
1059  create_mirror_view_from_raw_host_array (buffer_device_type (),
1060  exportLIDs.getRawPtr (),
1061  exportLIDs.size (), true,
1062  "export_lids");
1063  // This is an input array, so we have to copy to device here.
1064  // However, we never need to copy it back to host.
// NOTE(review): listing line 1066 is missing between the next two
// lines; presumably it is the call to
// create_mirror_view_from_raw_host_array with its device argument, as
// at lines 1058-1059 -- confirm.
1065  auto export_pids_d =
1067  sourcePIDs.getRawPtr (),
1068  sourcePIDs.size (), true,
1069  "export_pids");
1070  constexpr bool pack_pids = true;
// Log (in verbose mode only) and rethrow; exceptions from the Impl
// call are never swallowed here.
1071  try {
1072  PackCrsMatrixImpl::packCrsMatrix
1073  (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1074  export_pids_d, constantNumPackets, pack_pids, distor);
1075  }
1076  catch (std::exception& e) {
1077  if (verbose) {
1078  std::ostringstream os;
1079  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw: "
1080  << e.what () << std::endl;
1081  std::cerr << os.str ();
1082  }
1083  throw;
1084  }
1085  catch (...) {
1086  if (verbose) {
1087  std::ostringstream os;
1088  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw an exception "
1089  "not a subclass of std::exception" << std::endl;
1090  std::cerr << os.str ();
1091  }
1092  throw;
1093  }
1094 
// The copy back to host is skipped entirely when the caller's count
// array is empty.
1095  if (numPacketsPerLID.size () != 0) {
1096  try {
1097  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix,
1098  // so we have to copy them back to host.
1099  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1100  (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1101  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
1102  }
1103  catch (std::exception& e) {
1104  if (verbose) {
1105  std::ostringstream os;
1106  os << *prefix << "Kokkos::deep_copy threw: " << e.what () << std::endl;
1107  std::cerr << os.str ();
1108  }
1109  throw;
1110  }
1111  catch (...) {
1112  if (verbose) {
1113  std::ostringstream os;
1114  os << *prefix << "Kokkos::deep_copy threw an exception not a subclass "
1115  "of std::exception" << std::endl;
1116  std::cerr << os.str ();
1117  }
1118  throw;
1119  }
1120  }
1121 
1122  if (verbose) {
1123  std::ostringstream os;
1124  os << *prefix << "done" << std::endl;
1125  std::cerr << os.str ();
1126  }
1127 }
1128 
1129 } // namespace Details
1130 } // namespace Tpetra
1131 
// Explicit instantiation macro. For one (ST, LO, GO, NT) combination
// it expands to explicit instantiations of the three functions
// Details::packCrsMatrix, Details::packCrsMatrixNew and
// Details::packCrsMatrixWithOwningPIDs.
//
// NOTE(review): the names are written as "Details::..." without a
// leading "Tpetra::", so the macro is presumably meant to be expanded
// inside namespace Tpetra -- confirm at the call sites.
1132 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1133  template void \
1134  Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1135  Teuchos::Array<char>&, \
1136  const Teuchos::ArrayView<size_t>&, \
1137  const Teuchos::ArrayView<const LO>&, \
1138  size_t&, \
1139  Distributor&); \
1140  template void \
1141  Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1142  Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1143  const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1144  const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1145  size_t&, \
1146  Distributor&); \
1147  template void \
1148  Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1149  Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1150  const Teuchos::ArrayView<size_t>&, \
1151  const Teuchos::ArrayView<const LO>&, \
1152  const Teuchos::ArrayView<const int>&, \
1153  size_t&, \
1154  Distributor&);
1155 
1156 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Declaration of the Tpetra::CrsMatrix class.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Compute the number of packets and offsets for the pack procedure.
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
Kokkos::View< const value_type *, BufferDeviceType, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Base class for distributed Tpetra objects that support data redistribution.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.