42 #ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
43 #define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
82 #ifndef DOXYGEN_SHOULD_SKIP_THIS
85 #endif // DOXYGEN_SHOULD_SKIP_THIS
92 namespace PackCrsGraphImpl {
// Template parameter list, local typedefs, and compile-time checks for the
// scan functor that computes per-row packet counts and output offsets.
// NOTE(review): this listing skips source lines (the embedded line numbers
// are not contiguous), so the class header itself is not visible here;
// presumably this is NumPacketsAndOffsetsFunctor -- confirm against the file.
100 template<
class OutputOffsetsViewType,
101 class CountsViewType,
102 class InputOffsetsViewType,
103 class InputLocalRowIndicesViewType,
104 class InputLocalRowPidsViewType,
106 #ifdef HAVE_TPETRA_DEBUG
110 #endif // HAVE_TPETRA_DEBUG
// Entry types of each view, with const stripped.
114 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
115 typedef typename CountsViewType::non_const_value_type count_type;
116 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
117 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
118 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The functor's device type is taken from the output offsets view.
120 typedef typename OutputOffsetsViewType::device_type device_type;
// The counts view must live on the same execution space as the offsets view.
121 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
122 typename device_type::execution_space>::value,
123 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
124 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
125 "OutputOffsetsViewType must be a Kokkos::View.");
// value_type == non_const_value_type holds only if the view's entries are
// not const, i.e. the view is writable.
126 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
127 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
128 static_assert (std::is_integral<output_offset_type>::value,
129 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
130 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
131 "CountsViewType must be a Kokkos::View.");
// NOTE(review): this compares CountsViewType::value_type against
// output_offset_type, not count_type. Going by the message and by the
// OutputOffsetsViewType check above, count_type looks like the intended
// operand; as written, the assert also forces the two entry types to be
// identical. Confirm whether that extra constraint is intentional.
132 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
133 "CountsViewType must be a nonconst Kokkos::View.");
134 static_assert (std::is_integral<count_type>::value,
135 "The type of each entry of CountsViewType must be a built-in integer type.");
136 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
137 "InputOffsetsViewType must be a Kokkos::View.");
138 static_assert (std::is_integral<input_offset_type>::value,
139 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
140 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
141 "InputLocalRowIndicesViewType must be a Kokkos::View.");
142 static_assert (std::is_integral<local_row_index_type>::value,
143 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// NOTE(review): no corresponding is_view / is_integral checks for
// InputLocalRowPidsViewType are visible in this listing.
// Constructor (fragment: the first parameter line and some initializer
// lines are not visible in this listing). It stores the given views and
// then checks that their extents are mutually consistent.
146 const CountsViewType& counts,
147 const InputOffsetsViewType& rowOffsets,
148 const InputLocalRowIndicesViewType& lclRowInds,
149 const InputLocalRowPidsViewType& lclRowPids) :
150 outputOffsets_ (outputOffsets),
152 rowOffsets_ (rowOffsets),
153 lclRowInds_ (lclRowInds),
154 lclRowPids_ (lclRowPids),
// The number of rows to pack is the length of the list of local row indices.
158 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
// counts must have exactly one entry per row to pack; otherwise throw
// std::invalid_argument.
160 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
161 std::ostringstream os;
162 os <<
"lclRowInds.extent(0) = " << numRowsToPack
163 <<
" != counts.extent(0) = " << counts_.extent (0)
165 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// outputOffsets must have one more entry than the number of rows to pack;
// otherwise throw std::invalid_argument.
167 if (static_cast<size_t> (numRowsToPack + 1) !=
168 static_cast<size_t> (outputOffsets_.extent (0))) {
169 std::ostringstream os;
170 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
171 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
173 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body, invoked by Kokkos::parallel_scan for each index curInd.
// `update` carries the running sum of packet counts; `final` distinguishes
// the scan passes.
// NOTE(review): the bodies of the range checks and any `if (final)` guard
// are on lines omitted from this listing.
178 KOKKOS_INLINE_FUNCTION
void
179 operator() (
const local_row_index_type& curInd,
180 output_offset_type& update,
181 const bool final)
const
// Range checks on the scan index itself.
184 if (curInd < static_cast<local_row_index_type> (0)) {
192 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
// The offset for this index is the sum of the counts of all earlier indices.
197 outputOffsets_(curInd) = update;
// Only indices below counts_.extent(0) correspond to a row to pack; the
// scan range includes one extra trailing index.
200 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
201 const auto lclRow = lclRowInds_(curInd);
// Reject rows for which rowOffsets_(lclRow+1) would be out of range, and
// negative row indices.
202 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
203 static_cast<local_row_index_type> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in this row, from consecutive row offsets.
211 const count_type count =
212 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// Packets for this row: one per entry, doubled when lclRowPids_ is
// nonempty; zero for an empty row.
216 const count_type numEntToPack = (count == 0)
217 ? static_cast<count_type>(0)
218 : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
221 counts_(curInd) = numEntToPack;
// Accumulate into the running sum for the next index.
223 update += numEntToPack;
// Fragment, presumably from the host-side error accessor (its signature
// and the rest of its body are not visible here): makes a host mirror of
// the error view.
233 auto error_h = Kokkos::create_mirror_view (error_);
// Data members. The two output views are stored as given; the three input
// views are stored as their const_type variants.
239 OutputOffsetsViewType outputOffsets_;
240 CountsViewType counts_;
241 typename InputOffsetsViewType::const_type rowOffsets_;
242 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
243 typename InputLocalRowPidsViewType::const_type lclRowPids_;
// Single int error code stored in a view on device_type.
244 Kokkos::View<int, device_type> error_;
// Compute, for each row listed in lclRowInds, the number of packets to pack
// (written to counts) and the starting offset of that row in the output
// buffer (written to outputOffsets), using a parallel scan.
//
// Throws std::invalid_argument if the view extents are inconsistent, and
// std::runtime_error if the scan functor reports a nonzero error code.
// The returned value is read on the host from outputOffsets.
// NOTE(review): this listing skips source lines, so some braces, guards,
// and statements of this function are not visible.
256 template<
class OutputOffsetsViewType,
257 class CountsViewType,
258 class InputOffsetsViewType,
259 class InputLocalRowIndicesViewType,
260 class InputLocalRowPidsViewType>
261 typename CountsViewType::non_const_value_type
262 computeNumPacketsAndOffsets(
const OutputOffsetsViewType& outputOffsets,
263 const CountsViewType& counts,
264 const InputOffsetsViewType& rowOffsets,
265 const InputLocalRowIndicesViewType& lclRowInds,
266 const InputLocalRowPidsViewType& lclRowPids)
// The functor is instantiated with the const_type variants of the inputs.
268 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
269 CountsViewType,
typename InputOffsetsViewType::const_type,
270 typename InputLocalRowIndicesViewType::const_type,
271 typename InputLocalRowPidsViewType::const_type> functor_type;
272 typedef typename CountsViewType::non_const_value_type count_type;
273 typedef typename OutputOffsetsViewType::size_type size_type;
274 typedef typename OutputOffsetsViewType::execution_space execution_space;
275 typedef typename functor_type::local_row_index_type LO;
276 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
277 const char prefix[] =
"computeNumPacketsAndOffsets: ";
279 count_type count = 0;
// NOTE(review): the row count is held as count_type but compared against
// LO-typed loop indices and size_type extents below; confirm these
// mixed-type comparisons are safe for the instantiated types.
280 const count_type numRowsToPack = lclRowInds.extent (0);
282 if (numRowsToPack == 0) {
// There are rows to pack, so the graph must have at least one row, i.e.
// rowOffsets must have more than one entry.
286 TEUCHOS_TEST_FOR_EXCEPTION
287 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
288 std::invalid_argument, prefix <<
"There is at least one row to pack, "
289 "but the graph has no rows. lclRowInds.extent(0) = " <<
290 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
291 rowOffsets.extent (0) <<
" <= 1.");
// outputOffsets needs numRowsToPack + 1 entries.
292 TEUCHOS_TEST_FOR_EXCEPTION
293 (outputOffsets.extent (0) !=
294 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
295 prefix <<
"Output dimension does not match number of rows to pack. "
296 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
297 <<
" != lclRowInds.extent(0) + 1 = "
298 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
// counts needs exactly one entry per row to pack.
299 TEUCHOS_TEST_FOR_EXCEPTION
300 (counts.extent (0) != numRowsToPack, std::invalid_argument,
301 prefix <<
"counts.extent(0) = " << counts.extent (0)
302 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// Scan over numRowsToPack + 1 indices so the last offsets entry is written.
304 functor_type f (outputOffsets, counts, rowOffsets, lclRowInds, lclRowPids);
305 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// A nonzero error code from the functor becomes a std::runtime_error.
308 const int errCode = f.getError ();
309 TEUCHOS_TEST_FOR_EXCEPTION
310 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
311 << errCode <<
" != 0.");
// Consistency check: the last offset is compared against a `total` (its
// accumulation is on lines omitted here), and a diagnostic is built on
// mismatch.
// NOTE(review): the opening preprocessor guard for this region is not
// visible in this listing; only the closing #endif below is.
315 for (LO k = 0; k < numRowsToPack; ++k) {
318 if (outputOffsets(numRowsToPack) != total) {
319 if (errStr.get () == NULL) {
320 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
322 std::ostringstream& os = *errStr;
324 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
325 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
326 << total <<
"." << std::endl;
327 if (numRowsToPack != 0) {
// Print the full arrays only when there are fewer than 10 rows.
329 if (numRowsToPack < static_cast<LO> (10)) {
330 os <<
"outputOffsets: [";
331 for (LO i = 0; i <= numRowsToPack; ++i) {
332 os << outputOffsets(i);
333 if (static_cast<LO> (i + 1) <= numRowsToPack) {
337 os <<
"]" << std::endl;
339 for (LO i = 0; i < numRowsToPack; ++i) {
341 if (static_cast<LO> (i + 1) < numRowsToPack) {
345 os <<
"]" << std::endl;
348 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
349 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
352 count = outputOffsets(numRowsToPack);
// NOTE(review): a braced {bool, errStr} return does not match this
// function's declared return type (CountsViewType::non_const_value_type).
// Confirm that this region is compiled out, or fix the return.
353 return {
false, errStr};
355 #endif // HAVE_TPETRA_DEBUG
// The result is an entry of outputOffsets fetched via getEntryOnHost (the
// index argument is on a line omitted here), cast to count_type.
359 using Tpetra::Details::getEntryOnHost;
360 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Pack one graph row into the exports buffer.
//
// For each of the num_ent local column indices in lids_in, the global index
// obtained from col_map is written to exports starting at `offset`. A second
// loop writes one process ID per entry starting at offset + num_ent.
// Returns the number of packets counted for the row.
// NOTE(review): the guards around the early return, the count doubling, and
// the PID loop (presumably tests on num_ent and pack_pids) are on lines
// omitted from this listing, as is the `offset` parameter declaration.
375 template<
class Packet,
377 class BufferDeviceType,
378 class OtherDeviceType =
typename ColumnMap::device_type>
381 packRow(
const ColumnMap& col_map,
382 const Kokkos::View<Packet*, BufferDeviceType>& exports,
383 const typename PackTraits<
384 typename ColumnMap::local_ordinal_type,
385 OtherDeviceType>::input_array_type& lids_in,
386 const typename PackTraits<
388 OtherDeviceType>::input_array_type& pids_in,
390 const size_t num_ent,
391 const bool pack_pids)
393 using LO =
typename ColumnMap::local_ordinal_type;
394 using GO =
typename ColumnMap::global_ordinal_type;
// Early exit with zero packets.
398 return static_cast<size_t>(0);
// One packet per entry for the global indices ...
401 size_t num_ent_packed = num_ent;
// ... plus one more per entry for the process IDs.
403 num_ent_packed += num_ent;
// First segment: local column index -> global column index.
408 for (
size_t k = 0; k < num_ent; ++k) {
409 const LO lid = lids_in[k];
410 const GO gid = col_map.getGlobalElement (lid);
411 exports(offset+k) = gid;
// Second segment: process IDs, stored as GO so they fit the packet type.
// Note that pids_in is indexed by the local column index `lid`, not by the
// position k within the row.
415 for (
size_t k = 0; k < num_ent; ++k) {
416 const LO lid = lids_in[k];
417 const int pid = pids_in[lid];
418 exports(offset+num_ent+k) =
static_cast<GO
>(pid);
422 return num_ent_packed;
// Reduction functor that packs graph rows into the exports buffer.
// This span covers the template header, type aliases, data members, and
// constructor.
// NOTE(review): this listing skips source lines; some template parameters,
// alias right-hand sides, and the declaration of the pack_pids member are
// not visible.
425 template<
class Packet,
428 class BufferDeviceType,
429 class OtherDeviceType =
typename LocalGraph::device_type>
430 struct PackCrsGraphFunctor {
431 using local_graph_type = LocalGraph;
432 using local_map_type = LocalMap;
433 using LO =
typename local_map_type::local_ordinal_type;
434 using GO =
typename local_map_type::global_ordinal_type;
// View types for the functor's inputs and output, all on BufferDeviceType
// where the full alias is visible.
436 using num_packets_per_lid_view_type =
437 Kokkos::View<const size_t*, BufferDeviceType>;
438 using offsets_view_type = Kokkos::View<const size_t*, BufferDeviceType>;
439 using exports_view_type = Kokkos::View<Packet*, BufferDeviceType>;
440 using export_lids_view_type =
442 using source_pids_view_type =
446 typename num_packets_per_lid_view_type::non_const_value_type;
447 using offset_type =
typename offsets_view_type::non_const_value_type;
// Reduction result: (error code, loop index). init() sets the code to 0.
448 using value_type = Kokkos::pair<int, LO>;
// The graph's column-index entry type must be the map's local ordinal type.
450 static_assert (std::is_same<LO, typename local_graph_type::data_type>::value,
451 "local_map_type::local_ordinal_type and "
452 "local_graph_type::data_type must be the same.");
454 local_graph_type local_graph;
455 local_map_type local_col_map;
456 exports_view_type exports;
457 num_packets_per_lid_view_type num_packets_per_lid;
458 export_lids_view_type export_lids;
459 source_pids_view_type source_pids;
460 offsets_view_type offsets;
// Constructor: stores all inputs, then checks the local graph's row_map
// extent.
463 PackCrsGraphFunctor(
const local_graph_type& local_graph_in,
464 const local_map_type& local_col_map_in,
465 const exports_view_type& exports_in,
466 const num_packets_per_lid_view_type& num_packets_per_lid_in,
467 const export_lids_view_type& export_lids_in,
468 const source_pids_view_type& source_pids_in,
469 const offsets_view_type& offsets_in,
470 const bool pack_pids_in) :
471 local_graph (local_graph_in),
472 local_col_map (local_col_map_in),
473 exports (exports_in),
474 num_packets_per_lid (num_packets_per_lid_in),
475 export_lids (export_lids_in),
476 source_pids (source_pids_in),
477 offsets (offsets_in),
478 pack_pids (pack_pids_in)
480 const LO numRows = local_graph_in.numRows ();
482 static_cast<LO
> (local_graph.row_map.extent (0));
// A graph with at least one row must have numRows + 1 row offsets; the
// check is skipped when numRows is 0. Throws std::logic_error on mismatch.
483 TEUCHOS_TEST_FOR_EXCEPTION
484 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
485 std::logic_error,
"local_graph.row_map.extent(0) = "
486 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 paired with the invalid local ordinal.
489 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
491 using ::Tpetra::Details::OrdinalTraits;
492 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Reduction join of two partial results.
495 KOKKOS_INLINE_FUNCTION
void
496 join (
volatile value_type& dst,
const volatile value_type& src)
const
// Acts only when src carries an error and dst does not yet; the body
// (presumably dst = src) is on a line omitted from this listing.
500 if (src.first != 0 && dst.first == 0) {
// Reduction body: pack the row export_lids[i] into exports at offsets[i],
// recording an (error code, i) pair in dst on failure. Error codes seen
// here: 1 = export LID out of range, 2 = packed row would overrun the
// buffer, 3 = packed count differs from num_packets_per_lid(i).
// NOTE(review): this listing skips source lines; the early returns and
// closing braces are not visible.
505 KOKKOS_INLINE_FUNCTION
506 void operator() (
const LO i, value_type& dst)
const
508 const size_t offset = offsets[i];
509 const LO export_lid = export_lids[i];
510 const size_t buf_size = exports.size();
511 const size_t num_packets_this_lid = num_packets_per_lid(i);
// NOTE(review): row_map is indexed with export_lid and export_lid+1 here,
// before export_lid is range-checked below; an out-of-range LID would be
// read out of bounds before it is diagnosed. No check for a negative
// export_lid is visible either.
512 const size_t num_ent =
513 static_cast<size_t> (local_graph.row_map[export_lid+1]
514 - local_graph.row_map[export_lid]);
524 if (export_lid >= static_cast<LO>(local_graph.numRows())) {
// NOTE(review): init() sets dst.first to 0 and join() only overwrites when
// dst.first == 0, so this `!= 0` guard looks inverted: as written, an
// error is recorded only if one is already present. The same applies to
// the two guards below. Confirm intent (likely `dst.first == 0`).
525 if (dst.first != 0) {
526 dst = Kokkos::make_pair (1, i);
// Either the start offset or the end of this row's packets lies past the
// end of the buffer.
530 else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
531 if (dst.first != 0) {
532 dst = Kokkos::make_pair (2, i);
// View of this row's column indices within the graph's entries array.
542 const auto row_beg = local_graph.row_map[export_lid];
543 const auto row_end = local_graph.row_map[export_lid + 1];
544 auto lids_in = Kokkos::subview (local_graph.entries,
545 Kokkos::make_pair (row_beg, row_end));
546 using LMT = local_map_type;
548 using BDT = BufferDeviceType;
549 using ODT = OtherDeviceType;
550 size_t num_ent_packed_this_row =
551 packRow<PT,LMT,BDT,ODT> (local_col_map, exports, lids_in,
552 source_pids, offset, num_ent, pack_pids);
// The number of packets actually written must equal the precomputed count.
553 if (num_ent_packed_this_row != num_packets_this_lid) {
554 if (dst.first != 0) {
555 dst = Kokkos::make_pair (3, i);
// Validate the inputs, run PackCrsGraphFunctor over all rows to export with
// Kokkos::parallel_reduce, and turn any error it reports into an exception.
//
// Throws std::invalid_argument for inconsistent extents or for a missing
// source_pids when pack_pids is true, and std::runtime_error if the functor
// reports a nonzero error code.
// NOTE(review): this listing skips source lines; some template parameters,
// PackTraits arguments, and closing braces are not visible.
568 template<
class Packet,
571 class BufferDeviceType,
572 class OtherDeviceType>
574 do_pack(
const LocalGraph& local_graph,
575 const LocalMap& local_map,
576 const Kokkos::View<Packet*, BufferDeviceType>& exports,
577 const typename PackTraits<
580 >::input_array_type& num_packets_per_lid,
581 const typename PackTraits<
582 typename LocalMap::local_ordinal_type,
584 >::input_array_type& export_lids,
585 const typename PackTraits<
588 >::input_array_type& source_pids,
589 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
590 const bool pack_pids)
592 using LO =
typename LocalMap::local_ordinal_type;
593 using execution_space =
typename LocalGraph::device_type::execution_space;
594 using range_type = Kokkos::RangePolicy<execution_space, LO>;
595 const char prefix[] =
"Tpetra::Details::PackCrsGraphImpl::do_pack: ";
// Extent checks apply only when there is at least one row to export.
597 if (export_lids.extent (0) != 0) {
// offsets must have one more entry than export_lids.
598 TEUCHOS_TEST_FOR_EXCEPTION
599 (static_cast<size_t> (offsets.extent (0)) !=
600 static_cast<size_t> (export_lids.extent (0) + 1),
601 std::invalid_argument, prefix <<
"offsets.extent(0) = "
602 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
603 << export_lids.extent (0) <<
") + 1.");
// One packet count per exported row.
604 TEUCHOS_TEST_FOR_EXCEPTION
605 (export_lids.extent (0) != num_packets_per_lid.extent (0),
606 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
607 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
608 << num_packets_per_lid.extent (0) <<
".");
// Packing PIDs into a nonempty buffer requires a nonempty source_pids.
612 TEUCHOS_TEST_FOR_EXCEPTION
613 (pack_pids && exports.extent (0) != 0 &&
614 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
615 "pack_pids is true, and exports.extent(0) = " <<
616 exports.extent (0) <<
" != 0, meaning that we need to pack at "
617 "least one graph entry, but source_pids.extent(0) = 0.");
620 using pack_functor_type =
621 PackCrsGraphFunctor<Packet, LocalGraph, LocalMap,
622 BufferDeviceType, OtherDeviceType>;
623 pack_functor_type f (local_graph, local_map, exports,
624 num_packets_per_lid, export_lids,
625 source_pids, offsets, pack_pids);
// One reduction iteration per row to export; result is (error code, index).
627 typename pack_functor_type::value_type result;
628 range_type range (0, num_packets_per_lid.extent (0));
629 Kokkos::parallel_reduce (range, f, result);
// On error, gather details about the offending row for the message.
// NOTE(review): the deep_copy calls that fill the host mirrors below are
// presumably on lines omitted from this listing -- confirm.
631 if (result.first != 0) {
632 std::ostringstream os;
// Code 1: report the out-of-range export LID.
634 if (result.first == 1) {
635 auto export_lids_h = Kokkos::create_mirror_view (export_lids);
637 const auto firstBadLid = export_lids_h(result.second);
638 os <<
"First bad export LID: export_lids(i=" << result.second <<
") = "
// Code 2: report the offset and packet count that overran the buffer.
641 else if (result.first == 2) {
642 auto offsets_h = Kokkos::create_mirror_view (offsets);
644 const auto firstBadOffset = offsets_h(result.second);
646 auto num_packets_per_lid_h =
647 Kokkos::create_mirror_view (num_packets_per_lid);
649 os <<
"First bad offset: offsets(i=" << result.second <<
") = "
650 << firstBadOffset <<
", num_packets_per_lid(i) = "
651 << num_packets_per_lid_h(result.second) <<
", buf_size = "
655 TEUCHOS_TEST_FOR_EXCEPTION
656 (
true, std::runtime_error, prefix <<
"PackCrsGraphFunctor reported "
657 "error code " << result.first <<
" for the first bad row "
658 << result.second <<
". " << os.str ());
// Device-view implementation of graph packing: computes per-row packet
// counts and offsets, (re)allocates the exports DualView if it is too
// small, and packs the requested rows via do_pack.
//
// constant_num_packets is set to 0. Throws std::invalid_argument on
// inconsistent inputs.
// NOTE(review): this listing skips source lines; the function name line,
// the sourceGraph parameter, some guards (presumably `if (debug)`), and
// closing braces are not visible.
688 template<
typename LO,
typename GO,
typename NT>
691 Kokkos::DualView<
typename CrsGraph<LO,GO,NT>::packet_type*,
692 typename CrsGraph<LO,GO,NT>::buffer_device_type>& exports,
693 const Kokkos::View<
size_t*,
694 typename CrsGraph<LO,GO,NT>::buffer_device_type>& num_packets_per_lid,
695 const Kokkos::View<const LO*, typename NT::device_type>& export_lids,
696 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
697 size_t& constant_num_packets,
698 const bool pack_pids,
702 typedef typename CrsGraph<LO,GO,NT>::packet_type packet_type;
703 typedef typename CrsGraph<LO,GO,NT>::buffer_device_type buffer_device_type;
704 typedef typename buffer_device_type::execution_space execution_space;
705 typedef Kokkos::DualView<packet_type*,buffer_device_type> exports_view_type;
706 const char prefix[] =
"Tpetra::Details::packCrsGraph: ";
// Compile-time switch for the diagnostic output written to std::cerr below.
707 constexpr
bool debug =
false;
709 auto local_graph = sourceGraph.getLocalGraph ();
710 auto local_col_map = sourceGraph.getColMap ()->getLocalMap ();
715 constant_num_packets = 0;
// There must be one packet count per export LID.
717 const size_t num_export_lids =
718 static_cast<size_t> (export_lids.extent (0));
719 TEUCHOS_TEST_FOR_EXCEPTION
721 static_cast<size_t> (num_packets_per_lid.extent (0)),
722 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
723 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
724 << num_packets_per_lid.extent (0) <<
".");
// With rows to export, the counts view must actually be allocated.
725 if (num_export_lids != 0) {
726 TEUCHOS_TEST_FOR_EXCEPTION
727 (num_packets_per_lid.data () == NULL, std::invalid_argument,
728 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
729 "num_packets_per_lid.data() = "
730 << num_packets_per_lid.data () <<
" == NULL.");
// Nothing to export: replace exports with a zero-length DualView, fencing
// before and after the reallocation.
733 if (num_export_lids == 0) {
737 execution_space::fence ();
738 exports = exports_view_type (
"exports", 0);
739 execution_space::fence ();
// Offsets into the exports buffer; one extra entry beyond the row count.
744 Kokkos::View<size_t*,buffer_device_type> offsets (
"offsets", num_export_lids + 1);
// Fills num_packets_per_lid and offsets; the assignment of its return
// value (used as `count` below) is on a line omitted from this listing.
749 computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
750 local_graph.row_map, export_lids, export_pids);
// Reallocate the exports buffer only if it is too small.
753 if (count > static_cast<size_t> (exports.extent (0))) {
757 execution_space::fence ();
758 exports = exports_view_type (
"exports", count);
760 std::ostringstream os;
761 os <<
"*** exports resized to " << count << std::endl;
762 std::cerr << os.str ();
764 execution_space::fence ();
767 std::ostringstream os;
768 os <<
"*** count: " << count <<
", exports.extent(0): "
769 << exports.extent (0) << std::endl;
770 std::cerr << os.str ();
// Packing PIDs into a nonempty buffer requires a nonempty export_pids.
776 TEUCHOS_TEST_FOR_EXCEPTION
777 (pack_pids && exports.extent (0) != 0 &&
778 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
779 "pack_pids is true, and exports.extent(0) = " <<
780 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
781 "one graph entry, but export_pids.extent(0) = 0.");
783 typedef typename std::decay<decltype (local_graph)>::type
785 typedef typename std::decay<decltype (local_col_map)>::type
// Mark the device copy of exports as modified, then pack into it.
787 exports.modify_device ();
788 auto exports_d = exports.view_device ();
789 using other_device_type =
typename NT::device_type;
790 do_pack<packet_type,local_graph_type,local_map_type,buffer_device_type,other_device_type>
791 (local_graph, local_col_map, exports_d, num_packets_per_lid,
792 export_lids, export_pids, offsets, pack_pids);
// Host-array (Teuchos::ArrayView) interface to graph packing without
// process IDs: wraps the host arrays in Kokkos views, calls
// PackCrsGraphImpl::packCrsGraph with pack_pids = false, then makes the
// results available through the host arrays again.
// NOTE(review): this listing skips source lines; the function name line,
// several parameters, the helper calls that build the device views, and
// the copies back to host are not visible.
798 template<
typename LO,
typename GO,
typename NT>
802 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
803 const Teuchos::ArrayView<const LO>& exportLIDs,
804 size_t& constantNumPackets,
809 typedef typename local_graph_type::device_type device_type;
810 typedef typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space host_exec_space;
811 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
// Device type for the communication buffer. With KOKKOS_ENABLE_CUDA the
// memory space is selected by a std::conditional (its condition and one
// alternative are on omitted lines); otherwise it is the graph's memory
// space.
816 typedef typename device_type::execution_space buffer_exec_space;
817 #ifdef KOKKOS_ENABLE_CUDA
818 typedef typename std::conditional<
820 buffer_exec_space, Kokkos::Cuda
823 typename device_type::memory_space
824 >::type buffer_memory_space;
826 typedef typename device_type::memory_space buffer_memory_space;
827 #endif // KOKKOS_ENABLE_CUDA
829 typedef Kokkos::Device<buffer_exec_space,
830 buffer_memory_space> buffer_device_type;
// Default-constructed device object; presumably passed to the view-wrapping
// helper to select the output device -- confirm against the omitted lines.
836 typename local_graph_type::device_type outputDevice;
// Wrap numPacketsPerLID. The `false` is presumably the helper's copy flag
// (no host-to-device copy, since this array is an output) -- confirm.
837 auto num_packets_per_lid_d =
839 numPacketsPerLID.getRawPtr (),
840 numPacketsPerLID.size (),
false,
841 "num_packets_per_lid");
// Wrap exportLIDs; `true` is presumably the copy flag for this input.
846 exportLIDs.getRawPtr (),
847 exportLIDs.size (),
true,
// No PIDs are packed here, so a zero-length PID view is passed.
850 Kokkos::View<int*, device_type> export_pids_d (
"export_pids", 0);
852 Kokkos::DualView<packet_type*,buffer_device_type> exports_dv (
"exports", 0);
853 constexpr
bool pack_pids =
false;
854 PackCrsGraphImpl::packCrsGraph<LO,GO,NT>(
855 sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
856 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view over the caller's numPacketsPerLID storage.
859 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
860 (numPacketsPerLID.getRawPtr (),
861 numPacketsPerLID.size ());
// Resize the caller's exports array to match the packed buffer, then wrap
// it in an unmanaged host view.
869 if (static_cast<size_t> (exports.size ()) !=
870 static_cast<size_t> (exports_dv.extent (0))) {
871 exports.resize (exports_dv.extent (0));
873 Kokkos::View<packet_type*, host_dev_type> exports_h (exports.getRawPtr (),
// DualView interface to graph packing: computes packet counts and offsets
// on device, (re)allocates exports if it is too small, and packs via
// do_pack with the buffer device type used for both device parameters.
//
// constant_num_packets is set to 0. Throws std::invalid_argument on
// inconsistent inputs.
// NOTE(review): this listing skips source lines; the function name line,
// most parameter declarations, and closing braces are not visible.
880 template<
typename LO,
typename GO,
typename NT>
883 const Kokkos::DualView<
887 const Kokkos::DualView<
897 > num_packets_per_lid,
898 size_t& constant_num_packets,
899 const bool pack_pids,
904 using BDT =
typename crs_graph_type::buffer_device_type;
905 using PT =
typename crs_graph_type::packet_type;
906 using exports_dual_view_type = Kokkos::DualView<PT*, BDT>;
907 using LGT =
typename crs_graph_type::local_graph_type;
908 using LMT =
typename crs_graph_type::map_type::local_map_type;
909 const char prefix[] =
"Tpetra::Details::packCrsGraphNew: ";
912 const LMT local_col_map = sourceGraph.
getColMap ()->getLocalMap ();
917 constant_num_packets = 0;
// There must be one packet count per export LID.
919 const size_t num_export_lids =
920 static_cast<size_t> (export_lids.extent (0));
921 TEUCHOS_TEST_FOR_EXCEPTION
923 static_cast<size_t> (num_packets_per_lid.extent (0)),
924 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
925 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
926 << num_packets_per_lid.extent (0) <<
".");
// With rows to export, the device counts view must actually be allocated.
927 TEUCHOS_TEST_FOR_EXCEPTION
928 (num_export_lids != 0 &&
929 num_packets_per_lid.view_device ().data () ==
nullptr,
930 std::invalid_argument, prefix <<
"num_export_lids = "<< num_export_lids
931 <<
" != 0, but num_packets_per_lid.view_device().data() = nullptr.");
// Nothing to export: reset exports to a default-constructed DualView.
933 if (num_export_lids == 0) {
934 exports = exports_dual_view_type ();
939 using offsets_type = Kokkos::View<size_t*, BDT>;
940 offsets_type offsets (
"offsets", num_export_lids + 1);
// The counts are about to be overwritten on device: drop any existing sync
// state and mark the device side as modified.
944 num_packets_per_lid.clear_sync_state ();
945 num_packets_per_lid.modify_device ();
946 using PackCrsGraphImpl::computeNumPacketsAndOffsets;
// Fills the counts and offsets; the assignment of the return value (used
// as `count` below) and the row-offsets argument are on omitted lines.
948 computeNumPacketsAndOffsets (offsets, num_packets_per_lid.view_device (),
950 export_lids.view_device (),
951 export_pids.view_device ());
// Reallocate the exports buffer only if it is too small.
954 if (count > static_cast<size_t> (exports.extent (0))) {
955 exports = exports_dual_view_type (
"exports", count);
// Packing PIDs into a nonempty buffer requires a nonempty export_pids.
961 TEUCHOS_TEST_FOR_EXCEPTION
962 (pack_pids && exports.extent (0) != 0 &&
963 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
964 "pack_pids is true, and exports.extent(0) = " <<
965 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
966 "one graph entry, but export_pids.extent(0) = 0.");
// Mark the device copy of exports as modified, then pack into it.
968 exports.modify_device ();
969 using PackCrsGraphImpl::do_pack;
970 do_pack<PT, LGT, LMT, BDT, BDT> (local_graph, local_col_map,
971 exports.view_device (),
972 num_packets_per_lid.view_device (),
973 export_lids.view_device (),
974 export_pids.view_device (),
// Host-array (Teuchos::ArrayView) interface to graph packing that also
// packs process IDs: wraps the host arrays in Kokkos views and calls
// PackCrsGraphImpl::packCrsGraph with pack_pids = true.
// NOTE(review): this listing skips source lines; the function name line,
// some parameters, the helper calls that build the device views, and the
// copy of the counts back to host are not visible.
978 template<
typename LO,
typename GO,
typename NT>
984 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
985 const Teuchos::ArrayView<const LO>& exportLIDs,
986 const Teuchos::ArrayView<const int>& sourcePIDs,
987 size_t& constantNumPackets,
993 typedef typename Kokkos::DualView<packet_type*, buffer_device_type>::t_host::execution_space host_exec_space;
994 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
// Default-constructed device object; presumably passed to the view-wrapping
// helper to select the output device -- confirm against the omitted lines.
996 typename local_graph_type::device_type outputDevice;
// Wrap numPacketsPerLID. The `false` is presumably the helper's copy flag
// (no host-to-device copy, since this array is an output) -- confirm.
1002 auto num_packets_per_lid_d =
1004 numPacketsPerLID.getRawPtr (),
1005 numPacketsPerLID.size (),
false,
1006 "num_packets_per_lid");
// Wrap exportLIDs; `true` is presumably the copy flag for this input.
1010 auto export_lids_d =
1012 exportLIDs.getRawPtr (),
1013 exportLIDs.size (),
true,
// Wrap sourcePIDs the same way.
1017 auto export_pids_d =
1019 sourcePIDs.getRawPtr (),
1020 sourcePIDs.size (),
true,
1022 constexpr
bool pack_pids =
true;
1023 PackCrsGraphImpl::packCrsGraph<LO,GO,NT>(
1024 sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
1025 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view over the caller's numPacketsPerLID storage.
1029 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1030 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
// Macro taking (LO, GO, NT) that names Details::packCrsGraph,
// Details::packCrsGraphNew, and Details::packCrsGraphWithOwningPIDs with
// their parameter types for that type triple.
// NOTE(review): presumably these are explicit template instantiations; the
// `template` keyword lines and several parameter lines are not visible in
// this listing -- confirm against the file.
1037 #define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT( LO, GO, NT ) \
1039 Details::packCrsGraph<LO, GO, NT> ( \
1040 const CrsGraph<LO, GO, NT>&, \
1041 Teuchos::Array<CrsGraph<LO,GO,NT>::packet_type>&, \
1042 const Teuchos::ArrayView<size_t>&, \
1043 const Teuchos::ArrayView<const LO>&, \
1047 Details::packCrsGraphNew<LO, GO, NT> ( \
1048 const CrsGraph<LO, GO, NT>&, \
1049 const Kokkos::DualView< \
1051 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1052 const Kokkos::DualView< \
1054 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1056 CrsGraph<LO,GO,NT>::packet_type*, \
1057 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1060 CrsGraph<LO,GO,NT>::buffer_device_type>, \
1065 Details::packCrsGraphWithOwningPIDs<LO, GO, NT> ( \
1066 const CrsGraph<LO, GO, NT>&, \
1067 Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1068 const Teuchos::ArrayView<size_t>&, \
1069 const Teuchos::ArrayView<const LO>&, \
1070 const Teuchos::ArrayView<const int>&, \
1074 #endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of the Tpetra::CrsGraph class.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Compute the number of packets and offsets for the pack procedure.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
int getError() const
Host function for getting the error.
Sets up and executes a communication plan for a Tpetra DistObject.
Kokkos::View< const value_type *, OtherDeviceType, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
local_graph_type getLocalGraph() const
Get the local graph.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids, Distributor &distor)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
Declaration and definition of Tpetra::Details::getEntryOnHost.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.