42 #ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
43 #define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
82 #ifndef DOXYGEN_SHOULD_SKIP_THIS
85 #endif // DOXYGEN_SHOULD_SKIP_THIS
92 namespace PackCrsGraphImpl {
// Fragment of a functor class template. Later code in this file refers to
// it as NumPacketsAndOffsetsFunctor and runs it with Kokkos::parallel_scan.
// For each row index to pack it computes the number of packets for that row
// (stored in counts_) and accumulates a running sum (stored in
// outputOffsets_).
// NOTE(review): this listing omits lines (the embedded original line numbers
// skip), so the class head, several braces, and some statements are not
// visible here.
100 template<
class OutputOffsetsViewType,
101 class CountsViewType,
102 class InputOffsetsViewType,
103 class InputLocalRowIndicesViewType,
104 class InputLocalRowPidsViewType,
106 #ifdef HAVE_TPETRA_DEBUG
110 #endif // HAVE_TPETRA_DEBUG
// Element types of the five view template parameters.
114 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
115 typedef typename CountsViewType::non_const_value_type count_type;
116 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
117 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
118 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The output offsets view supplies the device type; the counts view must use
// the same execution space.
120 typedef typename OutputOffsetsViewType::device_type device_type;
121 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
122 typename device_type::execution_space>::value,
123 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
// Compile-time requirements on the view template parameters: each must be a
// Kokkos::View with a built-in integer element type, and the two output
// views must be nonconst.
124 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
125 "OutputOffsetsViewType must be a Kokkos::View.");
126 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
127 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
128 static_assert (std::is_integral<output_offset_type>::value,
129 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
130 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
131 "CountsViewType must be a Kokkos::View.");
// NOTE(review): this compares CountsViewType::value_type against
// output_offset_type rather than count_type, while the message only talks
// about constness of CountsViewType. As written it also forces the two views
// to share an element type -- confirm whether count_type was intended.
132 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
133 "CountsViewType must be a nonconst Kokkos::View.");
134 static_assert (std::is_integral<count_type>::value,
135 "The type of each entry of CountsViewType must be a built-in integer type.");
136 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
137 "InputOffsetsViewType must be a Kokkos::View.");
138 static_assert (std::is_integral<input_offset_type>::value,
139 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
140 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
141 "InputLocalRowIndicesViewType must be a Kokkos::View.");
142 static_assert (std::is_integral<local_row_index_type>::value,
143 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor parameters and member initializers (only partially visible in
// this listing).
146 const CountsViewType& counts,
147 const InputOffsetsViewType& rowOffsets,
148 const InputLocalRowIndicesViewType& lclRowInds,
149 const InputLocalRowPidsViewType& lclRowPids) :
150 outputOffsets_ (outputOffsets),
152 rowOffsets_ (rowOffsets),
153 lclRowInds_ (lclRowInds),
154 lclRowPids_ (lclRowPids),
// Constructor body: counts_ must have one entry per row to pack
// (lclRowInds_.extent(0)), and outputOffsets_ must have exactly one more
// entry than that. A mismatch raises std::invalid_argument.
158 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
160 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
161 std::ostringstream os;
162 os <<
"lclRowInds.extent(0) = " << numRowsToPack
163 <<
" != counts.extent(0) = " << counts_.extent (0)
165 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
167 if (static_cast<size_t> (numRowsToPack + 1) !=
168 static_cast<size_t> (outputOffsets_.extent (0))) {
169 std::ostringstream os;
170 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
171 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
173 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body. curInd is the scan index and update is the running sum.
// NOTE(review): how the `final` flag is used is on lines not visible here;
// presumably the store into outputOffsets_ below is guarded by it -- confirm.
178 KOKKOS_INLINE_FUNCTION
void
179 operator() (
const local_row_index_type& curInd,
180 output_offset_type& update,
181 const bool final)
const
// Range checks on curInd; what happens when a check fails is on lines not
// visible here.
184 if (curInd < static_cast<local_row_index_type> (0)) {
192 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
// Record the running sum accumulated so far at position curInd.
197 outputOffsets_(curInd) = update;
// Only indices below counts_.extent(0) have a row to look up; the scan range
// used later in this file has one extra trailing index.
200 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
201 const auto lclRow = lclRowInds_(curInd);
// Reject a local row index that is negative or for which
// rowOffsets_(lclRow+1) would be out of range.
202 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
203 static_cast<local_row_index_type> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in the row, taken from consecutive row offsets.
211 const count_type count =
212 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// An empty row contributes nothing. Otherwise each entry contributes one
// packet, or two when lclRowPids_ is nonempty.
216 const count_type numEntToPack = (count == 0)
217 ? static_cast<count_type>(0)
218 : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
221 counts_(curInd) = numEntToPack;
223 update += numEntToPack;
// Host mirror of the device-side error flag. The enclosing function head is
// not visible here; presumably this belongs to getError() -- confirm.
233 auto error_h = Kokkos::create_mirror_view (error_);
// Functor state: the two output views, const versions of the three input
// views, and a single-int error flag view on device_type.
239 OutputOffsetsViewType outputOffsets_;
240 CountsViewType counts_;
241 typename InputOffsetsViewType::const_type rowOffsets_;
242 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
243 typename InputLocalRowPidsViewType::const_type lclRowPids_;
244 Kokkos::View<int, device_type> error_;
// Compute the number of packets and offsets for the pack procedure.
//
// Validates the view extents, runs NumPacketsAndOffsetsFunctor with
// Kokkos::parallel_scan over numRowsToPack + 1 indices, checks the functor's
// error code, and returns an entry of outputOffsets read on the host (the
// index argument of that read is not visible in this listing).
// Raises std::invalid_argument on inconsistent extents and
// std::runtime_error when the scan reports a nonzero error code.
// NOTE(review): this listing omits lines (the embedded original line numbers
// skip), so braces and some statements are not visible here.
256 template<
class OutputOffsetsViewType,
257 class CountsViewType,
258 class InputOffsetsViewType,
259 class InputLocalRowIndicesViewType,
260 class InputLocalRowPidsViewType>
261 typename CountsViewType::non_const_value_type
262 computeNumPacketsAndOffsets(
const OutputOffsetsViewType& outputOffsets,
263 const CountsViewType& counts,
264 const InputOffsetsViewType& rowOffsets,
265 const InputLocalRowIndicesViewType& lclRowInds,
266 const InputLocalRowPidsViewType& lclRowPids)
// The functor is instantiated with the const_type of each input view.
268 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
269 CountsViewType,
typename InputOffsetsViewType::const_type,
270 typename InputLocalRowIndicesViewType::const_type,
271 typename InputLocalRowPidsViewType::const_type> functor_type;
272 typedef typename CountsViewType::non_const_value_type count_type;
273 typedef typename OutputOffsetsViewType::size_type size_type;
274 typedef typename OutputOffsetsViewType::execution_space execution_space;
275 typedef typename functor_type::local_row_index_type LO;
276 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
277 const char prefix[] =
"computeNumPacketsAndOffsets: ";
279 count_type count = 0;
280 const count_type numRowsToPack = lclRowInds.extent (0);
// The branch taken when there is nothing to pack is on lines not visible
// here.
282 if (numRowsToPack == 0) {
// With at least one row to pack, the graph must have at least one row
// (rowOffsets needs more than one entry).
286 TEUCHOS_TEST_FOR_EXCEPTION
287 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
288 std::invalid_argument, prefix <<
"There is at least one row to pack, "
289 "but the graph has no rows. lclRowInds.extent(0) = " <<
290 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
291 rowOffsets.extent (0) <<
" <= 1.");
// outputOffsets needs one entry per row to pack plus one for the total.
292 TEUCHOS_TEST_FOR_EXCEPTION
293 (outputOffsets.extent (0) !=
294 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
295 prefix <<
"Output dimension does not match number of rows to pack. "
296 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
297 <<
" != lclRowInds.extent(0) + 1 = "
298 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
// counts needs exactly one entry per row to pack.
299 TEUCHOS_TEST_FOR_EXCEPTION
300 (counts.extent (0) != numRowsToPack, std::invalid_argument,
301 prefix <<
"counts.extent(0) = " << counts.extent (0)
302 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// Run the scan over one index more than the number of rows, so the last
// position of outputOffsets receives the running sum over all rows.
304 functor_type f (outputOffsets, counts, rowOffsets, lclRowInds, lclRowPids);
305 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
308 const int errCode = f.getError ();
309 TEUCHOS_TEST_FOR_EXCEPTION
310 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
311 << errCode <<
" != 0.");
// Consistency check that ends at the "#endif // HAVE_TPETRA_DEBUG" below:
// the last offset is compared with a separately computed total, and a
// diagnostic is built on mismatch. The matching #ifdef and the computation
// of `total` are on lines not visible here.
// NOTE(review): the loops use LO indices against numRowsToPack, which has
// type count_type; if count_type is unsigned these are mixed-sign
// comparisons -- confirm.
315 for (LO k = 0; k < numRowsToPack; ++k) {
318 if (outputOffsets(numRowsToPack) != total) {
319 if (errStr.get () == NULL) {
320 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
322 std::ostringstream& os = *errStr;
324 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
325 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
326 << total <<
"." << std::endl;
327 if (numRowsToPack != 0) {
// For fewer than ten rows, the views are printed in full.
329 if (numRowsToPack < static_cast<LO> (10)) {
330 os <<
"outputOffsets: [";
331 for (LO i = 0; i <= numRowsToPack; ++i) {
332 os << outputOffsets(i);
333 if (static_cast<LO> (i + 1) <= numRowsToPack) {
337 os <<
"]" << std::endl;
339 for (LO i = 0; i < numRowsToPack; ++i) {
341 if (static_cast<LO> (i + 1) < numRowsToPack) {
345 os <<
"]" << std::endl;
348 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
349 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
352 count = outputOffsets(numRowsToPack);
// NOTE(review): this braced two-value return does not obviously match the
// declared return type (CountsViewType::non_const_value_type); the
// surrounding lines are not visible here -- confirm that this section
// compiles when HAVE_TPETRA_DEBUG is defined.
353 return {
false, errStr};
355 #endif // HAVE_TPETRA_DEBUG
// Normal return path: fetch an entry of outputOffsets on the host and
// convert it to count_type.
359 using Tpetra::Details::getEntryOnHost;
360 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Pack one row of the graph into the exports buffer, starting at `offset`.
//
// The first num_ent slots receive the global column indices of the row. A
// second group of num_ent slots receives process ids converted to GO.
// Returns the number of buffer entries written for this row.
// NOTE(review): this listing omits lines (the embedded original line numbers
// skip). The remaining template parameters, the `offset` parameter, and the
// conditions guarding the early return and the second loop are not visible;
// presumably they test num_ent == 0 and pack_pids respectively -- confirm.
375 template<
class Packet,
377 class BufferDeviceType,
382 packRow(
const LocalMapType& col_map,
383 const Kokkos::View<Packet*, BufferDeviceType>& exports,
384 const InputLidsType& lids_in,
385 const InputPidsType& pids_in,
387 const size_t num_ent,
388 const bool pack_pids)
390 using LO =
typename LocalMapType::local_ordinal_type;
391 using GO =
typename LocalMapType::global_ordinal_type;
// Early exit reporting that nothing was packed.
395 return static_cast<size_t>(0);
// Running count of entries written: num_ent for the column indices, plus
// another num_ent added below (under a condition not visible here).
398 size_t num_ent_packed = num_ent;
400 num_ent_packed += num_ent;
// Convert each local column index to a global index through the column map
// and store it at exports(offset + k).
405 for (
size_t k = 0; k < num_ent; ++k) {
406 const LO lid = lids_in[k];
407 const GO gid = col_map.getGlobalElement (lid);
408 exports(offset+k) = gid;
// Store the process id for each entry after the block of global indices.
// pids_in is indexed by the local column index `lid`, not by the position k
// within the row.
412 for (
size_t k = 0; k < num_ent; ++k) {
413 const LO lid = lids_in[k];
414 const int pid = pids_in[lid];
415 exports(offset+num_ent+k) =
static_cast<GO
>(pid);
419 return num_ent_packed;
// Reduction functor that packs graph rows into the exports buffer, one
// export LID per index. do_pack in this file runs it with
// Kokkos::parallel_reduce.
//
// The reduction value is a pair (error code, index). Error codes set by
// operator():
//   1 -- export_lid is not below local_graph.numRows()
//   2 -- offset, or offset + num_packets_this_lid, exceeds the buffer size
//   3 -- packRow wrote a different number of entries than num_packets_per_lid
// NOTE(review): this listing omits lines (the embedded original line numbers
// skip), so some template parameters, aliases, members (pack_pids is
// initialized but its declaration is not visible), braces, and statements
// are missing here.
422 template<
class Packet,
425 class BufferDeviceType>
426 struct PackCrsGraphFunctor {
427 using local_graph_type = LocalGraph;
428 using local_map_type = LocalMap;
429 using LO =
typename local_map_type::local_ordinal_type;
430 using GO =
typename local_map_type::global_ordinal_type;
432 using num_packets_per_lid_view_type =
433 Kokkos::View<const size_t*, BufferDeviceType>;
434 using offsets_view_type = Kokkos::View<const size_t*, BufferDeviceType>;
435 using exports_view_type = Kokkos::View<Packet*, BufferDeviceType>;
436 using export_lids_view_type =
438 using source_pids_view_type =
442 typename num_packets_per_lid_view_type::non_const_value_type;
443 using offset_type =
typename offsets_view_type::non_const_value_type;
// Reduction result: (error code, index at which it was recorded).
444 using value_type = Kokkos::pair<int, LO>;
446 static_assert (std::is_same<LO, typename local_graph_type::data_type>::value,
447 "local_map_type::local_ordinal_type and "
448 "local_graph_type::data_type must be the same.");
450 local_graph_type local_graph;
451 local_map_type local_col_map;
452 exports_view_type exports;
453 num_packets_per_lid_view_type num_packets_per_lid;
454 export_lids_view_type export_lids;
455 source_pids_view_type source_pids;
456 offsets_view_type offsets;
// Stores copies of all inputs, then checks that a nonempty graph has
// numRows + 1 row offsets; otherwise raises std::logic_error.
459 PackCrsGraphFunctor(
const local_graph_type& local_graph_in,
460 const local_map_type& local_col_map_in,
461 const exports_view_type& exports_in,
462 const num_packets_per_lid_view_type& num_packets_per_lid_in,
463 const export_lids_view_type& export_lids_in,
464 const source_pids_view_type& source_pids_in,
465 const offsets_view_type& offsets_in,
466 const bool pack_pids_in) :
467 local_graph (local_graph_in),
468 local_col_map (local_col_map_in),
469 exports (exports_in),
470 num_packets_per_lid (num_packets_per_lid_in),
471 export_lids (export_lids_in),
472 source_pids (source_pids_in),
473 offsets (offsets_in),
474 pack_pids (pack_pids_in)
476 const LO numRows = local_graph_in.numRows ();
478 static_cast<LO
> (local_graph.row_map.extent (0));
479 TEUCHOS_TEST_FOR_EXCEPTION
480 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
481 std::logic_error,
"local_graph.row_map.extent(0) = "
482 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 paired with the invalid LO value.
485 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
487 using ::Tpetra::Details::OrdinalTraits;
488 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Combine two partial results. dst is only considered for replacement when
// it holds no error (first == 0) and src does hold one; the statement inside
// the branch is not visible here.
491 KOKKOS_INLINE_FUNCTION
void
492 join (
volatile value_type& dst,
const volatile value_type& src)
const
496 if (src.first != 0 && dst.first == 0) {
// Pack the row for export index i, recording an error code in dst on
// failure.
501 KOKKOS_INLINE_FUNCTION
502 void operator() (
const LO i, value_type& dst)
const
504 const size_t offset = offsets[i];
505 const LO export_lid = export_lids[i];
506 const size_t buf_size = exports.size();
507 const size_t num_packets_this_lid = num_packets_per_lid(i);
// NOTE(review): row_map is read at export_lid and export_lid + 1 here,
// before the check further down that export_lid is below numRows() --
// confirm that an out-of-range export_lid cannot reach this read.
508 const size_t num_ent =
509 static_cast<size_t> (local_graph.row_map[export_lid+1]
510 - local_graph.row_map[export_lid]);
// NOTE(review): init() sets dst.first to 0, and join() treats
// dst.first == 0 as "no error yet". The `dst.first != 0` guards below
// therefore only overwrite dst when an error is already recorded, which
// looks inverted -- confirm whether `== 0` was intended at all three sites.
520 if (export_lid >= static_cast<LO>(local_graph.numRows())) {
521 if (dst.first != 0) {
522 dst = Kokkos::make_pair (1, i);
// The row's packets must fit inside the exports buffer.
526 else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
527 if (dst.first != 0) {
528 dst = Kokkos::make_pair (2, i);
// Take the slice of local column indices belonging to this row and pack it.
538 const auto row_beg = local_graph.row_map[export_lid];
539 const auto row_end = local_graph.row_map[export_lid + 1];
540 auto lids_in = Kokkos::subview (local_graph.entries,
541 Kokkos::make_pair (row_beg, row_end));
542 size_t num_ent_packed_this_row =
543 packRow (local_col_map, exports, lids_in,
544 source_pids, offset, num_ent, pack_pids);
// The number of entries actually packed must match the precomputed count.
545 if (num_ent_packed_this_row != num_packets_this_lid) {
546 if (dst.first != 0) {
547 dst = Kokkos::make_pair (3, i);
// Validate the pack inputs, run PackCrsGraphFunctor with
// Kokkos::parallel_reduce over every entry of num_packets_per_lid, and turn
// a nonzero error code from the reduction into a std::runtime_error that
// names the first bad index. Inconsistent extents raise
// std::invalid_argument.
// NOTE(review): this listing omits lines (the embedded original line numbers
// skip), so some template parameters, the return type, braces, and
// statements are not visible here.
560 template<
class Packet,
563 class BufferDeviceType>
565 do_pack(
const LocalGraph& local_graph,
566 const LocalMap& local_map,
567 const Kokkos::View<Packet*, BufferDeviceType>& exports,
568 const typename PackTraits<
571 >::input_array_type& num_packets_per_lid,
572 const typename PackTraits<
573 typename LocalMap::local_ordinal_type,
575 >::input_array_type& export_lids,
576 const typename PackTraits<
579 >::input_array_type& source_pids,
580 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
581 const bool pack_pids)
583 using LO =
typename LocalMap::local_ordinal_type;
584 using execution_space =
typename LocalGraph::device_type::execution_space;
585 using range_type = Kokkos::RangePolicy<execution_space, LO>;
586 const char prefix[] =
"Tpetra::Details::PackCrsGraphImpl::do_pack: ";
// With at least one export LID, offsets must have one more entry than
// export_lids.
588 if (export_lids.extent (0) != 0) {
589 TEUCHOS_TEST_FOR_EXCEPTION
590 (static_cast<size_t> (offsets.extent (0)) !=
591 static_cast<size_t> (export_lids.extent (0) + 1),
592 std::invalid_argument, prefix <<
"offsets.extent(0) = "
593 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
594 << export_lids.extent (0) <<
") + 1.");
// There must be one packet count per export LID.
595 TEUCHOS_TEST_FOR_EXCEPTION
596 (export_lids.extent (0) != num_packets_per_lid.extent (0),
597 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
598 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
599 << num_packets_per_lid.extent (0) <<
".");
// Packing process ids into a nonempty buffer requires a nonempty source_pids.
603 TEUCHOS_TEST_FOR_EXCEPTION
604 (pack_pids && exports.extent (0) != 0 &&
605 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
606 "pack_pids is true, and exports.extent(0) = " <<
607 exports.extent (0) <<
" != 0, meaning that we need to pack at "
608 "least one graph entry, but source_pids.extent(0) = 0.");
// Build the functor and reduce over all export indices; `result` receives
// the (error code, index) pair.
611 using pack_functor_type =
612 PackCrsGraphFunctor<Packet, LocalGraph, LocalMap,
614 pack_functor_type f (local_graph, local_map, exports,
615 num_packets_per_lid, export_lids,
616 source_pids, offsets, pack_pids);
618 typename pack_functor_type::value_type result;
619 range_type range (0, num_packets_per_lid.extent (0));
620 Kokkos::parallel_reduce (range, f, result);
// Error reporting: describe the offending entry for codes 1 and 2, then
// throw.
// NOTE(review): host mirror views are created below, but any copy from
// device into them is on lines not visible here -- confirm that the mirrors
// are filled before being read.
622 if (result.first != 0) {
623 std::ostringstream os;
625 if (result.first == 1) {
626 auto export_lids_h = Kokkos::create_mirror_view (export_lids);
628 const auto firstBadLid = export_lids_h(result.second);
629 os <<
"First bad export LID: export_lids(i=" << result.second <<
") = "
632 else if (result.first == 2) {
633 auto offsets_h = Kokkos::create_mirror_view (offsets);
635 const auto firstBadOffset = offsets_h(result.second);
637 auto num_packets_per_lid_h =
638 Kokkos::create_mirror_view (num_packets_per_lid);
640 os <<
"First bad offset: offsets(i=" << result.second <<
") = "
641 << firstBadOffset <<
", num_packets_per_lid(i) = "
642 << num_packets_per_lid_h(result.second) <<
", buf_size = "
646 TEUCHOS_TEST_FOR_EXCEPTION
647 (
true, std::runtime_error, prefix <<
"PackCrsGraphFunctor reported "
648 "error code " << result.first <<
" for the first bad row "
649 << result.second <<
". " << os.str ());
// Implementation-level packCrsGraph working on device views and a DualView
// exports buffer. It sets constant_num_packets to 0, computes per-LID packet
// counts and offsets with computeNumPacketsAndOffsets, reallocates exports
// when it is too small, and calls do_pack to fill the device side of the
// buffer. Raises std::invalid_argument on inconsistent inputs.
// NOTE(review): this listing omits lines (the embedded original line numbers
// skip), so parts of the signature, the declaration of `count`, braces, and
// some statements are not visible here.
679 template<
typename LO,
typename GO,
typename NT>
682 (
const CrsGraph<LO,GO,NT>& sourceGraph,
684 typename CrsGraph<LO,GO,NT>::packet_type*,
685 typename CrsGraph<LO,GO,NT>::buffer_device_type
689 typename CrsGraph<LO,GO,NT>::buffer_device_type
690 >& num_packets_per_lid,
693 typename CrsGraph<LO, GO, NT>::buffer_device_type
697 typename CrsGraph<LO, GO, NT>::buffer_device_type
699 size_t& constant_num_packets,
700 const bool pack_pids,
704 using crs_graph_type = CrsGraph<LO, GO, NT>;
705 using packet_type =
typename crs_graph_type::packet_type;
706 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
707 using execution_space =
typename buffer_device_type::execution_space;
708 using exports_view_type = Kokkos::DualView<packet_type*, buffer_device_type>;
709 using local_graph_type =
typename crs_graph_type::local_graph_type;
711 const char prefix[] =
"Tpetra::Details::packCrsGraph: ";
// Compile-time switch; it is false here. Presumably it guards the std::cerr
// diagnostics below (the guarding lines are not visible) -- confirm.
712 constexpr
bool debug =
false;
714 local_graph_type local_graph = sourceGraph.getLocalGraph ();
715 local_map_type local_col_map = sourceGraph.getColMap ()->getLocalMap ();
// Always reported as 0; presumably signals a varying number of packets per
// LID to the caller -- confirm against the DistObject contract.
720 constant_num_packets = 0;
// There must be one packet count slot per export LID, and with at least one
// export LID the counts view must not be null.
722 const size_t num_export_lids (export_lids.extent (0));
723 TEUCHOS_TEST_FOR_EXCEPTION
724 (num_export_lids !=
size_t (num_packets_per_lid.extent (0)),
725 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
726 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
727 << num_packets_per_lid.extent (0) <<
".");
728 if (num_export_lids != 0) {
729 TEUCHOS_TEST_FOR_EXCEPTION
730 (num_packets_per_lid.data () ==
nullptr, std::invalid_argument,
731 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
732 "num_packets_per_lid.data() = "
733 << num_packets_per_lid.data () <<
" == NULL.");
// Nothing to export: replace exports with a zero-length buffer, fencing the
// execution space before and after the reallocation.
736 if (num_export_lids == 0) {
740 execution_space().fence ();
741 exports = exports_view_type (
"exports", 0);
742 execution_space().fence ();
// One offset per export LID plus a trailing slot.
747 View<size_t*, buffer_device_type> offsets (
"offsets", num_export_lids + 1);
// Fills offsets and num_packets_per_lid.
752 computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
753 local_graph.row_map, export_lids, export_pids);
// Reallocate exports when it cannot hold `count` packets.
756 if (count >
size_t (exports.extent (0))) {
760 execution_space().fence ();
761 exports = exports_view_type (
"exports", count);
763 std::ostringstream os;
764 os <<
"*** exports resized to " << count << std::endl;
765 std::cerr << os.str ();
767 execution_space().fence ();
770 std::ostringstream os;
771 os <<
"*** count: " << count <<
", exports.extent(0): "
772 << exports.extent (0) << std::endl;
773 std::cerr << os.str ();
// Packing process ids into a nonempty buffer requires a nonempty export_pids.
779 TEUCHOS_TEST_FOR_EXCEPTION
780 (pack_pids && exports.extent (0) != 0 &&
781 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
782 "pack_pids is true, and exports.extent(0) = " <<
783 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
784 "one graph entry, but export_pids.extent(0) = 0.");
// Mark the device side of exports as modified, then pack into it.
786 exports.modify_device ();
787 auto exports_d = exports.view_device ();
788 do_pack<packet_type, local_graph_type, local_map_type, buffer_device_type>
789 (local_graph, local_col_map, exports_d, num_packets_per_lid,
790 export_lids, export_pids, offsets, pack_pids);
// Pack specified entries of the given local sparse graph for communication.
//
// Host-array interface: builds buffer-device views from the Teuchos arrays,
// calls PackCrsGraphImpl::packCrsGraph with pack_pids = false and an empty
// export_pids view, then resizes the `exports` array to the packed size and
// wraps the host arrays in unmanaged host views.
// NOTE(review): this listing omits lines (the embedded original line numbers
// skip). The start of the signature, the calls that construct the device
// views, the static_assert heads, and whatever copies results back to the
// host arrays are not visible here. The argument pattern (pointer, size,
// copy flag, label) matches create_mirror_view_from_raw_host_array as
// described elsewhere in this file -- confirm.
796 template<
typename LO,
typename GO,
typename NT>
800 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
801 const Teuchos::ArrayView<const LO>& exportLIDs,
802 size_t& constantNumPackets,
805 using Kokkos::HostSpace;
806 using Kokkos::MemoryUnmanaged;
809 using packet_type =
typename crs_graph_type::packet_type;
810 using BDT =
typename crs_graph_type::buffer_device_type;
// Output counts: the copy flag is false, so the host contents are not
// transferred into the device view.
817 View<size_t*, BDT> num_packets_per_lid_d =
819 numPacketsPerLID.getRawPtr (),
820 numPacketsPerLID.size (),
false,
821 "num_packets_per_lid");
// Input LIDs: the copy flag is true.
824 View<const LO*, BDT> export_lids_d =
826 exportLIDs.getRawPtr (),
827 exportLIDs.size (),
true,
// Default-constructed (empty) pid view and exports buffer; this interface
// does not pack process ids.
829 View<const int*, BDT> export_pids_d;
830 Kokkos::DualView<packet_type*, BDT> exports_dv;
831 constexpr
bool pack_pids =
false;
// Compile-time checks on the element and device types of the views above
// (only fragments of each static_assert are visible).
835 typename decltype (num_packets_per_lid_d)::non_const_value_type,
837 "num_packets_per_lid_d's non_const_value_type should be size_t.");
// NOTE(review): the message below says "BDT should be size_t" although the
// visible operand is the view's device_type; it looks like the wording was
// meant to read "device_type should be BDT" -- confirm.
840 typename decltype (num_packets_per_lid_d)::device_type,
842 "num_packets_per_lid_d's BDT should be size_t.");
845 typename decltype (export_lids_d)::device_type,
847 "export_lids_d's device_type should be BDT.");
850 typename decltype (export_pids_d)::non_const_value_type,
852 "export_pids_d's non_const_value_type should be int.");
855 typename decltype (export_pids_d)::device_type,
857 "export_pids_d's device_type should be BDT.");
859 PackCrsGraphImpl::packCrsGraph
860 (sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
861 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view aliasing the caller's numPacketsPerLID storage.
865 View<size_t*, HostSpace, MemoryUnmanaged>
866 num_packets_per_lid_h (numPacketsPerLID.getRawPtr (),
867 numPacketsPerLID.size ());
// Make the caller's exports array match the packed buffer's length, then
// alias it with an unmanaged host view.
875 if (static_cast<size_t> (exports.size ()) !=
876 static_cast<size_t> (exports_dv.extent (0))) {
877 exports.resize (exports_dv.extent (0));
879 View<packet_type*, HostSpace, MemoryUnmanaged>
880 exports_h (exports.getRawPtr (), exports.size ());
// Pack specified entries of the given local sparse graph for communication,
// using DualView arguments.
//
// Sets constant_num_packets to 0, validates the inputs, computes per-LID
// packet counts and offsets on device, reallocates `exports` when it is too
// small, and calls do_pack on the device views. Raises std::invalid_argument
// on inconsistent inputs.
// NOTE(review): this listing omits lines (the embedded original line numbers
// skip), so parts of the signature, the declarations of `local_graph` and
// `count`, braces, and some statements are not visible here.
886 template<
typename LO,
typename GO,
typename NT>
889 const Kokkos::DualView<
893 const Kokkos::DualView<
903 > num_packets_per_lid,
904 size_t& constant_num_packets,
905 const bool pack_pids,
910 using BDT =
typename crs_graph_type::buffer_device_type;
911 using PT =
typename crs_graph_type::packet_type;
912 using exports_dual_view_type = Kokkos::DualView<PT*, BDT>;
913 using LGT =
typename crs_graph_type::local_graph_type;
914 using LMT =
typename crs_graph_type::map_type::local_map_type;
915 const char prefix[] =
"Tpetra::Details::packCrsGraphNew: ";
918 const LMT local_col_map = sourceGraph.
getColMap ()->getLocalMap ();
// Always reported as 0; presumably signals a varying number of packets per
// LID to the caller -- confirm against the DistObject contract.
923 constant_num_packets = 0;
// There must be one packet count slot per export LID.
// NOTE(review): the message labels the scalar num_export_lids as
// "num_export_lids.extent(0)"; the value printed is export_lids.extent(0).
925 const size_t num_export_lids =
926 static_cast<size_t> (export_lids.extent (0));
927 TEUCHOS_TEST_FOR_EXCEPTION
929 static_cast<size_t> (num_packets_per_lid.extent (0)),
930 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
931 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
932 << num_packets_per_lid.extent (0) <<
".");
// With at least one export LID, the device side of the counts view must not
// be null.
933 TEUCHOS_TEST_FOR_EXCEPTION
934 (num_export_lids != 0 &&
935 num_packets_per_lid.view_device ().data () ==
nullptr,
936 std::invalid_argument, prefix <<
"num_export_lids = "<< num_export_lids
937 <<
" != 0, but num_packets_per_lid.view_device().data() = nullptr.");
// Nothing to export: reset exports to a default-constructed DualView.
939 if (num_export_lids == 0) {
940 exports = exports_dual_view_type ();
// One offset per export LID plus a trailing slot.
945 using offsets_type = Kokkos::View<size_t*, BDT>;
946 offsets_type offsets (
"offsets", num_export_lids + 1);
// The counts are about to be overwritten on device: drop any previous sync
// state and mark the device side as modified.
950 num_packets_per_lid.clear_sync_state ();
951 num_packets_per_lid.modify_device ();
952 using PackCrsGraphImpl::computeNumPacketsAndOffsets;
954 computeNumPacketsAndOffsets (offsets, num_packets_per_lid.view_device (),
956 export_lids.view_device (),
957 export_pids.view_device ());
// Reallocate exports when it cannot hold `count` packets.
960 if (count > static_cast<size_t> (exports.extent (0))) {
961 exports = exports_dual_view_type (
"exports", count);
// Packing process ids into a nonempty buffer requires a nonempty export_pids.
967 TEUCHOS_TEST_FOR_EXCEPTION
968 (pack_pids && exports.extent (0) != 0 &&
969 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
970 "pack_pids is true, and exports.extent(0) = " <<
971 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
972 "one graph entry, but export_pids.extent(0) = 0.");
// Mark the device side of exports as modified, then pack into it.
974 exports.modify_device ();
975 using PackCrsGraphImpl::do_pack;
976 do_pack<PT, LGT, LMT, BDT> (local_graph, local_col_map,
977 exports.view_device (),
978 num_packets_per_lid.view_device (),
979 export_lids.view_device (),
980 export_pids.view_device (),
// Pack specified entries of the given local sparse graph for communication,
// including process ids.
//
// Host-array interface: builds buffer-device views from numPacketsPerLID,
// exportLIDs and sourcePIDs, then calls PackCrsGraphImpl::packCrsGraph with
// pack_pids = true, writing into exports_dv.
// NOTE(review): this listing omits lines (the embedded original line numbers
// skip). The start of the signature, the calls that construct the device
// views, and whatever copies the counts back to numPacketsPerLID are not
// visible here. The argument pattern (pointer, size, copy flag, label)
// matches create_mirror_view_from_raw_host_array as described elsewhere in
// this file -- confirm.
984 template<
typename LO,
typename GO,
typename NT>
992 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
993 const Teuchos::ArrayView<const LO>& exportLIDs,
994 const Teuchos::ArrayView<const int>& sourcePIDs,
995 size_t& constantNumPackets,
998 using Kokkos::HostSpace;
999 using Kokkos::MemoryUnmanaged;
1002 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
// Output counts: the copy flag is false, so the host contents are not
// transferred into the device view.
1008 View<size_t*, buffer_device_type> num_packets_per_lid_d =
1010 numPacketsPerLID.getRawPtr (),
1011 numPacketsPerLID.size (),
false,
1012 "num_packets_per_lid");
// Input LIDs: the copy flag is true.
1016 View<const LO*, buffer_device_type> export_lids_d =
1018 exportLIDs.getRawPtr (),
1019 exportLIDs.size (),
true,
// Input process ids: the copy flag is true.
1023 View<const int*, buffer_device_type> export_pids_d =
1025 sourcePIDs.getRawPtr (),
1026 sourcePIDs.size (),
true,
// This interface always packs process ids.
1028 constexpr
bool pack_pids =
true;
1029 PackCrsGraphImpl::packCrsGraph
1030 (sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
1031 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view aliasing the caller's numPacketsPerLID storage.
1035 View<size_t*, HostSpace, MemoryUnmanaged> num_packets_per_lid_h
1036 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
// Macro taking (LO, GO, NT) whose body names Details::packCrsGraph,
// Details::packCrsGraphNew and Details::packCrsGraphWithOwningPIDs with
// those template arguments; presumably an explicit-instantiation helper.
// NOTE(review): this listing omits continuation lines (the embedded original
// line numbers skip), so the full macro body is not visible here. No
// comments are placed inside the body because every line ends in a
// backslash continuation.
1043 #define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT( LO, GO, NT ) \
1045 Details::packCrsGraph<LO, GO, NT> ( \
1046 const CrsGraph<LO, GO, NT>&, \
1047 Teuchos::Array<CrsGraph<LO,GO,NT>::packet_type>&, \
1048 const Teuchos::ArrayView<size_t>&, \
1049 const Teuchos::ArrayView<const LO>&, \
1053 Details::packCrsGraphNew<LO, GO, NT> ( \
1054 const CrsGraph<LO, GO, NT>&, \
1055 const Kokkos::DualView< \
1057 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1058 const Kokkos::DualView< \
1060 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1062 CrsGraph<LO,GO,NT>::packet_type*, \
1063 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1066 CrsGraph<LO,GO,NT>::buffer_device_type>, \
1071 Details::packCrsGraphWithOwningPIDs<LO, GO, NT> ( \
1072 const CrsGraph<LO, GO, NT>&, \
1073 Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1074 const Teuchos::ArrayView<size_t>&, \
1075 const Teuchos::ArrayView<const LO>&, \
1076 const Teuchos::ArrayView<const int>&, \
1080 #endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
Declaration of the Tpetra::CrsGraph class.
"Local" part of Map suitable for Kokkos kernels.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Sets up and executes a communication plan for a Tpetra DistObject.
Kokkos::View< const value_type *, BufferDeviceType, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
local_graph_type getLocalGraph() const
Get the local graph.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids, Distributor &distor)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interface.
Declaration and definition of Tpetra::Details::getEntryOnHost.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.