10 #ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
11 #define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
13 #include "TpetraCore_config.h"
14 #include "Teuchos_Array.hpp"
15 #include "Teuchos_ArrayView.hpp"
55 namespace PackCrsGraphImpl {
// Fragment of the parallel-scan functor that computeNumPacketsAndOffsets
// refers to as NumPacketsAndOffsetsFunctor.  The class header, the
// constructor's first lines, and most braces are missing from this listing.
// From the visible lines: for each position curInd it stores the running
// packet total in outputOffsets_(curInd) and the per-row packet count in
// counts_(curInd).
63 template<
class OutputOffsetsViewType,
65 class InputOffsetsViewType,
66 class InputLocalRowIndicesViewType,
67 class InputLocalRowPidsViewType,
69 #ifdef HAVE_TPETRA_DEBUG
73 #endif // HAVE_TPETRA_DEBUG
// Entry types of the views, and the device type taken from the output view.
77 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
78 typedef typename CountsViewType::non_const_value_type count_type;
79 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
80 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
81 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
83 typedef typename OutputOffsetsViewType::device_type device_type;
// Compile-time requirements on the view template parameters.
84 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
85 typename device_type::execution_space>::value,
86 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
87 static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
88 "OutputOffsetsViewType must be a Kokkos::View.");
89 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
90 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
91 static_assert (std::is_integral<output_offset_type>::value,
92 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
93 static_assert (Kokkos::is_view<CountsViewType>::value,
94 "CountsViewType must be a Kokkos::View.");
// NOTE(review): the next check compares CountsViewType::value_type against
// output_offset_type, not count_type, although its message is about
// CountsViewType being nonconst.  Presumably count_type was intended -- confirm.
95 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
96 "CountsViewType must be a nonconst Kokkos::View.");
97 static_assert (std::is_integral<count_type>::value,
98 "The type of each entry of CountsViewType must be a built-in integer type.");
99 static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
100 "InputOffsetsViewType must be a Kokkos::View.");
101 static_assert (std::is_integral<input_offset_type>::value,
102 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
103 static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
104 "InputLocalRowIndicesViewType must be a Kokkos::View.");
105 static_assert (std::is_integral<local_row_index_type>::value,
106 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor parameters and member initializers (the first parameter and
// some initializers are not visible in this listing).
109 const CountsViewType& counts,
110 const InputOffsetsViewType& rowOffsets,
111 const InputLocalRowIndicesViewType& lclRowInds,
112 const InputLocalRowPidsViewType& lclRowPids) :
113 outputOffsets_ (outputOffsets),
115 rowOffsets_ (rowOffsets),
116 lclRowInds_ (lclRowInds),
117 lclRowPids_ (lclRowPids),
// Extent checks: counts_ must have one entry per row to pack, and
// outputOffsets_ one more entry than that.  A mismatch is reported as
// std::invalid_argument.
121 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
123 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
124 std::ostringstream os;
125 os <<
"lclRowInds.extent(0) = " << numRowsToPack
126 <<
" != counts.extent(0) = " << counts_.extent (0)
128 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
130 if (static_cast<size_t> (numRowsToPack + 1) !=
131 static_cast<size_t> (outputOffsets_.extent (0))) {
132 std::ostringstream os;
133 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
134 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
136 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body: curInd is the position being processed and update is the
// running packet total.
141 KOKKOS_INLINE_FUNCTION
void
142 operator() (
const local_row_index_type& curInd,
143 output_offset_type& update,
144 const bool final)
const
// Range checks on curInd; their bodies are not visible in this listing.
147 if (curInd < static_cast<local_row_index_type> (0)) {
155 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
// Store the running total as the offset for this position.
160 outputOffsets_(curInd) = update;
// outputOffsets_ has one more entry than counts_ (see the extent checks
// above), so only positions below counts_.extent(0) correspond to a row.
163 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
164 const auto lclRow = lclRowInds_(curInd);
// Check lclRow against the bounds of rowOffsets_ (body not visible).
165 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
166 static_cast<local_row_index_type> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in the row, from consecutive row offsets.
174 const count_type count =
175 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// Packets for this row: one per entry, doubled when lclRowPids_ is nonempty.
179 const count_type numEntToPack = (count == 0)
180 ? static_cast<count_type>(0)
181 : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
// Record the per-row count, then advance the running total.
184 counts_(curInd) = numEntToPack;
186 update += numEntToPack;
// Host mirror of error_.  The surrounding lines are not visible; presumably
// this belongs to getError() -- confirm.
196 auto error_h = Kokkos::create_mirror_view (error_);
// Views held by the functor; the three inputs are stored as const views.
// error_ is a rank-0 int view on device_type.
207 OutputOffsetsViewType outputOffsets_;
208 CountsViewType counts_;
209 typename InputOffsetsViewType::const_type rowOffsets_;
210 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
211 typename InputLocalRowPidsViewType::const_type lclRowPids_;
212 Kokkos::View<int, device_type> error_;
// Fragment of computeNumPacketsAndOffsets.  From the visible lines: it
// validates the view extents, runs NumPacketsAndOffsetsFunctor in a
// Kokkos::parallel_scan to fill counts and outputOffsets, turns a nonzero
// functor error code into std::runtime_error, and returns a value read from
// outputOffsets via getEntryOnHost.  Braces and several statements are
// missing from this listing, and the final return is cut off.
224 template<
class OutputOffsetsViewType,
225 class CountsViewType,
226 class InputOffsetsViewType,
227 class InputLocalRowIndicesViewType,
228 class InputLocalRowPidsViewType>
229 typename CountsViewType::non_const_value_type
230 computeNumPacketsAndOffsets(
const OutputOffsetsViewType& outputOffsets,
231 const CountsViewType& counts,
232 const InputOffsetsViewType& rowOffsets,
233 const InputLocalRowIndicesViewType& lclRowInds,
234 const InputLocalRowPidsViewType& lclRowPids)
// The functor is instantiated with the const_type of each input view.
236 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
237 CountsViewType,
typename InputOffsetsViewType::const_type,
238 typename InputLocalRowIndicesViewType::const_type,
239 typename InputLocalRowPidsViewType::const_type> functor_type;
240 typedef typename CountsViewType::non_const_value_type count_type;
241 typedef typename OutputOffsetsViewType::size_type size_type;
242 typedef typename OutputOffsetsViewType::execution_space execution_space;
243 typedef typename functor_type::local_row_index_type LO;
244 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
245 const char prefix[] =
"computeNumPacketsAndOffsets: ";
247 count_type count = 0;
248 const count_type numRowsToPack = lclRowInds.extent (0);
// Empty-input branch; its body is not visible in this listing.
250 if (numRowsToPack == 0) {
// Argument checks; each failure is std::invalid_argument with the prefix.
254 TEUCHOS_TEST_FOR_EXCEPTION
255 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
256 std::invalid_argument, prefix <<
"There is at least one row to pack, "
257 "but the graph has no rows. lclRowInds.extent(0) = " <<
258 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
259 rowOffsets.extent (0) <<
" <= 1.");
260 TEUCHOS_TEST_FOR_EXCEPTION
261 (outputOffsets.extent (0) !=
262 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
263 prefix <<
"Output dimension does not match number of rows to pack. "
264 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
265 <<
" != lclRowInds.extent(0) + 1 = "
266 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
267 TEUCHOS_TEST_FOR_EXCEPTION
268 (counts.extent (0) != numRowsToPack, std::invalid_argument,
269 prefix <<
"counts.extent(0) = " << counts.extent (0)
270 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// The scan covers numRowsToPack + 1 positions, matching the required
// extent of outputOffsets.
272 functor_type f (outputOffsets, counts, rowOffsets, lclRowInds, lclRowPids);
273 Kokkos::parallel_scan (
"Tpetra::Details::computeNumPacketsAndOffsets::scan", range_type (0, numRowsToPack + 1), f);
// A nonzero error code from the functor is reported as std::runtime_error.
276 const int errCode = f.getError ();
277 TEUCHOS_TEST_FOR_EXCEPTION
278 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
279 << errCode <<
" != 0.");
// NOTE(review): the host-side consistency check below (last offset versus
// the sum of counts) ends in `return {false, errStr};`, which does not match
// this function's count_type return type.  Only the closing #endif of its
// preprocessor guard is visible here; presumably the region is compiled
// out -- confirm against the full file.
283 for (LO k = 0; k < numRowsToPack; ++k) {
286 if (outputOffsets(numRowsToPack) != total) {
287 if (errStr.get () == NULL) {
288 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
290 std::ostringstream& os = *errStr;
292 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
293 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
294 << total <<
"." << std::endl;
295 if (numRowsToPack != 0) {
// Small inputs (fewer than 10 rows) are printed in full.
297 if (numRowsToPack < static_cast<LO> (10)) {
298 os <<
"outputOffsets: [";
299 for (LO i = 0; i <= numRowsToPack; ++i) {
300 os << outputOffsets(i);
301 if (static_cast<LO> (i + 1) <= numRowsToPack) {
305 os <<
"]" << std::endl;
307 for (LO i = 0; i < numRowsToPack; ++i) {
309 if (static_cast<LO> (i + 1) < numRowsToPack) {
313 os <<
"]" << std::endl;
316 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
317 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
320 count = outputOffsets(numRowsToPack);
321 return {
false, errStr};
323 #endif // HAVE_TPETRA_DEBUG
// The result is read from outputOffsets on the host; the remaining
// arguments of this call are cut off in this listing.
327 using Tpetra::Details::getEntryOnHost;
328 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Fragment of packRow.  From the visible lines: it writes the global column
// index of each of the row's num_ent entries to exports starting at
// position offset, and in a second loop writes process ids (cast to GO)
// starting at offset + num_ent.  It returns the number of packed values.
// The conditions guarding the early return and the second loop are not
// visible in this listing.
343 template<
class Packet,
345 class BufferDeviceType,
350 packRow(
const LocalMapType& col_map,
351 const Kokkos::View<Packet*, BufferDeviceType>& exports,
352 const InputLidsType& lids_in,
353 const InputPidsType& pids_in,
355 const size_t num_ent,
356 const bool pack_pids)
358 using LO =
typename LocalMapType::local_ordinal_type;
359 using GO =
typename LocalMapType::global_ordinal_type;
// Early return of zero; the condition is not visible (presumably
// num_ent == 0 -- confirm).
363 return static_cast<size_t>(0);
// The packed count starts at num_ent and grows by another num_ent
// (presumably only when pack_pids is set; the guard is not visible).
366 size_t num_ent_packed = num_ent;
368 num_ent_packed += num_ent;
// First segment: local column index -> global index via col_map.
373 for (
size_t k = 0; k < num_ent; ++k) {
374 const LO lid = lids_in[k];
375 const GO gid = col_map.getGlobalElement (lid);
376 exports(offset+k) = gid;
// Second segment: pids_in is indexed by the entry's local column index,
// not by k.
380 for (
size_t k = 0; k < num_ent; ++k) {
381 const LO lid = lids_in[k];
382 const int pid = pids_in[lid];
383 exports(offset+num_ent+k) =
static_cast<GO
>(pid);
387 return num_ent_packed;
// Fragment of PackCrsGraphFunctor, a reduction functor whose value_type is
// a (error code, index) pair.  From the visible lines: for each index i it
// looks up the row export_lids[i], validates it and its buffer range, and
// calls packRow to write the row into exports at offsets[i].  Error codes
// assigned below: 1 = row index out of range, 2 = offset or range exceeds
// the buffer, 3 = packed count differs from num_packets_per_lid(i).
// Some template parameters, type aliases, and braces are missing from this
// listing.
390 template<
class Packet,
393 class BufferDeviceType>
394 struct PackCrsGraphFunctor {
395 using local_graph_type = LocalGraph;
396 using local_map_type = LocalMap;
400 using num_packets_per_lid_view_type =
401 Kokkos::View<const size_t*, BufferDeviceType>;
402 using offsets_view_type = Kokkos::View<const size_t*, BufferDeviceType>;
403 using exports_view_type = Kokkos::View<Packet*, BufferDeviceType>;
404 using export_lids_view_type =
406 using source_pids_view_type =
410 typename num_packets_per_lid_view_type::non_const_value_type;
411 using offset_type =
typename offsets_view_type::non_const_value_type;
// Reduction result: first = error code (0 means no error), second = index.
412 using value_type = Kokkos::pair<int, LO>;
414 static_assert (std::is_same<LO, typename local_graph_type::data_type>::value,
415 "local_map_type::local_ordinal_type and "
416 "local_graph_type::data_type must be the same.");
418 local_graph_type local_graph;
419 local_map_type local_col_map;
420 exports_view_type exports;
421 num_packets_per_lid_view_type num_packets_per_lid;
422 export_lids_view_type export_lids;
423 source_pids_view_type source_pids;
424 offsets_view_type offsets;
// Constructor: stores the inputs and checks that a nonempty graph's row_map
// has numRows + 1 entries; a mismatch is reported as std::logic_error.
427 PackCrsGraphFunctor(
const local_graph_type& local_graph_in,
428 const local_map_type& local_col_map_in,
429 const exports_view_type& exports_in,
430 const num_packets_per_lid_view_type& num_packets_per_lid_in,
431 const export_lids_view_type& export_lids_in,
432 const source_pids_view_type& source_pids_in,
433 const offsets_view_type& offsets_in,
434 const bool pack_pids_in) :
435 local_graph (local_graph_in),
436 local_col_map (local_col_map_in),
437 exports (exports_in),
438 num_packets_per_lid (num_packets_per_lid_in),
439 export_lids (export_lids_in),
440 source_pids (source_pids_in),
441 offsets (offsets_in),
442 pack_pids (pack_pids_in)
444 const LO numRows = local_graph_in.numRows ();
446 static_cast<LO
> (local_graph.row_map.extent (0));
447 TEUCHOS_TEST_FOR_EXCEPTION
448 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
449 std::logic_error,
"local_graph.row_map.extent(0) = "
450 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 and an invalid index.
453 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
455 using ::Tpetra::Details::OrdinalTraits;
456 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Combine partial results.  The branch body is not visible; the condition
// selects the case where src holds an error and dst does not.
459 KOKKOS_INLINE_FUNCTION
void
460 join (value_type& dst,
const value_type& src)
const
464 if (src.first != 0 && dst.first == 0) {
// Per-index work: validate, then pack one row.
469 KOKKOS_INLINE_FUNCTION
470 void operator() (
const LO i, value_type& dst)
const
472 const size_t offset = offsets[i];
473 const LO export_lid = export_lids[i];
474 const size_t buf_size = exports.size();
475 const size_t num_packets_this_lid = num_packets_per_lid(i);
// NOTE(review): row_map is read at export_lid and export_lid+1 here, before
// export_lid is range-checked a few lines below -- confirm this cannot read
// out of bounds for an invalid export_lid.
476 const size_t num_ent =
477 static_cast<size_t> (local_graph.row_map[export_lid+1]
478 - local_graph.row_map[export_lid]);
// NOTE(review): init() sets dst.first to 0 and join() adopts an error only
// when dst.first == 0, so the `dst.first != 0` guards in this function look
// inverted: a fresh dst would never record an error code.  Presumably
// `dst.first == 0` (keep the first error) was intended -- confirm.
488 if (export_lid >= static_cast<LO>(local_graph.numRows())) {
489 if (dst.first != 0) {
490 dst = Kokkos::make_pair (1, i);
494 else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
495 if (dst.first != 0) {
496 dst = Kokkos::make_pair (2, i);
// Valid case: take the row's slice of local_graph.entries and pack it.
506 const auto row_beg = local_graph.row_map[export_lid];
507 const auto row_end = local_graph.row_map[export_lid + 1];
508 auto lids_in = Kokkos::subview (local_graph.entries,
509 Kokkos::make_pair (row_beg, row_end));
510 size_t num_ent_packed_this_row =
511 packRow (local_col_map, exports, lids_in,
512 source_pids, offset, num_ent, pack_pids);
// The packed count must equal the precomputed packet count for this row.
513 if (num_ent_packed_this_row != num_packets_this_lid) {
514 if (dst.first != 0) {
515 dst = Kokkos::make_pair (3, i);
// Fragment of do_pack.  From the visible lines: it validates the extents of
// offsets, export_lids, num_packets_per_lid and source_pids, runs
// PackCrsGraphFunctor in a Kokkos::parallel_reduce over
// num_packets_per_lid.extent(0) indices, and reports a nonzero result code
// as std::runtime_error.  Some template parameters and braces are missing
// from this listing.
528 template<
class Packet,
531 class BufferDeviceType>
533 do_pack(
const LocalGraph& local_graph,
534 const LocalMap& local_map,
535 const Kokkos::View<Packet*, BufferDeviceType>& exports,
536 const typename PackTraits<
538 >::input_array_type& num_packets_per_lid,
539 const typename PackTraits<
541 >::input_array_type& export_lids,
542 const typename PackTraits<
544 >::input_array_type& source_pids,
545 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
546 const bool pack_pids)
549 using execution_space =
typename LocalGraph::device_type::execution_space;
550 using range_type = Kokkos::RangePolicy<execution_space, LO>;
551 const char prefix[] =
"Tpetra::Details::PackCrsGraphImpl::do_pack: ";
// For nonempty input, offsets needs one more entry than export_lids, and
// num_packets_per_lid needs the same number of entries as export_lids.
553 if (export_lids.extent (0) != 0) {
554 TEUCHOS_TEST_FOR_EXCEPTION
555 (static_cast<size_t> (offsets.extent (0)) !=
556 static_cast<size_t> (export_lids.extent (0) + 1),
557 std::invalid_argument, prefix <<
"offsets.extent(0) = "
558 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
559 << export_lids.extent (0) <<
") + 1.");
560 TEUCHOS_TEST_FOR_EXCEPTION
561 (export_lids.extent (0) != num_packets_per_lid.extent (0),
562 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
563 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
564 << num_packets_per_lid.extent (0) <<
".");
// Packing process ids into a nonempty buffer requires source_pids.
568 TEUCHOS_TEST_FOR_EXCEPTION
569 (pack_pids && exports.extent (0) != 0 &&
570 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
571 "pack_pids is true, and exports.extent(0) = " <<
572 exports.extent (0) <<
" != 0, meaning that we need to pack at "
573 "least one graph entry, but source_pids.extent(0) = 0.");
576 using pack_functor_type =
577 PackCrsGraphFunctor<Packet, LocalGraph, LocalMap,
579 pack_functor_type f (local_graph, local_map, exports,
580 num_packets_per_lid, export_lids,
581 source_pids, offsets, pack_pids);
583 typename pack_functor_type::value_type result;
584 range_type range (0, num_packets_per_lid.extent (0));
// NOTE(review): the kernel label below names computeNumPacketsAndOffsets
// although this is do_pack; looks like a copy-paste leftover -- confirm
// before renaming, since profiling tools may key on the label.
585 Kokkos::parallel_reduce (
"Tpetra::Details::computeNumPacketsAndOffsets::reduce",range, f, result);
// result.first is the error code and result.second the offending index.
// NOTE(review): only codes 1 and 2 get a description in the visible lines;
// code 3, which PackCrsGraphFunctor also assigns, would print an empty
// description unless a branch is missing from this listing.
587 if (result.first != 0) {
590 std::ostringstream os;
591 if (result.first == 1) {
592 os <<
"invalid local row index";
594 else if (result.first == 2) {
595 os <<
"invalid offset";
597 TEUCHOS_TEST_FOR_EXCEPTION
598 (
true, std::runtime_error, prefix <<
"PackCrsGraphFunctor "
599 "reported error code " << result.first <<
" (" << os.str ()
600 <<
") for the first bad row " << result.second <<
".");
// Fragment of the device-view packing routine whose prefix string is
// "Tpetra::Details::packCrsGraph: " (its name line is not visible).  From
// the visible lines: it validates the input extents, computes per-row packet
// counts and offsets with computeNumPacketsAndOffsets, reallocates exports
// when the required count exceeds its current extent, and calls do_pack on
// the device view of exports.  Several parameter lines and braces are
// missing from this listing.
630 template<
typename LO,
typename GO,
typename NT>
633 (
const CrsGraph<LO,GO,NT>& sourceGraph,
635 typename CrsGraph<LO,GO,NT>::packet_type*,
636 typename CrsGraph<LO,GO,NT>::buffer_device_type
640 typename CrsGraph<LO,GO,NT>::buffer_device_type
641 >& num_packets_per_lid,
644 typename CrsGraph<LO, GO, NT>::buffer_device_type
648 typename CrsGraph<LO, GO, NT>::buffer_device_type
650 size_t& constant_num_packets,
651 const bool pack_pids)
654 using crs_graph_type = CrsGraph<LO, GO, NT>;
655 using packet_type =
typename crs_graph_type::packet_type;
656 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
657 using exports_view_type = Kokkos::DualView<packet_type*, buffer_device_type>;
658 using local_graph_device_type =
typename crs_graph_type::local_graph_device_type;
660 const char prefix[] =
"Tpetra::Details::packCrsGraph: ";
// Compile-time switch; presumably gates the std::cerr diagnostics below
// (the guarding conditions are not visible in this listing).
661 constexpr
bool debug =
false;
663 local_graph_device_type local_graph = sourceGraph.getLocalGraphDevice ();
664 local_map_type local_col_map = sourceGraph.getColMap ()->getLocalMap ();
// Always set to zero here.
669 constant_num_packets = 0;
// NOTE(review): the message below prints "num_export_lids.extent(0)", but
// num_export_lids is a size_t holding export_lids.extent(0); presumably
// "export_lids.extent(0)" was meant.
671 const size_t num_export_lids (export_lids.extent (0));
672 TEUCHOS_TEST_FOR_EXCEPTION
673 (num_export_lids !=
size_t (num_packets_per_lid.extent (0)),
674 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
675 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
676 << num_packets_per_lid.extent (0) <<
".");
677 if (num_export_lids != 0) {
678 TEUCHOS_TEST_FOR_EXCEPTION
679 (num_packets_per_lid.data () ==
nullptr, std::invalid_argument,
680 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
681 "num_packets_per_lid.data() = "
682 << num_packets_per_lid.data () <<
" == NULL.");
// Nothing to export: exports becomes an empty DualView (the rest of this
// branch is not visible).
685 if (num_export_lids == 0) {
686 exports = exports_view_type (
"exports", 0);
// One offset per exported row plus one final entry.
691 View<size_t*, buffer_device_type> offsets (
"offsets", num_export_lids + 1);
696 computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
697 local_graph.row_map, export_lids, export_pids);
// Reallocate only when the buffer is too small; reallocating replaces the
// DualView rather than resizing it in place.
700 if (count >
size_t (exports.extent (0))) {
701 exports = exports_view_type (
"exports", count);
703 std::ostringstream os;
704 os <<
"*** exports resized to " << count << std::endl;
705 std::cerr << os.str ();
709 std::ostringstream os;
710 os <<
"*** count: " << count <<
", exports.extent(0): "
711 << exports.extent (0) << std::endl;
712 std::cerr << os.str ();
// Packing process ids into a nonempty buffer requires export_pids.
718 TEUCHOS_TEST_FOR_EXCEPTION
719 (pack_pids && exports.extent (0) != 0 &&
720 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
721 "pack_pids is true, and exports.extent(0) = " <<
722 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
723 "one graph entry, but export_pids.extent(0) = 0.");
// Mark the device copy as modified, then pack into it.
725 exports.modify_device ();
726 auto exports_d = exports.view_device ();
727 do_pack<packet_type, local_graph_device_type, local_map_type, buffer_device_type>
728 (local_graph, local_col_map, exports_d, num_packets_per_lid,
729 export_lids, export_pids, offsets, pack_pids);
// Fragment of the host-array entry point taking numPacketsPerLID and
// exportLIDs as Teuchos::ArrayView (presumably the packCrsGraph overload
// that fills a Teuchos::Array; its name line is not visible).  From the
// visible lines: it builds device views from the host arrays, calls a
// packing routine with pack_pids = false and an empty export_pids_d, copies
// the packet counts back into numPacketsPerLID, resizes exports to the
// packed length, and fences the execution space.
735 template<
typename LO,
typename GO,
typename NT>
739 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
740 const Teuchos::ArrayView<const LO>& exportLIDs,
741 size_t& constantNumPackets)
743 using Kokkos::HostSpace;
744 using Kokkos::MemoryUnmanaged;
747 using packet_type =
typename crs_graph_type::packet_type;
748 using BDT =
typename crs_graph_type::buffer_device_type;
// Device view for the output counts.  The `false` argument presumably is
// the copy flag of create_mirror_view_from_raw_host_array (the callee name
// is not visible here) -- confirm.
755 View<size_t*, BDT> num_packets_per_lid_d =
757 numPacketsPerLID.getRawPtr (),
758 numPacketsPerLID.size (),
false,
759 "num_packets_per_lid");
// Device view for the input row indices; the flag is `true` here.
762 View<const LO*, BDT> export_lids_d =
764 exportLIDs.getRawPtr (),
765 exportLIDs.size (),
true,
// Default-constructed, so it has no entries; pack_pids is false.
767 View<const int*, BDT> export_pids_d;
768 Kokkos::DualView<packet_type*, BDT> exports_dv;
769 constexpr
bool pack_pids =
false;
// Fragments of static_asserts on the view types (their first lines are not
// visible in this listing).
773 typename decltype (num_packets_per_lid_d)::non_const_value_type,
775 "num_packets_per_lid_d's non_const_value_type should be size_t.");
// NOTE(review): the message below says "BDT should be size_t" although the
// visible operand is the view's device_type; presumably
// "device_type should be BDT" was meant, as in the sibling checks.
778 typename decltype (num_packets_per_lid_d)::device_type,
780 "num_packets_per_lid_d's BDT should be size_t.");
783 typename decltype (export_lids_d)::device_type,
785 "export_lids_d's device_type should be BDT.");
788 typename decltype (export_pids_d)::non_const_value_type,
790 "export_pids_d's non_const_value_type should be int.");
793 typename decltype (export_pids_d)::device_type,
795 "export_pids_d's device_type should be BDT.");
// Call into the packing routine (callee name not visible in this listing).
798 (sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
799 export_pids_d, constantNumPackets, pack_pids);
// Copy the per-row packet counts back into the caller's host array through
// an unmanaged host view.
803 View<size_t*, HostSpace, MemoryUnmanaged>
804 num_packets_per_lid_h (numPacketsPerLID.getRawPtr (),
805 numPacketsPerLID.size ());
808 using execution_space =
typename BDT::execution_space;
809 Kokkos::deep_copy (execution_space(), num_packets_per_lid_h, num_packets_per_lid_d);
// Match the output array's length to the packed buffer, then wrap it in an
// unmanaged host view (the copy into it is not visible in this listing).
816 if (static_cast<size_t> (exports.size ()) !=
817 static_cast<size_t> (exports_dv.extent (0))) {
818 exports.resize (exports_dv.extent (0));
820 View<packet_type*, HostSpace, MemoryUnmanaged>
821 exports_h (exports.getRawPtr (), exports.size ());
// Wait for work submitted on execution_space before returning.
824 execution_space().fence();
// Fragment of packCrsGraphNew (name taken from the prefix string; the name
// line is not visible).  From the visible lines: it validates the DualView
// inputs, computes packet counts and offsets on the device, reallocates
// exports when it is too small, and calls do_pack on the device views.
// Several parameter lines and braces are missing from this listing.
829 template<
typename LO,
typename GO,
typename NT>
832 const Kokkos::DualView<
836 const Kokkos::DualView<
846 > num_packets_per_lid,
847 size_t& constant_num_packets,
848 const bool pack_pids)
852 using BDT =
typename crs_graph_type::buffer_device_type;
853 using PT =
typename crs_graph_type::packet_type;
854 using exports_dual_view_type = Kokkos::DualView<PT*, BDT>;
855 using LGT =
typename crs_graph_type::local_graph_device_type;
856 using LMT =
typename crs_graph_type::map_type::local_map_type;
857 const char prefix[] =
"Tpetra::Details::packCrsGraphNew: ";
860 const LMT local_col_map = sourceGraph.
getColMap ()->getLocalMap ();
// Always set to zero here.
865 constant_num_packets = 0;
// NOTE(review): as in the sibling routine, the message prints
// "num_export_lids.extent(0)" for a size_t local; presumably
// "export_lids.extent(0)" was meant.
867 const size_t num_export_lids =
868 static_cast<size_t> (export_lids.extent (0));
869 TEUCHOS_TEST_FOR_EXCEPTION
871 static_cast<size_t> (num_packets_per_lid.extent (0)),
872 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
873 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
874 << num_packets_per_lid.extent (0) <<
".");
875 TEUCHOS_TEST_FOR_EXCEPTION
876 (num_export_lids != 0 &&
877 num_packets_per_lid.view_device ().data () ==
nullptr,
878 std::invalid_argument, prefix <<
"num_export_lids = "<< num_export_lids
879 <<
" != 0, but num_packets_per_lid.view_device().data() = nullptr.");
// Nothing to export: exports becomes a default-constructed DualView (the
// rest of this branch is not visible).
881 if (num_export_lids == 0) {
882 exports = exports_dual_view_type ();
// One offset per exported row plus one final entry.
887 using offsets_type = Kokkos::View<size_t*, BDT>;
888 offsets_type offsets (
"offsets", num_export_lids + 1);
// num_packets_per_lid is about to be overwritten on the device, so its
// sync state is reset and the device side is marked modified.
892 num_packets_per_lid.clear_sync_state ();
893 num_packets_per_lid.modify_device ();
894 using PackCrsGraphImpl::computeNumPacketsAndOffsets;
896 computeNumPacketsAndOffsets (offsets, num_packets_per_lid.view_device (),
898 export_lids.view_device (),
899 export_pids.view_device ());
// Reallocate only when the buffer is too small.
902 if (count > static_cast<size_t> (exports.extent (0))) {
903 exports = exports_dual_view_type (
"exports", count);
// Packing process ids into a nonempty buffer requires export_pids.
909 TEUCHOS_TEST_FOR_EXCEPTION
910 (pack_pids && exports.extent (0) != 0 &&
911 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
912 "pack_pids is true, and exports.extent(0) = " <<
913 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
914 "one graph entry, but export_pids.extent(0) = 0.");
// Mark the device copy as modified, then pack into it (the trailing
// arguments of do_pack are cut off in this listing).
916 exports.modify_device ();
917 using PackCrsGraphImpl::do_pack;
918 do_pack<PT, LGT, LMT, BDT> (local_graph, local_col_map,
919 exports.view_device (),
920 num_packets_per_lid.view_device (),
921 export_lids.view_device (),
922 export_pids.view_device (),
// Fragment of the host-array entry point that also takes sourcePIDs
// (presumably packCrsGraphWithOwningPIDs; its name line is not visible).
// From the visible lines: it builds device views from numPacketsPerLID,
// exportLIDs and sourcePIDs, calls a packing routine with pack_pids = true,
// copies the packet counts back into numPacketsPerLID, and fences the
// execution space.
926 template<
typename LO,
typename GO,
typename NT>
934 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
935 const Teuchos::ArrayView<const LO>& exportLIDs,
936 const Teuchos::ArrayView<const int>& sourcePIDs,
937 size_t& constantNumPackets)
939 using Kokkos::HostSpace;
940 using Kokkos::MemoryUnmanaged;
943 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
// Device view for the output counts.  The `false` argument presumably is
// the copy flag of create_mirror_view_from_raw_host_array (the callee name
// is not visible here) -- confirm.
949 View<size_t*, buffer_device_type> num_packets_per_lid_d =
951 numPacketsPerLID.getRawPtr (),
952 numPacketsPerLID.size (),
false,
953 "num_packets_per_lid");
// Device views for the two input arrays; the flag is `true` for both.
957 View<const LO*, buffer_device_type> export_lids_d =
959 exportLIDs.getRawPtr (),
960 exportLIDs.size (),
true,
964 View<const int*, buffer_device_type> export_pids_d =
966 sourcePIDs.getRawPtr (),
967 sourcePIDs.size (),
true,
// This entry point always packs process ids.
969 constexpr
bool pack_pids =
true;
// Call into the packing routine (callee name not visible in this listing).
971 (sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
972 export_pids_d, constantNumPackets, pack_pids);
// Copy the per-row packet counts back into the caller's host array, then
// wait for the execution space to finish.
976 View<size_t*, HostSpace, MemoryUnmanaged> num_packets_per_lid_h
977 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
979 using execution_space =
typename buffer_device_type::execution_space;
981 num_packets_per_lid_h, num_packets_per_lid_d);
982 execution_space().fence();
// Explicit-instantiation macro for one (LO, GO, NT) combination.  The
// visible lines name three function templates: Details::packCrsGraph,
// Details::packCrsGraphNew and Details::packCrsGraphWithOwningPIDs.
// Several continuation lines of the macro body are missing from this
// listing.
988 #define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT( LO, GO, NT ) \
990 Details::packCrsGraph<LO, GO, NT> ( \
991 const CrsGraph<LO, GO, NT>&, \
992 Teuchos::Array<CrsGraph<LO,GO,NT>::packet_type>&, \
993 const Teuchos::ArrayView<size_t>&, \
994 const Teuchos::ArrayView<const LO>&, \
997 Details::packCrsGraphNew<LO, GO, NT> ( \
998 const CrsGraph<LO, GO, NT>&, \
999 const Kokkos::DualView< \
1001 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1002 const Kokkos::DualView< \
1004 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1006 CrsGraph<LO,GO,NT>::packet_type*, \
1007 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1010 CrsGraph<LO,GO,NT>::buffer_device_type>, \
1014 Details::packCrsGraphWithOwningPIDs<LO, GO, NT> ( \
1015 const CrsGraph<LO, GO, NT>&, \
1016 Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1017 const Teuchos::ArrayView<size_t>&, \
1018 const Teuchos::ArrayView<const LO>&, \
1019 const Teuchos::ArrayView<const int>&, \
1022 #endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
GlobalOrdinal global_ordinal_type
The type of global indices.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types, into the Tpetra::Details namespace.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Declaration of the Tpetra::CrsGraph class.
"Local" part of Map suitable for Kokkos kernels.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interface.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Declaration and definition of Tpetra::Details::getEntryOnHost.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
LocalOrdinal local_ordinal_type
The type of local indices.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.