10 #ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
11 #define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
13 #include "TpetraCore_config.h"
14 #include "Teuchos_Array.hpp"
15 #include "Teuchos_ArrayView.hpp"
55 namespace PackCrsGraphImpl {
63 template <
class OutputOffsetsViewType,
65 class InputOffsetsViewType,
66 class InputLocalRowIndicesViewType,
67 class InputLocalRowPidsViewType,
69 #ifdef HAVE_TPETRA_DEBUG
73 #endif // HAVE_TPETRA_DEBUG
77 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
78 typedef typename CountsViewType::non_const_value_type count_type;
79 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
80 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
81 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
83 typedef typename OutputOffsetsViewType::device_type device_type;
84 static_assert(std::is_same<
typename CountsViewType::device_type::execution_space,
85 typename device_type::execution_space>::value,
86 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
87 static_assert(Kokkos::is_view<OutputOffsetsViewType>::value,
88 "OutputOffsetsViewType must be a Kokkos::View.");
89 static_assert(std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
90 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
91 static_assert(std::is_integral<output_offset_type>::value,
92 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
93 static_assert(Kokkos::is_view<CountsViewType>::value,
94 "CountsViewType must be a Kokkos::View.");
95 static_assert(std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
96 "CountsViewType must be a nonconst Kokkos::View.");
97 static_assert(std::is_integral<count_type>::value,
98 "The type of each entry of CountsViewType must be a built-in integer type.");
99 static_assert(Kokkos::is_view<InputOffsetsViewType>::value,
100 "InputOffsetsViewType must be a Kokkos::View.");
101 static_assert(std::is_integral<input_offset_type>::value,
102 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
103 static_assert(Kokkos::is_view<InputLocalRowIndicesViewType>::value,
104 "InputLocalRowIndicesViewType must be a Kokkos::View.");
105 static_assert(std::is_integral<local_row_index_type>::value,
106 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
109 const CountsViewType& counts,
110 const InputOffsetsViewType& rowOffsets,
111 const InputLocalRowIndicesViewType& lclRowInds,
112 const InputLocalRowPidsViewType& lclRowPids)
113 : outputOffsets_(outputOffsets)
115 , rowOffsets_(rowOffsets)
116 , lclRowInds_(lclRowInds)
117 , lclRowPids_(lclRowPids)
121 const size_t numRowsToPack =
static_cast<size_t>(lclRowInds_.extent(0));
123 if (numRowsToPack != static_cast<size_t>(counts_.extent(0))) {
124 std::ostringstream os;
125 os <<
"lclRowInds.extent(0) = " << numRowsToPack
126 <<
" != counts.extent(0) = " << counts_.extent(0)
128 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str());
130 if (static_cast<size_t>(numRowsToPack + 1) !=
131 static_cast<size_t>(outputOffsets_.extent(0))) {
132 std::ostringstream os;
133 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
134 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent(0)
136 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str());
// Scan body for a single index `curInd` of the offsets computation.
//
// Visible effects:
//   - stores the running total `update` into outputOffsets_(curInd);
//   - for indices that address a row (curInd < counts_.extent(0)),
//     derives that row's packet count from rowOffsets_, stores it in
//     counts_(curInd), and adds it to `update`.
//
// NOTE(review): several original lines (error-branch bodies, closing
// braces, and presumably the guards on `final`) are absent from this
// listing, so which writes are restricted to the final scan pass cannot
// be confirmed from here.
141 KOKKOS_INLINE_FUNCTION
void
142 operator()(
const local_row_index_type& curInd,
143 output_offset_type& update,
144 const bool final)
const {
// Range checks on curInd against [0, outputOffsets_.extent(0)); the
// bodies of these branches are not visible in this listing.
146 if (curInd < static_cast<local_row_index_type>(0)) {
154 if (curInd >= static_cast<local_row_index_type>(outputOffsets_.extent(0))) {
// The offset is written before this row's count is added to `update`,
// so entry curInd holds the sum of the counts of all earlier indices.
159 outputOffsets_(curInd) = update;
// Only indices below counts_.extent(0) correspond to a row to pack.
162 if (curInd < static_cast<local_row_index_type>(counts_.extent(0))) {
163 const auto lclRow = lclRowInds_(curInd);
// Reject a local row whose [lclRow, lclRow + 1] pair would fall outside
// rowOffsets_, or that is negative.
164 if (static_cast<size_t>(lclRow + 1) >= static_cast<size_t>(rowOffsets_.extent(0)) ||
165 static_cast<local_row_index_type>(lclRow) <
static_cast<local_row_index_type
>(0)) {
// Number of entries in the row, taken from consecutive row offsets.
173 const count_type count =
174 static_cast<count_type
>(rowOffsets_(lclRow + 1) - rowOffsets_(lclRow));
// One packet per entry, doubled when lclRowPids_ is nonempty; an empty
// row contributes zero packets.
178 const count_type numEntToPack = (count == 0)
179 ? static_cast<count_type>(0)
180 : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
183 counts_(curInd) = numEntToPack;
185 update += numEntToPack;
195 auto error_h = Kokkos::create_mirror_view(error_);
206 OutputOffsetsViewType outputOffsets_;
207 CountsViewType counts_;
208 typename InputOffsetsViewType::const_type rowOffsets_;
209 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
210 typename InputLocalRowPidsViewType::const_type lclRowPids_;
211 Kokkos::View<int, device_type> error_;
// Host-side driver: checks the extents of the output views against the
// number of rows to pack, runs NumPacketsAndOffsetsFunctor in a
// Kokkos::parallel_scan, and throws std::runtime_error if the functor
// reports a nonzero error code.
//
// Throws std::invalid_argument (via TEUCHOS_TEST_FOR_EXCEPTION) when
//   - rowOffsets.extent(0) <= 1,
//   - outputOffsets.extent(0) != lclRowInds.extent(0) + 1, or
//   - counts.extent(0) != lclRowInds.extent(0).
//
// NOTE(review): a number of original lines are missing from this
// listing (the functor typedef name, the body of the zero-row branch,
// the opening of the debug-only section, closing braces, and the tail
// of the final return), so the exact control flow cannot be confirmed
// from here.
223 template <
class OutputOffsetsViewType,
224 class CountsViewType,
225 class InputOffsetsViewType,
226 class InputLocalRowIndicesViewType,
227 class InputLocalRowPidsViewType>
228 typename CountsViewType::non_const_value_type
229 computeNumPacketsAndOffsets(
const OutputOffsetsViewType& outputOffsets,
230 const CountsViewType& counts,
231 const InputOffsetsViewType& rowOffsets,
232 const InputLocalRowIndicesViewType& lclRowInds,
233 const InputLocalRowPidsViewType& lclRowPids) {
// The functor is instantiated with the const_type of each input view.
234 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
235 CountsViewType,
typename InputOffsetsViewType::const_type,
236 typename InputLocalRowIndicesViewType::const_type,
237 typename InputLocalRowPidsViewType::const_type>
239 typedef typename CountsViewType::non_const_value_type count_type;
240 typedef typename OutputOffsetsViewType::size_type size_type;
241 typedef typename OutputOffsetsViewType::execution_space execution_space;
242 typedef typename functor_type::local_row_index_type LO;
243 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
244 const char prefix[] =
"computeNumPacketsAndOffsets: ";
// The number of rows to pack is the extent of lclRowInds, held as
// count_type.
246 count_type count = 0;
247 const count_type numRowsToPack = lclRowInds.extent(0);
249 if (numRowsToPack == 0) {
252 TEUCHOS_TEST_FOR_EXCEPTION(rowOffsets.extent(0) <=
static_cast<size_type
>(1),
253 std::invalid_argument, prefix <<
"There is at least one row to pack, "
254 "but the graph has no rows. lclRowInds.extent(0) = "
255 << numRowsToPack <<
", but rowOffsets.extent(0) = " << rowOffsets.extent(0) <<
" <= 1.");
256 TEUCHOS_TEST_FOR_EXCEPTION(outputOffsets.extent(0) !=
257 static_cast<size_type
>(numRowsToPack + 1),
258 std::invalid_argument,
259 prefix <<
"Output dimension does not match number of rows to pack. "
260 <<
"outputOffsets.extent(0) = " << outputOffsets.extent(0)
261 <<
" != lclRowInds.extent(0) + 1 = "
262 <<
static_cast<size_type
>(numRowsToPack + 1) <<
".");
263 TEUCHOS_TEST_FOR_EXCEPTION(counts.extent(0) != numRowsToPack, std::invalid_argument,
264 prefix <<
"counts.extent(0) = " << counts.extent(0)
265 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// The scan range has numRowsToPack + 1 indices, matching the required
// extent of outputOffsets (one entry more than there are rows).
267 functor_type f(outputOffsets, counts, rowOffsets, lclRowInds, lclRowPids);
268 Kokkos::parallel_scan(
"Tpetra::Details::computeNumPacketsAndOffsets::scan", range_type(0, numRowsToPack + 1), f);
// A nonzero functor error code is turned into std::runtime_error.
271 const int errCode = f.getError();
272 TEUCHOS_TEST_FOR_EXCEPTION(errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code " << errCode <<
" != 0.");
// Consistency check comparing outputOffsets(numRowsToPack) with a
// separately accumulated `total`, with diagnostic output on mismatch.
// Presumably this whole section is compiled only under
// HAVE_TPETRA_DEBUG (only the closing #endif is visible) -- confirm.
// NOTE(review): `total` and `errStr` are declared in lines not shown,
// and `return {false, errStr}` below does not obviously match the
// declared return type; verify against the complete file.
276 for (LO k = 0; k < numRowsToPack; ++k) {
279 if (outputOffsets(numRowsToPack) != total) {
280 if (errStr.get () == NULL) {
281 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
283 std::ostringstream& os = *errStr;
285 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
286 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
287 << total <<
"." << std::endl;
288 if (numRowsToPack != 0) {
290 if (numRowsToPack < static_cast<LO> (10)) {
291 os <<
"outputOffsets: [";
292 for (LO i = 0; i <= numRowsToPack; ++i) {
293 os << outputOffsets(i);
294 if (static_cast<LO> (i + 1) <= numRowsToPack) {
298 os <<
"]" << std::endl;
300 for (LO i = 0; i < numRowsToPack; ++i) {
302 if (static_cast<LO> (i + 1) < numRowsToPack) {
306 os <<
"]" << std::endl;
309 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
310 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
313 count = outputOffsets(numRowsToPack);
314 return {
false, errStr};
316 #endif // HAVE_TPETRA_DEBUG
// The result is read from outputOffsets through getEntryOnHost; the
// index argument is on a line not shown (presumably the last entry,
// i.e. the total packet count -- confirm).
320 using Tpetra::Details::getEntryOnHost;
321 return static_cast<count_type
>(getEntryOnHost(outputOffsets,
// Packs one graph row into the `exports` buffer and returns the number
// of buffer slots that were filled.
//
// Buffer layout, starting at `offset`:
//   [offset,           offset + num_ent)     global column indices
//   [offset + num_ent, offset + 2 * num_ent) process ids cast to GO
//
// NOTE(review): the template parameter list, the return type, the
// `offset` parameter and the conditions guarding the early return, the
// doubling of the count and the second loop are on lines missing from
// this listing. The second region is presumably written only when
// `pack_pids` is true -- confirm against the complete file.
336 template <
class Packet,
338 class BufferDeviceType,
343 packRow(
const LocalMapType& col_map,
344 const Kokkos::View<Packet*, BufferDeviceType>& exports,
345 const InputLidsType& lids_in,
346 const InputPidsType& pids_in,
348 const size_t num_ent,
349 const bool pack_pids) {
350 using LO =
typename LocalMapType::local_ordinal_type;
351 using GO =
typename LocalMapType::global_ordinal_type;
355 return static_cast<size_t>(0);
// Slot count: num_ent for the indices, plus another num_ent added below.
358 size_t num_ent_packed = num_ent;
360 num_ent_packed += num_ent;
// Convert each local column index to a global index via the column map
// and store it in the first region.
365 for (
size_t k = 0; k < num_ent; ++k) {
366 const LO lid = lids_in[k];
367 const GO gid = col_map.getGlobalElement(lid);
368 exports(offset + k) = gid;
// Second region: pids_in is indexed by the local column index `lid`,
// not by the position k within the row.
372 for (
size_t k = 0; k < num_ent; ++k) {
373 const LO lid = lids_in[k];
374 const int pid = pids_in[lid];
375 exports(offset + num_ent + k) =
static_cast<GO
>(pid);
379 return num_ent_packed;
382 template <
class Packet,
385 class BufferDeviceType>
386 struct PackCrsGraphFunctor {
387 using local_graph_type = LocalGraph;
388 using local_map_type = LocalMap;
392 using num_packets_per_lid_view_type =
393 Kokkos::View<const size_t*, BufferDeviceType>;
394 using offsets_view_type = Kokkos::View<const size_t*, BufferDeviceType>;
395 using exports_view_type = Kokkos::View<Packet*, BufferDeviceType>;
396 using export_lids_view_type =
398 using source_pids_view_type =
402 typename num_packets_per_lid_view_type::non_const_value_type;
403 using offset_type =
typename offsets_view_type::non_const_value_type;
404 using value_type = Kokkos::pair<int, LO>;
406 static_assert(std::is_same<LO, typename local_graph_type::data_type>::value,
407 "local_map_type::local_ordinal_type and "
408 "local_graph_type::data_type must be the same.");
410 local_graph_type local_graph;
411 local_map_type local_col_map;
412 exports_view_type exports;
413 num_packets_per_lid_view_type num_packets_per_lid;
414 export_lids_view_type export_lids;
415 source_pids_view_type source_pids;
416 offsets_view_type offsets;
// Constructor: stores the graph, column map, buffers and flags in the
// corresponding members, then checks that the row-offsets array of a
// nonempty graph has exactly numRows + 1 entries; throws
// std::logic_error (via TEUCHOS_TEST_FOR_EXCEPTION) otherwise.
// NOTE(review): the declaration of `rowMapDim` (and of the `pack_pids`
// member) sits on lines missing from this listing.
419 PackCrsGraphFunctor(
const local_graph_type& local_graph_in,
420 const local_map_type& local_col_map_in,
421 const exports_view_type& exports_in,
422 const num_packets_per_lid_view_type& num_packets_per_lid_in,
423 const export_lids_view_type& export_lids_in,
424 const source_pids_view_type& source_pids_in,
425 const offsets_view_type& offsets_in,
426 const bool pack_pids_in)
427 : local_graph(local_graph_in)
428 , local_col_map(local_col_map_in)
429 , exports(exports_in)
430 , num_packets_per_lid(num_packets_per_lid_in)
431 , export_lids(export_lids_in)
432 , source_pids(source_pids_in)
433 , offsets(offsets_in)
434 , pack_pids(pack_pids_in) {
435 const LO numRows = local_graph_in.numRows();
437 static_cast<LO
>(local_graph.row_map.extent(0));
// The check is skipped for a graph with zero rows.
438 TEUCHOS_TEST_FOR_EXCEPTION(numRows != 0 && rowMapDim != numRows + static_cast<LO>(1),
439 std::logic_error,
"local_graph.row_map.extent(0) = " << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 paired with the invalid local
// ordinal as the row index.
442 KOKKOS_INLINE_FUNCTION
void init(value_type& dst)
const {
443 using ::Tpetra::Details::OrdinalTraits;
444 dst = Kokkos::make_pair(0, OrdinalTraits<LO>::invalid());
// Reduction combiner for two (error code, row index) results.
// The visible condition selects the case where `src` carries a nonzero
// error code and `dst` does not yet; the body of that branch is on
// lines missing from this listing (presumably `dst = src` -- confirm).
447 KOKKOS_INLINE_FUNCTION
void
448 join(value_type& dst,
const value_type& src)
const {
451 if (src.first != 0 && dst.first == 0) {
// Reduction body for export index `i`: validates the row and its slice
// of the buffer, packs the row with packRow(), and reports problems
// through `dst` as (error code, i):
//   1 = export_lid is not below local_graph.numRows()
//   2 = offset, or offset + packet count, is past the end of `exports`
//   3 = packRow() packed a different number of slots than
//       num_packets_per_lid(i)
//
// NOTE(review): the three `dst.first != 0` guards look inverted.
// init() sets dst.first to 0 and join() treats a nonzero first as "an
// error is present", so with `!= 0` an error code would only be stored
// when one is already recorded, i.e. the first error would never be
// stored. Presumably `== 0` was intended -- confirm.
//
// NOTE(review): comments, early returns and closing braces are on lines
// missing from this listing.
456 KOKKOS_INLINE_FUNCTION
457 void operator()(
const LO i, value_type& dst)
const {
458 const size_t offset = offsets[i];
459 const LO export_lid = export_lids[i];
460 const size_t buf_size = exports.size();
461 const size_t num_packets_this_lid = num_packets_per_lid(i);
// NOTE(review): row_map is indexed with export_lid here, before the
// range check on export_lid further down; an out-of-range export_lid
// would already have been used as an index -- confirm intent.
462 const size_t num_ent =
463 static_cast<size_t>(local_graph.row_map[export_lid + 1] - local_graph.row_map[export_lid]);
// Error 1: the row index is not a row of the local graph.
473 if (export_lid >= static_cast<LO>(local_graph.numRows())) {
474 if (dst.first != 0) {
475 dst = Kokkos::make_pair(1, i);
478 }
else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
// Error 2: this row's packets would not fit inside the export buffer.
479 if (dst.first != 0) {
480 dst = Kokkos::make_pair(2, i);
// The row's local column indices are the slice [row_beg, row_end) of
// local_graph.entries.
490 const auto row_beg = local_graph.row_map[export_lid];
491 const auto row_end = local_graph.row_map[export_lid + 1];
492 auto lids_in = Kokkos::subview(local_graph.entries,
493 Kokkos::make_pair(row_beg, row_end));
494 size_t num_ent_packed_this_row =
495 packRow(local_col_map, exports, lids_in,
496 source_pids, offset, num_ent, pack_pids);
// Error 3: the packed slot count disagrees with the precomputed count.
497 if (num_ent_packed_this_row != num_packets_this_lid) {
498 if (dst.first != 0) {
499 dst = Kokkos::make_pair(3, i);
// Validates the argument extents, builds a PackCrsGraphFunctor and runs
// it in a Kokkos::parallel_reduce over one index per entry of
// num_packets_per_lid.
//
// Throws std::invalid_argument when the extents of offsets,
// export_lids and num_packets_per_lid are inconsistent, or when
// pack_pids is set, `exports` is nonempty and source_pids is empty.
// Throws std::runtime_error when the functor reports a nonzero error
// code; the message includes the code and the reported row index.
//
// NOTE(review): parts of the template parameter list, the export_lids
// parameter, the LO alias and several closing braces are on lines
// missing from this listing.
512 template <
class Packet,
515 class BufferDeviceType>
516 void do_pack(
const LocalGraph& local_graph,
517 const LocalMap& local_map,
518 const Kokkos::View<Packet*, BufferDeviceType>& exports,
519 const typename PackTraits<
520 size_t>::input_array_type& num_packets_per_lid,
521 const typename PackTraits<
523 const typename PackTraits<
524 int>::input_array_type& source_pids,
525 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
526 const bool pack_pids) {
528 using execution_space =
typename LocalGraph::device_type::execution_space;
529 using range_type = Kokkos::RangePolicy<execution_space, LO>;
530 const char prefix[] =
"Tpetra::Details::PackCrsGraphImpl::do_pack: ";
// Extent checks are made only when there is at least one row to export:
// offsets needs one more entry than export_lids, and export_lids must
// match num_packets_per_lid.
532 if (export_lids.extent(0) != 0) {
533 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(offsets.extent(0)) !=
534 static_cast<size_t>(export_lids.extent(0) + 1),
535 std::invalid_argument, prefix <<
"offsets.extent(0) = " << offsets.extent(0) <<
" != export_lids.extent(0) (= " << export_lids.extent(0) <<
") + 1.");
536 TEUCHOS_TEST_FOR_EXCEPTION(export_lids.extent(0) != num_packets_per_lid.extent(0),
537 std::invalid_argument, prefix <<
"export_lids.extent(0) = " << export_lids.extent(0) <<
" != num_packets_per_lid.extent(0) = " << num_packets_per_lid.extent(0) <<
".");
// Packing process ids into a nonempty buffer needs a nonempty
// source_pids array.
541 TEUCHOS_TEST_FOR_EXCEPTION(pack_pids && exports.extent(0) != 0 &&
542 source_pids.extent(0) == 0,
543 std::invalid_argument, prefix <<
"pack_pids is true, and exports.extent(0) = " << exports.extent(0) <<
" != 0, meaning that we need to pack at "
544 "least one graph entry, but source_pids.extent(0) = 0.");
547 using pack_functor_type =
548 PackCrsGraphFunctor<Packet, LocalGraph, LocalMap,
550 pack_functor_type f(local_graph, local_map, exports,
551 num_packets_per_lid, export_lids,
552 source_pids, offsets, pack_pids);
// `result` is the functor's value_type: an (error code, row index) pair.
554 typename pack_functor_type::value_type result;
555 range_type range(0, num_packets_per_lid.extent(0));
// NOTE(review): the kernel label below names computeNumPacketsAndOffsets
// although this is do_pack; it looks like a copy/paste leftover. It is a
// runtime string, so it is left unchanged here.
556 Kokkos::parallel_reduce(
"Tpetra::Details::computeNumPacketsAndOffsets::reduce", range, f, result);
// Translate a nonzero error code into an exception. Only codes 1 and 2
// get a description in the visible lines; code 3 (packed count
// mismatch, set in PackCrsGraphFunctor::operator()) appears to be
// reported with an empty description -- confirm against the full file.
558 if (result.first != 0) {
561 std::ostringstream os;
562 if (result.first == 1) {
563 os <<
"invalid local row index";
564 }
else if (result.first == 2) {
565 os <<
"invalid offset";
567 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, prefix <<
"PackCrsGraphFunctor "
568 "reported error code "
569 << result.first <<
" (" << os.str() <<
") for the first bad row " << result.second <<
".");
599 template <
typename LO,
typename GO,
typename NT>
600 void packCrsGraph(
const CrsGraph<LO, GO, NT>& sourceGraph,
602 typename CrsGraph<LO, GO, NT>::packet_type*,
603 typename CrsGraph<LO, GO, NT>::buffer_device_type>& exports,
606 typename CrsGraph<LO, GO, NT>::buffer_device_type>& num_packets_per_lid,
609 typename CrsGraph<LO, GO, NT>::buffer_device_type>& export_lids,
612 typename CrsGraph<LO, GO, NT>::buffer_device_type>& export_pids,
613 size_t& constant_num_packets,
614 const bool pack_pids) {
616 using crs_graph_type = CrsGraph<LO, GO, NT>;
617 using packet_type =
typename crs_graph_type::packet_type;
618 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
619 using exports_view_type = Kokkos::DualView<packet_type*, buffer_device_type>;
620 using local_graph_device_type =
typename crs_graph_type::local_graph_device_type;
622 const char prefix[] =
"Tpetra::Details::packCrsGraph: ";
623 constexpr
bool debug =
false;
625 local_graph_device_type local_graph = sourceGraph.getLocalGraphDevice();
626 local_map_type local_col_map = sourceGraph.getColMap()->getLocalMap();
631 constant_num_packets = 0;
633 const size_t num_export_lids(export_lids.extent(0));
634 TEUCHOS_TEST_FOR_EXCEPTION(num_export_lids !=
size_t(num_packets_per_lid.extent(0)),
635 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = " << num_export_lids <<
" != num_packets_per_lid.extent(0) = " << num_packets_per_lid.extent(0) <<
".");
636 if (num_export_lids != 0) {
637 TEUCHOS_TEST_FOR_EXCEPTION(num_packets_per_lid.data() ==
nullptr, std::invalid_argument,
638 prefix <<
"num_export_lids = " << num_export_lids <<
" != 0, but "
639 "num_packets_per_lid.data() = "
640 << num_packets_per_lid.data() <<
" == NULL.");
643 if (num_export_lids == 0) {
644 exports = exports_view_type(
"exports", 0);
649 View<size_t*, buffer_device_type> offsets(
"offsets", num_export_lids + 1);
654 computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
655 local_graph.row_map, export_lids, export_pids);
658 if (count >
size_t(exports.extent(0))) {
659 exports = exports_view_type(
"exports", count);
661 std::ostringstream os;
662 os <<
"*** exports resized to " << count << std::endl;
663 std::cerr << os.str();
667 std::ostringstream os;
668 os <<
"*** count: " << count <<
", exports.extent(0): "
669 << exports.extent(0) << std::endl;
670 std::cerr << os.str();
676 TEUCHOS_TEST_FOR_EXCEPTION(pack_pids && exports.extent(0) != 0 &&
677 export_pids.extent(0) == 0,
678 std::invalid_argument, prefix <<
"pack_pids is true, and exports.extent(0) = " << exports.extent(0) <<
" != 0, meaning that we need to pack at least "
679 "one graph entry, but export_pids.extent(0) = 0.");
681 exports.modify_device();
682 auto exports_d = exports.view_device();
683 do_pack<packet_type, local_graph_device_type, local_map_type, buffer_device_type>(local_graph, local_col_map, exports_d, num_packets_per_lid,
684 export_lids, export_pids, offsets, pack_pids);
690 template <
typename LO,
typename GO,
typename NT>
693 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
694 const Teuchos::ArrayView<const LO>& exportLIDs,
695 size_t& constantNumPackets) {
696 using Kokkos::HostSpace;
697 using Kokkos::MemoryUnmanaged;
700 using packet_type =
typename crs_graph_type::packet_type;
701 using BDT =
typename crs_graph_type::buffer_device_type;
708 View<size_t*, BDT> num_packets_per_lid_d =
710 numPacketsPerLID.getRawPtr(),
711 numPacketsPerLID.size(),
false,
712 "num_packets_per_lid");
715 View<const LO*, BDT> export_lids_d =
717 exportLIDs.getRawPtr(),
718 exportLIDs.size(),
true,
720 View<const int*, BDT> export_pids_d;
721 Kokkos::DualView<packet_type*, BDT> exports_dv;
722 constexpr
bool pack_pids =
false;
724 static_assert(std::is_same<
725 typename decltype(num_packets_per_lid_d)::non_const_value_type,
727 "num_packets_per_lid_d's non_const_value_type should be size_t.");
728 static_assert(std::is_same<
729 typename decltype(num_packets_per_lid_d)::device_type,
731 "num_packets_per_lid_d's BDT should be size_t.");
732 static_assert(std::is_same<
733 typename decltype(export_lids_d)::device_type,
735 "export_lids_d's device_type should be BDT.");
736 static_assert(std::is_same<
737 typename decltype(export_pids_d)::non_const_value_type,
739 "export_pids_d's non_const_value_type should be int.");
740 static_assert(std::is_same<
741 typename decltype(export_pids_d)::device_type,
743 "export_pids_d's device_type should be BDT.");
746 export_pids_d, constantNumPackets, pack_pids);
750 View<size_t*, HostSpace, MemoryUnmanaged>
751 num_packets_per_lid_h(numPacketsPerLID.getRawPtr(),
752 numPacketsPerLID.size());
755 using execution_space =
typename BDT::execution_space;
756 Kokkos::deep_copy(execution_space(), num_packets_per_lid_h, num_packets_per_lid_d);
763 if (static_cast<size_t>(exports.size()) !=
764 static_cast<size_t>(exports_dv.extent(0))) {
765 exports.resize(exports_dv.extent(0));
767 View<packet_type*, HostSpace, MemoryUnmanaged>
768 exports_h(exports.getRawPtr(), exports.size());
771 execution_space().fence();
776 template <
typename LO,
typename GO,
typename NT>
778 const Kokkos::DualView<
781 const Kokkos::DualView<
791 size_t& constant_num_packets,
792 const bool pack_pids) {
795 using BDT =
typename crs_graph_type::buffer_device_type;
796 using PT =
typename crs_graph_type::packet_type;
797 using exports_dual_view_type = Kokkos::DualView<PT*, BDT>;
798 using LGT =
typename crs_graph_type::local_graph_device_type;
799 using LMT =
typename crs_graph_type::map_type::local_map_type;
800 const char prefix[] =
"Tpetra::Details::packCrsGraphNew: ";
803 const LMT local_col_map = sourceGraph.
getColMap()->getLocalMap();
808 constant_num_packets = 0;
810 const size_t num_export_lids =
811 static_cast<size_t>(export_lids.extent(0));
812 TEUCHOS_TEST_FOR_EXCEPTION(num_export_lids !=
813 static_cast<size_t>(num_packets_per_lid.extent(0)),
814 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = " << num_export_lids <<
" != num_packets_per_lid.extent(0) = " << num_packets_per_lid.extent(0) <<
".");
815 TEUCHOS_TEST_FOR_EXCEPTION(num_export_lids != 0 &&
816 num_packets_per_lid.view_device().data() ==
nullptr,
817 std::invalid_argument, prefix <<
"num_export_lids = " << num_export_lids <<
" != 0, but num_packets_per_lid.view_device().data() = nullptr.");
819 if (num_export_lids == 0) {
820 exports = exports_dual_view_type();
825 using offsets_type = Kokkos::View<size_t*, BDT>;
826 offsets_type offsets(
"offsets", num_export_lids + 1);
830 num_packets_per_lid.clear_sync_state();
831 num_packets_per_lid.modify_device();
832 using PackCrsGraphImpl::computeNumPacketsAndOffsets;
834 computeNumPacketsAndOffsets(offsets, num_packets_per_lid.view_device(),
836 export_lids.view_device(),
837 export_pids.view_device());
840 if (count > static_cast<size_t>(exports.extent(0))) {
841 exports = exports_dual_view_type(
"exports", count);
847 TEUCHOS_TEST_FOR_EXCEPTION(pack_pids && exports.extent(0) != 0 &&
848 export_pids.extent(0) == 0,
849 std::invalid_argument, prefix <<
"pack_pids is true, and exports.extent(0) = " << exports.extent(0) <<
" != 0, meaning that we need to pack at least "
850 "one graph entry, but export_pids.extent(0) = 0.");
852 exports.modify_device();
853 using PackCrsGraphImpl::do_pack;
854 do_pack<PT, LGT, LMT, BDT>(local_graph, local_col_map,
855 exports.view_device(),
856 num_packets_per_lid.view_device(),
857 export_lids.view_device(),
858 export_pids.view_device(),
862 template <
typename LO,
typename GO,
typename NT>
867 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
868 const Teuchos::ArrayView<const LO>& exportLIDs,
869 const Teuchos::ArrayView<const int>& sourcePIDs,
870 size_t& constantNumPackets) {
871 using Kokkos::HostSpace;
872 using Kokkos::MemoryUnmanaged;
875 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
881 View<size_t*, buffer_device_type> num_packets_per_lid_d =
883 numPacketsPerLID.getRawPtr(),
884 numPacketsPerLID.size(),
false,
885 "num_packets_per_lid");
889 View<const LO*, buffer_device_type> export_lids_d =
891 exportLIDs.getRawPtr(),
892 exportLIDs.size(),
true,
896 View<const int*, buffer_device_type> export_pids_d =
898 sourcePIDs.getRawPtr(),
899 sourcePIDs.size(),
true,
901 constexpr
bool pack_pids =
true;
903 export_pids_d, constantNumPackets, pack_pids);
907 View<size_t*, HostSpace, MemoryUnmanaged> num_packets_per_lid_h(numPacketsPerLID.getRawPtr(), numPacketsPerLID.size());
909 using execution_space =
typename buffer_device_type::execution_space;
911 num_packets_per_lid_h, num_packets_per_lid_d);
912 execution_space().fence();
918 #define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT(LO, GO, NT) \
920 Details::packCrsGraph<LO, GO, NT>( \
921 const CrsGraph<LO, GO, NT>&, \
922 Teuchos::Array<CrsGraph<LO, GO, NT>::packet_type>&, \
923 const Teuchos::ArrayView<size_t>&, \
924 const Teuchos::ArrayView<const LO>&, \
927 Details::packCrsGraphNew<LO, GO, NT>( \
928 const CrsGraph<LO, GO, NT>&, \
929 const Kokkos::DualView< \
931 CrsGraph<LO, GO, NT>::buffer_device_type>&, \
932 const Kokkos::DualView< \
934 CrsGraph<LO, GO, NT>::buffer_device_type>&, \
936 CrsGraph<LO, GO, NT>::packet_type*, \
937 CrsGraph<LO, GO, NT>::buffer_device_type>&, \
940 CrsGraph<LO, GO, NT>::buffer_device_type>, \
944 Details::packCrsGraphWithOwningPIDs<LO, GO, NT>( \
945 const CrsGraph<LO, GO, NT>&, \
946 Kokkos::DualView<CrsGraph<LO, GO, NT>::packet_type*, CrsGraph<LO, GO, NT>::buffer_device_type>&, \
947 const Teuchos::ArrayView<size_t>&, \
948 const Teuchos::ArrayView<const LO>&, \
949 const Teuchos::ArrayView<const int>&, \
952 #endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
GlobalOrdinal global_ordinal_type
The type of global indices.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack...
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Declaration of the Tpetra::CrsGraph class.
"Local" part of Map suitable for Kokkos kernels.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interface.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Declaration and definition of Tpetra::Details::getEntryOnHost.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
LocalOrdinal local_ordinal_type
The type of local indices.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.