40 #ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
41 #define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
85 namespace PackCrsGraphImpl {
93 template<
class OutputOffsetsViewType,
95 class InputOffsetsViewType,
96 class InputLocalRowIndicesViewType,
97 class InputLocalRowPidsViewType,
99 #ifdef HAVE_TPETRA_DEBUG
103 #endif // HAVE_TPETRA_DEBUG
107 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
108 typedef typename CountsViewType::non_const_value_type count_type;
109 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
110 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
111 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
113 typedef typename OutputOffsetsViewType::device_type device_type;
114 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
115 typename device_type::execution_space>::value,
116 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
117 static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
118 "OutputOffsetsViewType must be a Kokkos::View.");
119 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
120 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
121 static_assert (std::is_integral<output_offset_type>::value,
122 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
123 static_assert (Kokkos::is_view<CountsViewType>::value,
124 "CountsViewType must be a Kokkos::View.");
125 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
126 "CountsViewType must be a nonconst Kokkos::View.");
127 static_assert (std::is_integral<count_type>::value,
128 "The type of each entry of CountsViewType must be a built-in integer type.");
129 static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
130 "InputOffsetsViewType must be a Kokkos::View.");
131 static_assert (std::is_integral<input_offset_type>::value,
132 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
133 static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
134 "InputLocalRowIndicesViewType must be a Kokkos::View.");
135 static_assert (std::is_integral<local_row_index_type>::value,
136 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
139 const CountsViewType& counts,
140 const InputOffsetsViewType& rowOffsets,
141 const InputLocalRowIndicesViewType& lclRowInds,
142 const InputLocalRowPidsViewType& lclRowPids) :
143 outputOffsets_ (outputOffsets),
145 rowOffsets_ (rowOffsets),
146 lclRowInds_ (lclRowInds),
147 lclRowPids_ (lclRowPids),
151 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
153 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
154 std::ostringstream os;
155 os <<
"lclRowInds.extent(0) = " << numRowsToPack
156 <<
" != counts.extent(0) = " << counts_.extent (0)
158 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
160 if (static_cast<size_t> (numRowsToPack + 1) !=
161 static_cast<size_t> (outputOffsets_.extent (0))) {
162 std::ostringstream os;
163 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
164 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
166 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body of the functor: called once per index curInd, with `update`
// holding the running sum of packet counts over the preceding indices.
// NOTE(review): the lines that use `final` and that record errors are not
// visible in this excerpt, so the guards around the writes below are unknown.
171 KOKKOS_INLINE_FUNCTION
void
172 operator() (
const local_row_index_type& curInd,
173 output_offset_type& update,
174 const bool final)
const
// Range checks on the scan index itself (negative, then past the end of
// outputOffsets_); the bodies of these branches are not visible here.
177 if (curInd < static_cast<local_row_index_type> (0)) {
185 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
// The offset of entry curInd is the sum accumulated before adding its own count.
190 outputOffsets_(curInd) = update;
// Only indices below counts_.extent(0) map to a row to pack; larger indices
// contribute no count.
193 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
194 const auto lclRow = lclRowInds_(curInd);
// Validate lclRow before using it to index rowOffsets_ at lclRow and lclRow+1.
195 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
196 static_cast<local_row_index_type> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in row lclRow, from consecutive row offsets.
204 const count_type count =
205 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// One packet per entry, doubled when lclRowPids_ is nonempty; an empty row
// contributes zero packets.
209 const count_type numEntToPack = (count == 0)
210 ? static_cast<count_type>(0)
211 : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
214 counts_(curInd) = numEntToPack;
// Fold this row's packet count into the running sum seen by later indices.
216 update += numEntToPack;
226 auto error_h = Kokkos::create_mirror_view (error_);
228 using execution_space =
typename device_type::execution_space;
234 OutputOffsetsViewType outputOffsets_;
235 CountsViewType counts_;
236 typename InputOffsetsViewType::const_type rowOffsets_;
237 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
238 typename InputLocalRowPidsViewType::const_type lclRowPids_;
239 Kokkos::View<int, device_type> error_;
251 template<
class OutputOffsetsViewType,
252 class CountsViewType,
253 class InputOffsetsViewType,
254 class InputLocalRowIndicesViewType,
255 class InputLocalRowPidsViewType>
256 typename CountsViewType::non_const_value_type
257 computeNumPacketsAndOffsets(
const OutputOffsetsViewType& outputOffsets,
258 const CountsViewType& counts,
259 const InputOffsetsViewType& rowOffsets,
260 const InputLocalRowIndicesViewType& lclRowInds,
261 const InputLocalRowPidsViewType& lclRowPids)
263 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
264 CountsViewType,
typename InputOffsetsViewType::const_type,
265 typename InputLocalRowIndicesViewType::const_type,
266 typename InputLocalRowPidsViewType::const_type> functor_type;
267 typedef typename CountsViewType::non_const_value_type count_type;
268 typedef typename OutputOffsetsViewType::size_type size_type;
269 typedef typename OutputOffsetsViewType::execution_space execution_space;
270 typedef typename functor_type::local_row_index_type LO;
271 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
272 const char prefix[] =
"computeNumPacketsAndOffsets: ";
274 count_type count = 0;
275 const count_type numRowsToPack = lclRowInds.extent (0);
277 if (numRowsToPack == 0) {
281 TEUCHOS_TEST_FOR_EXCEPTION
282 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
283 std::invalid_argument, prefix <<
"There is at least one row to pack, "
284 "but the graph has no rows. lclRowInds.extent(0) = " <<
285 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
286 rowOffsets.extent (0) <<
" <= 1.");
287 TEUCHOS_TEST_FOR_EXCEPTION
288 (outputOffsets.extent (0) !=
289 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
290 prefix <<
"Output dimension does not match number of rows to pack. "
291 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
292 <<
" != lclRowInds.extent(0) + 1 = "
293 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
294 TEUCHOS_TEST_FOR_EXCEPTION
295 (counts.extent (0) != numRowsToPack, std::invalid_argument,
296 prefix <<
"counts.extent(0) = " << counts.extent (0)
297 <<
" != numRowsToPack = " << numRowsToPack <<
".");
299 functor_type f (outputOffsets, counts, rowOffsets, lclRowInds, lclRowPids);
300 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
303 const int errCode = f.getError ();
304 TEUCHOS_TEST_FOR_EXCEPTION
305 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
306 << errCode <<
" != 0.");
310 for (LO k = 0; k < numRowsToPack; ++k) {
313 if (outputOffsets(numRowsToPack) != total) {
314 if (errStr.get () == NULL) {
315 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
317 std::ostringstream& os = *errStr;
319 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
320 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
321 << total <<
"." << std::endl;
322 if (numRowsToPack != 0) {
324 if (numRowsToPack < static_cast<LO> (10)) {
325 os <<
"outputOffsets: [";
326 for (LO i = 0; i <= numRowsToPack; ++i) {
327 os << outputOffsets(i);
328 if (static_cast<LO> (i + 1) <= numRowsToPack) {
332 os <<
"]" << std::endl;
334 for (LO i = 0; i < numRowsToPack; ++i) {
336 if (static_cast<LO> (i + 1) < numRowsToPack) {
340 os <<
"]" << std::endl;
343 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
344 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
347 count = outputOffsets(numRowsToPack);
348 return {
false, errStr};
350 #endif // HAVE_TPETRA_DEBUG
354 using Tpetra::Details::getEntryOnHost;
355 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Pack the num_ent entries of one graph row into `exports`, starting at
// position `offset`, and return the number of packets written for the row.
// Layout written by the visible loops: num_ent global column indices,
// followed by num_ent process ids stored as GO.
// NOTE(review): several template parameters, the `offset` parameter, and the
// conditions guarding the early return and the pid section are not visible in
// this excerpt; presumably the pid section is controlled by pack_pids -- confirm.
370 template<
class Packet,
372 class BufferDeviceType,
377 packRow(
const LocalMapType& col_map,
378 const Kokkos::View<Packet*, BufferDeviceType>& exports,
379 const InputLidsType& lids_in,
380 const InputPidsType& pids_in,
382 const size_t num_ent,
383 const bool pack_pids)
385 using LO =
typename LocalMapType::local_ordinal_type;
386 using GO =
typename LocalMapType::global_ordinal_type;
390 return static_cast<size_t>(0);
// Packet count starts at one per entry and grows by num_ent again for the
// second section.
393 size_t num_ent_packed = num_ent;
395 num_ent_packed += num_ent;
// First section: convert each local column index to its global index.
400 for (
size_t k = 0; k < num_ent; ++k) {
401 const LO lid = lids_in[k];
402 const GO gid = col_map.getGlobalElement (lid);
403 exports(offset+k) = gid;
// Second section: process ids, written after the num_ent global indices.
// Note that pids_in is indexed by the local column index (lid), not by k.
407 for (
size_t k = 0; k < num_ent; ++k) {
408 const LO lid = lids_in[k];
409 const int pid = pids_in[lid];
410 exports(offset+num_ent+k) =
static_cast<GO
>(pid);
414 return num_ent_packed;
417 template<
class Packet,
420 class BufferDeviceType>
421 struct PackCrsGraphFunctor {
422 using local_graph_type = LocalGraph;
423 using local_map_type = LocalMap;
427 using num_packets_per_lid_view_type =
428 Kokkos::View<const size_t*, BufferDeviceType>;
429 using offsets_view_type = Kokkos::View<const size_t*, BufferDeviceType>;
430 using exports_view_type = Kokkos::View<Packet*, BufferDeviceType>;
431 using export_lids_view_type =
433 using source_pids_view_type =
437 typename num_packets_per_lid_view_type::non_const_value_type;
438 using offset_type =
typename offsets_view_type::non_const_value_type;
439 using value_type = Kokkos::pair<int, LO>;
441 static_assert (std::is_same<LO, typename local_graph_type::data_type>::value,
442 "local_map_type::local_ordinal_type and "
443 "local_graph_type::data_type must be the same.");
445 local_graph_type local_graph;
446 local_map_type local_col_map;
447 exports_view_type exports;
448 num_packets_per_lid_view_type num_packets_per_lid;
449 export_lids_view_type export_lids;
450 source_pids_view_type source_pids;
451 offsets_view_type offsets;
454 PackCrsGraphFunctor(
const local_graph_type& local_graph_in,
455 const local_map_type& local_col_map_in,
456 const exports_view_type& exports_in,
457 const num_packets_per_lid_view_type& num_packets_per_lid_in,
458 const export_lids_view_type& export_lids_in,
459 const source_pids_view_type& source_pids_in,
460 const offsets_view_type& offsets_in,
461 const bool pack_pids_in) :
462 local_graph (local_graph_in),
463 local_col_map (local_col_map_in),
464 exports (exports_in),
465 num_packets_per_lid (num_packets_per_lid_in),
466 export_lids (export_lids_in),
467 source_pids (source_pids_in),
468 offsets (offsets_in),
469 pack_pids (pack_pids_in)
471 const LO numRows = local_graph_in.numRows ();
473 static_cast<LO
> (local_graph.row_map.extent (0));
474 TEUCHOS_TEST_FOR_EXCEPTION
475 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
476 std::logic_error,
"local_graph.row_map.extent(0) = "
477 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 paired with the invalid local ordinal.
// value_type is Kokkos::pair<int, LO>; do_pack treats a nonzero `first` as an
// error code and reports `second` as the first bad row.
480 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
482 using ::Tpetra::Details::OrdinalTraits;
483 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Combine two partial reduction results. The visible condition fires only
// when src carries an error and dst does not yet.
// NOTE(review): the body of this branch is not visible in this excerpt;
// presumably it copies src into dst -- confirm.
486 KOKKOS_INLINE_FUNCTION
void
487 join (value_type& dst,
const value_type& src)
const
491 if (src.first != 0 && dst.first == 0) {
// Reduction body: pack the graph row export_lids[i] into `exports` at
// offsets[i], flagging problems in dst as (error code, i).
// Codes assigned below: 1 = row index out of range, 2 = offset/size exceeds
// the export buffer, 3 = packRow wrote a different number of packets than
// num_packets_per_lid(i).
// NOTE(review): closing braces and some branches are not visible in this excerpt.
496 KOKKOS_INLINE_FUNCTION
497 void operator() (
const LO i, value_type& dst)
const
499 const size_t offset = offsets[i];
500 const LO export_lid = export_lids[i];
501 const size_t buf_size = exports.size();
502 const size_t num_packets_this_lid = num_packets_per_lid(i);
// NOTE(review): row_map is indexed with export_lid here, before export_lid is
// range-checked against numRows() below -- confirm an out-of-range export_lid
// cannot reach this read.
503 const size_t num_ent =
504 static_cast<size_t> (local_graph.row_map[export_lid+1]
505 - local_graph.row_map[export_lid]);
// NOTE(review): init() sets dst.first to 0, so the `dst.first != 0` guards in
// this function only overwrite an error that is already present; they look
// inverted relative to join(), which tests `dst.first == 0`. Confirm whether
// `== 0` was intended so that the first error is actually recorded.
515 if (export_lid >= static_cast<LO>(local_graph.numRows())) {
516 if (dst.first != 0) {
517 dst = Kokkos::make_pair (1, i);
// The row's packets must fit inside the export buffer.
521 else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
522 if (dst.first != 0) {
523 dst = Kokkos::make_pair (2, i);
// View of this row's local column indices: entries[row_beg, row_end).
533 const auto row_beg = local_graph.row_map[export_lid];
534 const auto row_end = local_graph.row_map[export_lid + 1];
535 auto lids_in = Kokkos::subview (local_graph.entries,
536 Kokkos::make_pair (row_beg, row_end));
537 size_t num_ent_packed_this_row =
538 packRow (local_col_map, exports, lids_in,
539 source_pids, offset, num_ent, pack_pids);
// The number of packets written must match the precomputed count for this row.
540 if (num_ent_packed_this_row != num_packets_this_lid) {
541 if (dst.first != 0) {
542 dst = Kokkos::make_pair (3, i);
555 template<
class Packet,
558 class BufferDeviceType>
560 do_pack(
const LocalGraph& local_graph,
561 const LocalMap& local_map,
562 const Kokkos::View<Packet*, BufferDeviceType>& exports,
563 const typename PackTraits<
565 >::input_array_type& num_packets_per_lid,
566 const typename PackTraits<
568 >::input_array_type& export_lids,
569 const typename PackTraits<
571 >::input_array_type& source_pids,
572 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
573 const bool pack_pids)
576 using execution_space =
typename LocalGraph::device_type::execution_space;
577 using range_type = Kokkos::RangePolicy<execution_space, LO>;
578 const char prefix[] =
"Tpetra::Details::PackCrsGraphImpl::do_pack: ";
580 if (export_lids.extent (0) != 0) {
581 TEUCHOS_TEST_FOR_EXCEPTION
582 (static_cast<size_t> (offsets.extent (0)) !=
583 static_cast<size_t> (export_lids.extent (0) + 1),
584 std::invalid_argument, prefix <<
"offsets.extent(0) = "
585 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
586 << export_lids.extent (0) <<
") + 1.");
587 TEUCHOS_TEST_FOR_EXCEPTION
588 (export_lids.extent (0) != num_packets_per_lid.extent (0),
589 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
590 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
591 << num_packets_per_lid.extent (0) <<
".");
595 TEUCHOS_TEST_FOR_EXCEPTION
596 (pack_pids && exports.extent (0) != 0 &&
597 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
598 "pack_pids is true, and exports.extent(0) = " <<
599 exports.extent (0) <<
" != 0, meaning that we need to pack at "
600 "least one graph entry, but source_pids.extent(0) = 0.");
603 using pack_functor_type =
604 PackCrsGraphFunctor<Packet, LocalGraph, LocalMap,
606 pack_functor_type f (local_graph, local_map, exports,
607 num_packets_per_lid, export_lids,
608 source_pids, offsets, pack_pids);
610 typename pack_functor_type::value_type result;
611 range_type range (0, num_packets_per_lid.extent (0));
612 Kokkos::parallel_reduce (range, f, result);
614 if (result.first != 0) {
617 std::ostringstream os;
618 if (result.first == 1) {
619 os <<
"invalid local row index";
621 else if (result.first == 2) {
622 os <<
"invalid offset";
624 TEUCHOS_TEST_FOR_EXCEPTION
625 (
true, std::runtime_error, prefix <<
"PackCrsGraphFunctor "
626 "reported error code " << result.first <<
" (" << os.str ()
627 <<
") for the first bad row " << result.second <<
".");
657 template<
typename LO,
typename GO,
typename NT>
660 (
const CrsGraph<LO,GO,NT>& sourceGraph,
662 typename CrsGraph<LO,GO,NT>::packet_type*,
663 typename CrsGraph<LO,GO,NT>::buffer_device_type
667 typename CrsGraph<LO,GO,NT>::buffer_device_type
668 >& num_packets_per_lid,
671 typename CrsGraph<LO, GO, NT>::buffer_device_type
675 typename CrsGraph<LO, GO, NT>::buffer_device_type
677 size_t& constant_num_packets,
678 const bool pack_pids)
681 using crs_graph_type = CrsGraph<LO, GO, NT>;
682 using packet_type =
typename crs_graph_type::packet_type;
683 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
684 using exports_view_type = Kokkos::DualView<packet_type*, buffer_device_type>;
685 using local_graph_device_type =
typename crs_graph_type::local_graph_device_type;
687 const char prefix[] =
"Tpetra::Details::packCrsGraph: ";
688 constexpr
bool debug =
false;
690 local_graph_device_type local_graph = sourceGraph.getLocalGraphDevice ();
691 local_map_type local_col_map = sourceGraph.getColMap ()->getLocalMap ();
696 constant_num_packets = 0;
698 const size_t num_export_lids (export_lids.extent (0));
699 TEUCHOS_TEST_FOR_EXCEPTION
700 (num_export_lids !=
size_t (num_packets_per_lid.extent (0)),
701 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
702 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
703 << num_packets_per_lid.extent (0) <<
".");
704 if (num_export_lids != 0) {
705 TEUCHOS_TEST_FOR_EXCEPTION
706 (num_packets_per_lid.data () ==
nullptr, std::invalid_argument,
707 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
708 "num_packets_per_lid.data() = "
709 << num_packets_per_lid.data () <<
" == NULL.");
712 if (num_export_lids == 0) {
713 exports = exports_view_type (
"exports", 0);
718 View<size_t*, buffer_device_type> offsets (
"offsets", num_export_lids + 1);
723 computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
724 local_graph.row_map, export_lids, export_pids);
727 if (count >
size_t (exports.extent (0))) {
728 exports = exports_view_type (
"exports", count);
730 std::ostringstream os;
731 os <<
"*** exports resized to " << count << std::endl;
732 std::cerr << os.str ();
736 std::ostringstream os;
737 os <<
"*** count: " << count <<
", exports.extent(0): "
738 << exports.extent (0) << std::endl;
739 std::cerr << os.str ();
745 TEUCHOS_TEST_FOR_EXCEPTION
746 (pack_pids && exports.extent (0) != 0 &&
747 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
748 "pack_pids is true, and exports.extent(0) = " <<
749 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
750 "one graph entry, but export_pids.extent(0) = 0.");
752 exports.modify_device ();
753 auto exports_d = exports.view_device ();
754 do_pack<packet_type, local_graph_device_type, local_map_type, buffer_device_type>
755 (local_graph, local_col_map, exports_d, num_packets_per_lid,
756 export_lids, export_pids, offsets, pack_pids);
762 template<
typename LO,
typename GO,
typename NT>
766 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
767 const Teuchos::ArrayView<const LO>& exportLIDs,
768 size_t& constantNumPackets)
770 using Kokkos::HostSpace;
771 using Kokkos::MemoryUnmanaged;
774 using packet_type =
typename crs_graph_type::packet_type;
775 using BDT =
typename crs_graph_type::buffer_device_type;
782 View<size_t*, BDT> num_packets_per_lid_d =
784 numPacketsPerLID.getRawPtr (),
785 numPacketsPerLID.size (),
false,
786 "num_packets_per_lid");
789 View<const LO*, BDT> export_lids_d =
791 exportLIDs.getRawPtr (),
792 exportLIDs.size (),
true,
794 View<const int*, BDT> export_pids_d;
795 Kokkos::DualView<packet_type*, BDT> exports_dv;
796 constexpr
bool pack_pids =
false;
800 typename decltype (num_packets_per_lid_d)::non_const_value_type,
802 "num_packets_per_lid_d's non_const_value_type should be size_t.");
805 typename decltype (num_packets_per_lid_d)::device_type,
807 "num_packets_per_lid_d's BDT should be size_t.");
810 typename decltype (export_lids_d)::device_type,
812 "export_lids_d's device_type should be BDT.");
815 typename decltype (export_pids_d)::non_const_value_type,
817 "export_pids_d's non_const_value_type should be int.");
820 typename decltype (export_pids_d)::device_type,
822 "export_pids_d's device_type should be BDT.");
825 (sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
826 export_pids_d, constantNumPackets, pack_pids);
830 View<size_t*, HostSpace, MemoryUnmanaged>
831 num_packets_per_lid_h (numPacketsPerLID.getRawPtr (),
832 numPacketsPerLID.size ());
835 using execution_space =
typename BDT::execution_space;
836 Kokkos::deep_copy (execution_space(), num_packets_per_lid_h, num_packets_per_lid_d);
843 if (static_cast<size_t> (exports.size ()) !=
844 static_cast<size_t> (exports_dv.extent (0))) {
845 exports.resize (exports_dv.extent (0));
847 View<packet_type*, HostSpace, MemoryUnmanaged>
848 exports_h (exports.getRawPtr (), exports.size ());
851 execution_space().fence();
856 template<
typename LO,
typename GO,
typename NT>
859 const Kokkos::DualView<
863 const Kokkos::DualView<
873 > num_packets_per_lid,
874 size_t& constant_num_packets,
875 const bool pack_pids)
879 using BDT =
typename crs_graph_type::buffer_device_type;
880 using PT =
typename crs_graph_type::packet_type;
881 using exports_dual_view_type = Kokkos::DualView<PT*, BDT>;
882 using LGT =
typename crs_graph_type::local_graph_device_type;
883 using LMT =
typename crs_graph_type::map_type::local_map_type;
884 const char prefix[] =
"Tpetra::Details::packCrsGraphNew: ";
887 const LMT local_col_map = sourceGraph.
getColMap ()->getLocalMap ();
892 constant_num_packets = 0;
894 const size_t num_export_lids =
895 static_cast<size_t> (export_lids.extent (0));
896 TEUCHOS_TEST_FOR_EXCEPTION
898 static_cast<size_t> (num_packets_per_lid.extent (0)),
899 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
900 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
901 << num_packets_per_lid.extent (0) <<
".");
902 TEUCHOS_TEST_FOR_EXCEPTION
903 (num_export_lids != 0 &&
904 num_packets_per_lid.view_device ().data () ==
nullptr,
905 std::invalid_argument, prefix <<
"num_export_lids = "<< num_export_lids
906 <<
" != 0, but num_packets_per_lid.view_device().data() = nullptr.");
908 if (num_export_lids == 0) {
909 exports = exports_dual_view_type ();
914 using offsets_type = Kokkos::View<size_t*, BDT>;
915 offsets_type offsets (
"offsets", num_export_lids + 1);
919 num_packets_per_lid.clear_sync_state ();
920 num_packets_per_lid.modify_device ();
921 using PackCrsGraphImpl::computeNumPacketsAndOffsets;
923 computeNumPacketsAndOffsets (offsets, num_packets_per_lid.view_device (),
925 export_lids.view_device (),
926 export_pids.view_device ());
929 if (count > static_cast<size_t> (exports.extent (0))) {
930 exports = exports_dual_view_type (
"exports", count);
936 TEUCHOS_TEST_FOR_EXCEPTION
937 (pack_pids && exports.extent (0) != 0 &&
938 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
939 "pack_pids is true, and exports.extent(0) = " <<
940 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
941 "one graph entry, but export_pids.extent(0) = 0.");
943 exports.modify_device ();
944 using PackCrsGraphImpl::do_pack;
945 do_pack<PT, LGT, LMT, BDT> (local_graph, local_col_map,
946 exports.view_device (),
947 num_packets_per_lid.view_device (),
948 export_lids.view_device (),
949 export_pids.view_device (),
953 template<
typename LO,
typename GO,
typename NT>
961 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
962 const Teuchos::ArrayView<const LO>& exportLIDs,
963 const Teuchos::ArrayView<const int>& sourcePIDs,
964 size_t& constantNumPackets)
966 using Kokkos::HostSpace;
967 using Kokkos::MemoryUnmanaged;
970 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
976 View<size_t*, buffer_device_type> num_packets_per_lid_d =
978 numPacketsPerLID.getRawPtr (),
979 numPacketsPerLID.size (),
false,
980 "num_packets_per_lid");
984 View<const LO*, buffer_device_type> export_lids_d =
986 exportLIDs.getRawPtr (),
987 exportLIDs.size (),
true,
991 View<const int*, buffer_device_type> export_pids_d =
993 sourcePIDs.getRawPtr (),
994 sourcePIDs.size (),
true,
996 constexpr
bool pack_pids =
true;
998 (sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
999 export_pids_d, constantNumPackets, pack_pids);
1003 View<size_t*, HostSpace, MemoryUnmanaged> num_packets_per_lid_h
1004 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1006 using execution_space =
typename buffer_device_type::execution_space;
1008 num_packets_per_lid_h, num_packets_per_lid_d);
1009 execution_space().fence();
1015 #define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT( LO, GO, NT ) \
1017 Details::packCrsGraph<LO, GO, NT> ( \
1018 const CrsGraph<LO, GO, NT>&, \
1019 Teuchos::Array<CrsGraph<LO,GO,NT>::packet_type>&, \
1020 const Teuchos::ArrayView<size_t>&, \
1021 const Teuchos::ArrayView<const LO>&, \
1024 Details::packCrsGraphNew<LO, GO, NT> ( \
1025 const CrsGraph<LO, GO, NT>&, \
1026 const Kokkos::DualView< \
1028 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1029 const Kokkos::DualView< \
1031 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1033 CrsGraph<LO,GO,NT>::packet_type*, \
1034 CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1037 CrsGraph<LO,GO,NT>::buffer_device_type>, \
1041 Details::packCrsGraphWithOwningPIDs<LO, GO, NT> ( \
1042 const CrsGraph<LO, GO, NT>&, \
1043 Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1044 const Teuchos::ArrayView<size_t>&, \
1045 const Teuchos::ArrayView<const LO>&, \
1046 const Teuchos::ArrayView<const int>&, \
1049 #endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
GlobalOrdinal global_ordinal_type
The type of global indices.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types, into the Tpetra::Details namespace.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Declaration of the Tpetra::CrsGraph class.
"Local" part of Map suitable for Kokkos kernels.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interface.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Declaration and definition of Tpetra::Details::getEntryOnHost.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
LocalOrdinal local_ordinal_type
The type of local indices.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.