42 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
85 #ifndef DOXYGEN_SHOULD_SKIP_THIS
88 #endif // DOXYGEN_SHOULD_SKIP_THIS
95 namespace PackCrsMatrixImpl {
// Template parameters, value-type aliases and compile-time checks of the scan
// functor that computeNumPacketsAndOffsets refers to as
// NumPacketsAndOffsetsFunctor and runs through Kokkos::parallel_scan.
//
// NOTE(review): the embedded original line numbers skip values in this range
// (108, 110-112, 114-116, 122, 147-148), so parts of the declaration are not
// shown in this listing.
103 template<
class OutputOffsetsViewType,
104 class CountsViewType,
105 class InputOffsetsViewType,
106 class InputLocalRowIndicesViewType,
107 class InputLocalRowPidsViewType,
109 #ifdef HAVE_TPETRA_DEBUG
113 #endif // HAVE_TPETRA_DEBUG
// Entry types of the five views, with any const qualifier removed.
117 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
118 typedef typename CountsViewType::non_const_value_type count_type;
119 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
120 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
121 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The output-offsets view decides the device; the counts view must share its
// execution space because both are written by the same kernel.
123 typedef typename OutputOffsetsViewType::device_type device_type;
124 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
125 typename device_type::execution_space>::value,
126 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
127 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
128 "OutputOffsetsViewType must be a Kokkos::View.");
// value_type equals non_const_value_type only when the view's entries are
// not const, i.e. when the kernel is allowed to write to them.
129 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
130 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
131 static_assert (std::is_integral<output_offset_type>::value,
132 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
133 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
134 "CountsViewType must be a Kokkos::View.");
// Compare against count_type (CountsViewType's own nonconst entry type), not
// output_offset_type: this assertion is about constness of the counts view,
// so it must not additionally force counts and offsets to share one type.
135 static_assert (std::is_same<typename CountsViewType::value_type, count_type>::value,
136 "CountsViewType must be a nonconst Kokkos::View.");
137 static_assert (std::is_integral<count_type>::value,
138 "The type of each entry of CountsViewType must be a built-in integer type.");
139 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
140 "InputOffsetsViewType must be a Kokkos::View.");
141 static_assert (std::is_integral<input_offset_type>::value,
142 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
143 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
144 "InputLocalRowIndicesViewType must be a Kokkos::View.");
145 static_assert (std::is_integral<local_row_index_type>::value,
146 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor fragment (its opening lines are not part of this listing).
// Takes the output and input views plus four per-item byte sizes, stores them
// in the members of the same name, and then checks the view extents; a
// mismatch is reported as std::invalid_argument through
// TEUCHOS_TEST_FOR_EXCEPTION.
149 const CountsViewType& counts,
150 const InputOffsetsViewType& rowOffsets,
151 const InputLocalRowIndicesViewType& lclRowInds,
152 const InputLocalRowPidsViewType& lclRowPids,
153 const count_type sizeOfLclCount,
154 const count_type sizeOfGblColInd,
155 const count_type sizeOfPid,
156 const count_type sizeOfValue) :
157 outputOffsets_ (outputOffsets),
159 rowOffsets_ (rowOffsets),
160 lclRowInds_ (lclRowInds),
161 lclRowPids_ (lclRowPids),
162 sizeOfLclCount_ (sizeOfLclCount),
163 sizeOfGblColInd_ (sizeOfGblColInd),
164 sizeOfPid_ (sizeOfPid),
165 sizeOfValue_ (sizeOfValue),
// There is one row to pack per entry of lclRowInds_.
169 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
// counts_ must hold exactly one entry per row to pack.
171 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
172 std::ostringstream os;
173 os <<
"lclRowInds.extent(0) = " << numRowsToPack
174 <<
" != counts.extent(0) = " << counts_.extent (0)
176 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// outputOffsets_ must hold one entry more than there are rows to pack.
178 if (static_cast<size_t> (numRowsToPack + 1) !=
179 static_cast<size_t> (outputOffsets_.extent (0))) {
180 std::ostringstream os;
181 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
182 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
184 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body, invoked once per position curInd; `update` is the running total
// carried by the scan and `final` is the flag the scan passes to the functor.
//
// NOTE(review): this listing drops lines between the statements below (for
// example the bodies of the two range checks), so the exact conditions under
// which each store executes cannot be confirmed from here.
189 KOKKOS_INLINE_FUNCTION
void
190 operator() (
const local_row_index_type& curInd,
191 output_offset_type& update,
192 const bool final)
const
// Range checks of curInd against zero and against outputOffsets_.extent(0).
195 if (curInd < static_cast<local_row_index_type> (0)) {
203 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
// The running total becomes the offset stored at position curInd.
208 outputOffsets_(curInd) = update;
// Only positions below counts_.extent(0) have a row and a counts_ slot.
211 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
212 const auto lclRow = lclRowInds_(curInd);
// Detect a local row index that is negative or for which rowOffsets_ has no
// entry at lclRow + 1.
213 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
214 static_cast<local_row_index_type> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in the row: difference of consecutive row offsets.
222 const count_type count =
223 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// An empty row needs zero bytes. Otherwise the size is one count field plus
// a per-entry size; the PID size is part of the per-entry size only when
// lclRowPids_ is nonempty. (The last term of the sum is cut off here.)
229 const count_type numBytes = (count == 0) ?
230 static_cast<count_type> (0) :
231 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
232 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
// Record the number of bytes for this position.
236 counts_(curInd) = numBytes;
// Fragment of the error accessor (computeNumPacketsAndOffsets calls
// f.getError ()): creates a host mirror of the error_ view. The rest of the
// function is not part of this listing.
248 auto error_h = Kokkos::create_mirror_view (error_);
// Data members of the functor.
// Output views written by operator().
254 OutputOffsetsViewType outputOffsets_;
255 CountsViewType counts_;
// Input views, held through their const_type so the kernel only reads them.
256 typename InputOffsetsViewType::const_type rowOffsets_;
257 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
258 typename InputLocalRowPidsViewType::const_type lclRowPids_;
// Byte sizes used by operator() to compute the size of one packed row.
259 count_type sizeOfLclCount_;
260 count_type sizeOfGblColInd_;
261 count_type sizeOfPid_;
262 count_type sizeOfValue_;
// View holding a single int on the device; read back through getError.
263 Kokkos::View<int, device_type> error_;
// Compute the number of packets and offsets for the pack procedure.
//
// Parameters:
//   outputOffsets - written by the scan; must have lclRowInds.extent(0) + 1
//                   entries.
//   counts        - written by the scan; must have lclRowInds.extent(0)
//                   entries.
//   rowOffsets    - row offsets of the matrix; must have more than one entry
//                   when there is at least one row to pack.
//   lclRowInds    - local indices of the rows to pack.
//   lclRowPids    - forwarded to the functor.
//   sizeOfLclCount, sizeOfGblColInd, sizeOfPid, sizeOfValue
//                 - byte sizes forwarded to the functor.
//
// Returns a count_type read from outputOffsets through getEntryOnHost (the
// index argument is cut off in this listing).
//
// Reports std::invalid_argument for inconsistent view extents and
// std::runtime_error when the functor's error code is nonzero, both through
// TEUCHOS_TEST_FOR_EXCEPTION.
275 template<
class OutputOffsetsViewType,
276 class CountsViewType,
277 class InputOffsetsViewType,
278 class InputLocalRowIndicesViewType,
279 class InputLocalRowPidsViewType>
280 typename CountsViewType::non_const_value_type
281 computeNumPacketsAndOffsets (
const OutputOffsetsViewType& outputOffsets,
282 const CountsViewType& counts,
283 const InputOffsetsViewType& rowOffsets,
284 const InputLocalRowIndicesViewType& lclRowInds,
285 const InputLocalRowPidsViewType& lclRowPids,
286 const typename CountsViewType::non_const_value_type sizeOfLclCount,
287 const typename CountsViewType::non_const_value_type sizeOfGblColInd,
288 const typename CountsViewType::non_const_value_type sizeOfPid,
289 const typename CountsViewType::non_const_value_type sizeOfValue)
// The functor is instantiated with the const_type of each input view.
291 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
292 CountsViewType,
typename InputOffsetsViewType::const_type,
293 typename InputLocalRowIndicesViewType::const_type,
294 typename InputLocalRowPidsViewType::const_type> functor_type;
295 typedef typename CountsViewType::non_const_value_type count_type;
296 typedef typename OutputOffsetsViewType::size_type size_type;
297 typedef typename OutputOffsetsViewType::execution_space execution_space;
298 typedef typename functor_type::local_row_index_type LO;
299 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
300 const char prefix[] =
"computeNumPacketsAndOffsets: ";
302 count_type count = 0;
303 const count_type numRowsToPack = lclRowInds.extent (0);
// Special case for no rows to pack; the branch body is not shown here.
305 if (numRowsToPack == 0) {
// Argument checks for the case that there is at least one row to pack.
309 TEUCHOS_TEST_FOR_EXCEPTION
310 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
311 std::invalid_argument, prefix <<
"There is at least one row to pack, "
312 "but the matrix has no rows. lclRowInds.extent(0) = " <<
313 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
314 rowOffsets.extent (0) <<
" <= 1.");
315 TEUCHOS_TEST_FOR_EXCEPTION
316 (outputOffsets.extent (0) !=
317 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
318 prefix <<
"Output dimension does not match number of rows to pack. "
319 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
320 <<
" != lclRowInds.extent(0) + 1 = "
321 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
322 TEUCHOS_TEST_FOR_EXCEPTION
323 (counts.extent (0) != numRowsToPack, std::invalid_argument,
324 prefix <<
"counts.extent(0) = " << counts.extent (0)
325 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// Scan over numRowsToPack + 1 positions: one per row plus one extra position
// for the last entry of outputOffsets.
327 functor_type f (outputOffsets, counts, rowOffsets,
328 lclRowInds, lclRowPids, sizeOfLclCount,
329 sizeOfGblColInd, sizeOfPid, sizeOfValue);
330 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// A nonzero error code from the functor is turned into std::runtime_error.
333 const int errCode = f.getError ();
334 TEUCHOS_TEST_FOR_EXCEPTION
335 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
336 << errCode <<
" != 0.");
// The statements from here to the "#endif // HAVE_TPETRA_DEBUG" line compare
// outputOffsets(numRowsToPack) with a sum of the counts and build a message
// on mismatch.
// NOTE(review): `total` and `errStr` are not declared in any line shown here,
// and `return {false, errStr}` does not match the count_type return type
// declared above; confirm against the full file whether this code is active.
340 for (LO k = 0; k < numRowsToPack; ++k) {
343 if (outputOffsets(numRowsToPack) != total) {
344 if (errStr.get () == NULL) {
345 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
347 std::ostringstream& os = *errStr;
349 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
350 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
351 << total <<
"." << std::endl;
352 if (numRowsToPack != 0) {
// Print the arrays in full only when there are fewer than 10 rows.
354 if (numRowsToPack < static_cast<LO> (10)) {
355 os <<
"outputOffsets: [";
356 for (LO i = 0; i <= numRowsToPack; ++i) {
357 os << outputOffsets(i);
358 if (static_cast<LO> (i + 1) <= numRowsToPack) {
362 os <<
"]" << std::endl;
364 for (LO i = 0; i < numRowsToPack; ++i) {
366 if (static_cast<LO> (i + 1) < numRowsToPack) {
370 os <<
"]" << std::endl;
373 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
374 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
377 count = outputOffsets(numRowsToPack);
378 return {
false, errStr};
380 #endif // HAVE_TPETRA_DEBUG
// Read the result from outputOffsets through getEntryOnHost.
384 using Tpetra::Details::getEntryOnHost;
385 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Packs one matrix row into the byte buffer `exports`.
//
// Parameters:
//   col_map             - map used to turn local column indices into global
//                         ones (getGlobalElement).
//   exports             - output byte buffer.
//   lids_in             - local column indices of the row.
//   pids_in             - process IDs, looked up by local column index.
//   vals_in             - values of the row.
//   num_ent             - number of entries in the row.
//   num_bytes_per_value - bytes used for one packed value.
//   pack_pids           - whether process IDs are packed as well.
//   (`offset`, the start position in `exports`, is used below; its parameter
//   line is not part of this listing.)
//
// Returns a pair (error code, number of bytes): (0, 0) on the early return
// below, (10, bytes) when the accumulated error code is nonzero, (11, bytes)
// when the byte total differs from the expected size, and (0, bytes) otherwise.
405 template<
class ST,
class ColumnMap,
class BufferDeviceType>
407 Kokkos::pair<int, size_t>
408 packCrsMatrixRow (
const ColumnMap& col_map,
409 const Kokkos::View<char*, BufferDeviceType>& exports,
410 const typename PackTraits<typename ColumnMap::local_ordinal_type, typename ColumnMap::device_type>::input_array_type& lids_in,
411 const typename PackTraits<int, typename ColumnMap::device_type>::input_array_type& pids_in,
412 const typename PackTraits<ST, typename ColumnMap::device_type>::input_array_type& vals_in,
414 const size_t num_ent,
415 const size_t num_bytes_per_value,
416 const bool pack_pids)
418 using Kokkos::subview;
419 typedef typename ColumnMap::local_ordinal_type LO;
420 typedef typename ColumnMap::global_ordinal_type GO;
421 typedef BufferDeviceType BDT;
422 typedef Kokkos::pair<int, size_t> return_type;
// Early return with no error and zero bytes (its condition is not shown).
426 return return_type (0, 0);
// Layout within `exports`, starting at `offset`: the entry count, then the
// global column indices, then the process IDs (length zero unless pack_pids),
// then the values.
429 const LO num_ent_LO =
static_cast<LO
> (num_ent);
430 const size_t num_ent_beg = offset;
433 const size_t gids_beg = num_ent_beg + num_ent_len;
436 const size_t pids_beg = gids_beg + gids_len;
437 const size_t pids_len = pack_pids ?
439 static_cast<size_t> (0);
441 const size_t vals_beg = gids_beg + gids_len + pids_len;
442 const size_t vals_len = num_ent * num_bytes_per_value;
// Start address of each section; pids_out is NULL when PIDs are not packed.
444 char*
const num_ent_out = exports.data () + num_ent_beg;
445 char*
const gids_out = exports.data () + gids_beg;
446 char*
const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
447 char*
const vals_out = exports.data () + vals_beg;
449 size_t num_bytes_out = 0;
// Global column indices: each local index is converted through col_map.
456 for (
size_t k = 0; k < num_ent; ++k) {
457 const LO lid = lids_in[k];
458 const GO gid = col_map.getGlobalElement (lid);
// Process IDs: pids_in is indexed by the local column index, not by k.
463 for (
size_t k = 0; k < num_ent; ++k) {
464 const LO lid = lids_in[k];
465 const int pid = pids_in[lid];
// Accumulate the error code and byte count of a pair `p` (its producer is
// not shown in this listing).
471 error_code += p.first;
472 num_bytes_out += p.second;
475 if (error_code != 0) {
476 return return_type (10, num_bytes_out);
// Consistency check: the bytes written must equal the sum of section lengths.
479 const size_t expected_num_bytes =
480 num_ent_len + gids_len + pids_len + vals_len;
481 if (num_bytes_out != expected_num_bytes) {
482 return return_type (11, num_bytes_out);
484 return return_type (0, num_bytes_out);
// Reduction functor that do_pack runs with Kokkos::parallel_reduce. This part
// holds its type aliases, data members and constructor.
// NOTE(review): several lines are missing from this listing, including the
// beginnings of some typedefs and the declaration of the pack_pids member
// that the constructor initializes.
487 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
488 struct PackCrsMatrixFunctor {
489 typedef LocalMatrix local_matrix_type;
490 typedef LocalMap local_map_type;
491 typedef typename local_matrix_type::value_type ST;
492 typedef typename local_map_type::local_ordinal_type LO;
493 typedef typename local_map_type::global_ordinal_type GO;
494 typedef typename local_matrix_type::device_type DT;
// View types over the buffer device.
496 typedef Kokkos::View<const size_t*, BufferDeviceType>
497 num_packets_per_lid_view_type;
498 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
499 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
501 export_lids_view_type;
503 source_pids_view_type;
505 typedef typename num_packets_per_lid_view_type::non_const_value_type
507 typedef typename offsets_view_type::non_const_value_type
// Reduction result: operator() stores (error code, loop index) in it.
509 typedef Kokkos::pair<int, LO> value_type;
511 static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
512 "local_map_type::local_ordinal_type and "
513 "local_matrix_type::ordinal_type must be the same.");
// Data captured for the kernel.
515 local_matrix_type local_matrix;
516 local_map_type local_col_map;
517 exports_view_type exports;
518 num_packets_per_lid_view_type num_packets_per_lid;
519 export_lids_view_type export_lids;
520 source_pids_view_type source_pids;
521 offsets_view_type offsets;
522 size_t num_bytes_per_value;
// Stores all arguments, then checks that a matrix with a nonzero number of
// rows has a row_map with numRows + 1 entries; a violation is reported as
// std::logic_error through TEUCHOS_TEST_FOR_EXCEPTION.
525 PackCrsMatrixFunctor (
const local_matrix_type& local_matrix_in,
526 const local_map_type& local_col_map_in,
527 const exports_view_type& exports_in,
528 const num_packets_per_lid_view_type& num_packets_per_lid_in,
529 const export_lids_view_type& export_lids_in,
530 const source_pids_view_type& source_pids_in,
531 const offsets_view_type& offsets_in,
532 const size_t num_bytes_per_value_in,
533 const bool pack_pids_in) :
534 local_matrix (local_matrix_in),
535 local_col_map (local_col_map_in),
536 exports (exports_in),
537 num_packets_per_lid (num_packets_per_lid_in),
538 export_lids (export_lids_in),
539 source_pids (source_pids_in),
540 offsets (offsets_in),
541 num_bytes_per_value (num_bytes_per_value_in),
542 pack_pids (pack_pids_in)
544 const LO numRows = local_matrix_in.numRows ();
546 static_cast<LO
> (local_matrix.graph.row_map.extent (0));
547 TEUCHOS_TEST_FOR_EXCEPTION
548 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
549 std::logic_error,
"local_matrix.graph.row_map.extent(0) = "
550 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction hook: sets the result to "no error", i.e. error code 0 paired
// with the invalid value of LO taken from OrdinalTraits.
553 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
555 using ::Tpetra::Details::OrdinalTraits;
556 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Reduction hook that combines two partial results. The visible condition
// holds only when src carries an error and dst does not yet; the statement
// it guards is not part of this listing (presumably it copies src into dst;
// confirm against the full file).
559 KOKKOS_INLINE_FUNCTION
void
560 join (
volatile value_type& dst,
const volatile value_type& src)
const
564 if (src.first != 0 && dst.first == 0) {
// Packs the row named by export_lids[i] into `exports` and records the first
// problem found in `dst` as (error code, i):
//   1 - export_lid is not below local_matrix.numRows ();
//   2 - offset, or offset + num_bytes, lies beyond the end of `exports`;
//   3 - the row packer wrote a different number of bytes than
//       num_packets_per_lid(i);
//   any other nonzero value - the error code returned by packCrsMatrixRow.
//
// Each store into `dst` is guarded by `dst.first == 0` so that only the first
// error is kept. init() sets dst.first to 0 and join() likewise only accepts
// an error when the destination has none, so a guard of `dst.first != 0`
// could never be true and every error would be lost.
//
// NOTE(review): num_ent below reads row_map[export_lid+1] before export_lid
// has been compared with numRows (); confirm that callers never pass an
// out-of-range export LID, or move that read behind the check.
569 KOKKOS_INLINE_FUNCTION
570 void operator() (
const LO i, value_type& dst)
const
572 const size_t offset = offsets[i];
573 const LO export_lid = export_lids[i];
574 const size_t buf_size = exports.size();
575 const size_t num_bytes = num_packets_per_lid(i);
// Number of entries in the row: difference of consecutive row offsets.
576 const size_t num_ent =
577 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
578 - local_matrix.graph.row_map[export_lid]);
// Error 1: the export LID does not name a row of the local matrix.
588 if (export_lid >= local_matrix.numRows ()) {
589 if (dst.first == 0) {
590 dst = Kokkos::make_pair (1, i);
// Error 2: this row's slice of the buffer would run past its end.
594 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
595 if (dst.first == 0) {
596 dst = Kokkos::make_pair (2, i);
// Views of the row's values and local column indices.
606 const auto row_beg = local_matrix.graph.row_map[export_lid];
607 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
608 auto vals_in = subview (local_matrix.values,
609 Kokkos::make_pair (row_beg, row_end));
610 auto lids_in = subview (local_matrix.graph.entries,
611 Kokkos::make_pair (row_beg, row_end));
612 typedef local_map_type LMT;
613 typedef BufferDeviceType BDT;
614 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
615 source_pids, vals_in, offset,
616 num_ent, num_bytes_per_value,
618 int error_code_this_row = p.first;
619 size_t num_bytes_packed_this_row = p.second;
// Pass the row packer's own error code through unchanged.
620 if (error_code_this_row != 0) {
621 if (dst.first == 0) {
622 dst = Kokkos::make_pair (error_code_this_row, i);
// Error 3: the packed size disagrees with the precomputed size.
625 else if (num_bytes_packed_this_row != num_bytes) {
626 if (dst.first == 0) {
627 dst = Kokkos::make_pair (3, i);
// Validates its arguments, runs PackCrsMatrixFunctor over all export LIDs with
// Kokkos::parallel_reduce, and converts a nonzero result into an exception.
//
// Reports std::invalid_argument for inconsistent extents or for pack_pids
// with a nonempty `exports` but empty `source_pids`, and std::runtime_error
// when the reduction result carries a nonzero error code; all through
// TEUCHOS_TEST_FOR_EXCEPTION.
640 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
642 do_pack (
const LocalMatrix& local_matrix,
643 const LocalMap& local_map,
644 const Kokkos::View<char*, BufferDeviceType>& exports,
645 const typename PackTraits<
648 >::input_array_type& num_packets_per_lid,
649 const typename PackTraits<
650 typename LocalMap::local_ordinal_type,
652 >::input_array_type& export_lids,
653 const typename PackTraits<
655 typename LocalMatrix::device_type
656 >::input_array_type& source_pids,
657 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
658 const size_t num_bytes_per_value,
659 const bool pack_pids)
661 typedef typename LocalMap::local_ordinal_type LO;
662 typedef typename LocalMatrix::device_type DT;
663 typedef Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
664 const char prefix[] =
"Tpetra::Details::do_pack: ";
// Extent checks, made only when there is at least one export LID: offsets
// needs one entry more than export_lids, and num_packets_per_lid needs the
// same number of entries as export_lids.
666 if (export_lids.extent (0) != 0) {
667 TEUCHOS_TEST_FOR_EXCEPTION
668 (static_cast<size_t> (offsets.extent (0)) !=
669 static_cast<size_t> (export_lids.extent (0) + 1),
670 std::invalid_argument, prefix <<
"offsets.extent(0) = "
671 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
672 << export_lids.extent (0) <<
") + 1.");
673 TEUCHOS_TEST_FOR_EXCEPTION
674 (export_lids.extent (0) != num_packets_per_lid.extent (0),
675 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
676 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
677 << num_packets_per_lid.extent (0) <<
".");
// Packing PIDs into a nonempty buffer requires a nonempty source_pids.
681 TEUCHOS_TEST_FOR_EXCEPTION
682 (pack_pids && exports.extent (0) != 0 &&
683 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
684 "pack_pids is true, and exports.extent(0) = " <<
685 exports.extent (0) <<
" != 0, meaning that we need to pack at "
686 "least one matrix entry, but source_pids.extent(0) = 0.");
// Run the pack kernel over [0, num_packets_per_lid.extent(0)).
689 typedef PackCrsMatrixFunctor<LocalMatrix, LocalMap,
690 BufferDeviceType> pack_functor_type;
691 pack_functor_type f (local_matrix, local_map, exports,
692 num_packets_per_lid, export_lids,
693 source_pids, offsets, num_bytes_per_value,
696 typename pack_functor_type::value_type result;
697 range_type range (0, num_packets_per_lid.extent (0));
698 Kokkos::parallel_reduce (range, f, result);
// result.first is the error code and result.second the offending index.
// Codes 1 and 2 get extra detail in the message.
700 if (result.first != 0) {
701 std::ostringstream os;
703 if (result.first == 1) {
704 auto export_lids_h = Kokkos::create_mirror_view (export_lids);
706 const auto firstBadLid = export_lids_h(result.second);
707 os <<
"First bad export LID: export_lids(i=" << result.second <<
") = "
710 else if (result.first == 2) {
711 auto offsets_h = Kokkos::create_mirror_view (offsets);
713 const auto firstBadOffset = offsets_h(result.second);
715 auto num_packets_per_lid_h =
716 Kokkos::create_mirror_view (num_packets_per_lid);
718 os <<
"First bad offset: offsets(i=" << result.second <<
") = "
719 << firstBadOffset <<
", num_packets_per_lid(i) = "
720 << num_packets_per_lid_h(result.second) <<
", buf_size = "
724 TEUCHOS_TEST_FOR_EXCEPTION
725 (
true, std::runtime_error, prefix <<
"PackCrsMatrixFunctor reported "
726 "error code " << result.first <<
" for the first bad row "
727 << result.second <<
". " << os.str ());
// Shared implementation behind the public pack functions: determines the
// number of bytes per value, computes per-row sizes and offsets, makes sure
// `exports` is large enough, and calls do_pack on the device view of
// `exports`.
//
// constant_num_packets is always set to 0. Argument problems are reported as
// std::invalid_argument through TEUCHOS_TEST_FOR_EXCEPTION.
// NOTE(review): the last parameter (passed as `distor` by the callers) and
// several statements are not part of this listing.
760 template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
762 packCrsMatrix (
const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
763 Kokkos::DualView<char*, BufferDeviceType>& exports,
764 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
765 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
766 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
767 size_t& constant_num_packets,
768 const bool pack_pids,
772 typedef BufferDeviceType DT;
773 typedef typename DT::execution_space execution_space;
774 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
775 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
// Compile-time switch; it is false here.
776 constexpr
bool debug =
false;
778 auto local_matrix = sourceMatrix.getLocalMatrix ();
779 auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
784 constant_num_packets = 0;
// export_lids and num_packets_per_lid must agree in length, and the latter
// must point to storage when there is anything to export.
786 const size_t num_export_lids =
787 static_cast<size_t> (export_lids.extent (0));
788 TEUCHOS_TEST_FOR_EXCEPTION
790 static_cast<size_t> (num_packets_per_lid.extent (0)),
791 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
792 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
793 << num_packets_per_lid.extent (0) <<
".");
794 if (num_export_lids != 0) {
795 TEUCHOS_TEST_FOR_EXCEPTION
796 (num_packets_per_lid.data () == NULL, std::invalid_argument,
797 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
798 "num_packets_per_lid.data() = "
799 << num_packets_per_lid.data () <<
" == NULL.");
// Bytes per packed value. One branch depends on
// PackTraits<ST, DT>::compileTimeSize; the other looks at the first local
// value (if any) and combines the local result over the matrix's
// communicator with reduceAll.
806 size_t num_bytes_per_value = 0;
807 if (PackTraits<ST, DT>::compileTimeSize) {
821 size_t num_bytes_per_value_l = 0;
822 if (local_matrix.values.extent(0) > 0) {
823 const ST& val = local_matrix.values(0);
826 using Teuchos::reduceAll;
827 reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
829 num_bytes_per_value_l,
830 Teuchos::outArg (num_bytes_per_value));
// Nothing to export: replace `exports` by an empty buffer, with a fence
// before and after the reassignment.
833 if (num_export_lids == 0) {
837 execution_space::fence ();
838 exports = exports_view_type (
"exports", 0);
839 execution_space::fence ();
// One offset per export LID plus one extra entry.
844 Kokkos::View<size_t*, DT> offsets (
"offsets", num_export_lids + 1);
849 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
850 local_matrix.graph.row_map, export_lids,
852 num_bytes_per_lid, num_bytes_per_gid,
853 num_bytes_per_pid, num_bytes_per_value);
// Reallocate `exports` when it is smaller than the required byte count.
856 if (count > static_cast<size_t> (exports.extent (0))) {
860 execution_space::fence ();
861 exports = exports_view_type (
"exports", count);
// Diagnostic output to std::cerr (presumably guarded by `debug`; the guard
// lines are not shown).
863 std::ostringstream os;
864 os <<
"*** exports resized to " << count << std::endl;
865 std::cerr << os.str ();
867 execution_space::fence ();
870 std::ostringstream os;
871 os <<
"*** count: " << count <<
", exports.extent(0): "
872 << exports.extent (0) << std::endl;
873 std::cerr << os.str ();
// Packing PIDs into a nonempty buffer requires a nonempty export_pids.
879 TEUCHOS_TEST_FOR_EXCEPTION
880 (pack_pids && exports.extent (0) != 0 &&
881 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
882 "pack_pids is true, and exports.extent(0) = " <<
883 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
884 "one matrix entry, but export_pids.extent(0) = 0.");
886 typedef typename std::decay<decltype (local_matrix)>::type
888 typedef typename std::decay<decltype (local_col_map)>::type
// Mark the device copy of `exports` as modified and pack into it.
891 exports.modify_device ();
892 auto exports_d = exports.view_device ();
893 do_pack<local_matrix_type, local_map_type, DT>
894 (local_matrix, local_col_map, exports_d, num_packets_per_lid,
895 export_lids, export_pids, offsets, num_bytes_per_value,
// Pack specified entries of the given local sparse matrix for communication.
// This overload takes Teuchos host arrays: it builds device-side views for
// the inputs, calls PackCrsMatrixImpl::packCrsMatrix without packing process
// IDs, and wraps the caller's host arrays in host views for the results.
// NOTE(review): the calls that create the device views and that copy data
// back are not part of this listing.
902 template<
typename ST,
typename LO,
typename GO,
typename NT>
905 Teuchos::Array<char>& exports,
906 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
907 const Teuchos::ArrayView<const LO>& exportLIDs,
908 size_t& constantNumPackets,
912 using device_type =
typename local_matrix_type::device_type;
// Host execution space that belongs to a mirror of a device view, combined
// with HostSpace.
914 using host_exec_space =
typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
915 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
921 typename local_matrix_type::device_type outputDevice;
// Device view for numPacketsPerLID; the `false` argument sits in the position
// of a copy flag (presumably of create_mirror_view_from_raw_host_array;
// the callee name is not shown).
922 auto num_packets_per_lid_d =
924 numPacketsPerLID.getRawPtr (),
925 numPacketsPerLID.size (),
false,
926 "num_packets_per_lid");
// Device view for exportLIDs, created with `true` in the same position.
935 exportLIDs.getRawPtr (),
936 exportLIDs.size (),
true,
938 static_assert (std::is_same<
typename decltype (export_lids_d)::device_type,
939 buffer_device_type>::value,
940 "export_lids_d has the wrong device_type.");
// No process IDs are packed, so the PID view is empty.
943 Kokkos::View<int*, device_type> export_pids_d (
"export_pids", 0);
945 Kokkos::DualView<char*, buffer_device_type> exports_dv (
"exports", 0);
946 constexpr
bool pack_pids =
false;
947 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
948 sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
949 export_pids_d, constantNumPackets, pack_pids, distor);
// Host view wrapping the caller's numPacketsPerLID array.
953 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
954 (numPacketsPerLID.getRawPtr (),
955 numPacketsPerLID.size ());
// Make the caller's array as long as the packed buffer, then wrap it.
963 if (static_cast<size_t> (exports.size ()) !=
964 static_cast<size_t> (exports_dv.extent (0))) {
965 exports.resize (exports_dv.extent (0));
967 Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
// Pack specified entries of the given local sparse matrix for communication,
// for the DualView-based interface. Forwards to
// PackCrsMatrixImpl::packCrsMatrix with the device views of its arguments and
// without packing process IDs.
972 template<
typename ST,
typename LO,
typename GO,
typename NT>
975 Kokkos::DualView<
char*,
977 const Kokkos::DualView<
size_t*,
979 const Kokkos::DualView<
const LO*,
981 size_t& constantNumPackets,
// No process IDs are packed, so the PID view is empty.
988 Kokkos::View<int*, device_type> exportPIDs_d (
"exportPIDs", 0);
989 constexpr
bool pack_pids =
false;
// numPacketsPerLID is a const reference, so the sync state is reset and the
// device side marked as modified on a copy of the DualView.
992 auto numPacketsPerLID_nc = numPacketsPerLID;
993 numPacketsPerLID_nc.clear_sync_state ();
994 numPacketsPerLID_nc.modify_device ();
995 auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
// The export LIDs must not need a sync to device before their device view
// is used.
998 TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
999 auto exportLIDs_d = exportLIDs.view_device ();
1001 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
1002 sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
1003 exportPIDs_d, constantNumPackets, pack_pids, distor);
// Pack specified entries of the given local sparse matrix for communication,
// including process IDs (pack_pids is true). Takes Teuchos host arrays,
// builds device-side views for them, calls
// PackCrsMatrixImpl::packCrsMatrix, and handles numPacketsPerLID afterwards
// when it is nonempty.
//
// Exceptions from the pack call and from the copy step are caught and
// described on std::cerr.
// NOTE(review): the guards around the std::cerr output, the bodies that
// follow it in the catch blocks, and the calls that create the device views
// are not part of this listing.
1006 template<
typename ST,
typename LO,
typename GO,
typename NT>
1010 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
1011 const Teuchos::ArrayView<const LO>& exportLIDs,
1012 const Teuchos::ArrayView<const int>& sourcePIDs,
1013 size_t& constantNumPackets,
1018 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
1019 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
1021 typename local_matrix_type::device_type outputDevice;
// Message prefix; it is only allocated further below.
1024 std::unique_ptr<std::string> prefix;
// Rank of this process in the communicator of the matrix's Map; the lambda
// has separate branches for a null Map and a null communicator.
1026 const int myRank = [&] () {
1027 auto map = sourceMatrix.
getMap ();
1028 if (map.get () ==
nullptr) {
1031 auto comm = map->getComm ();
1032 if (comm.get () ==
nullptr) {
1035 return comm->getRank ();
1037 std::ostringstream os;
1038 os <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs: ";
1039 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1041 std::ostringstream os2;
1042 os2 << *prefix <<
"start" << std::endl;
1043 std::cerr << os2.str ();
// Device views for the three host arrays. numPacketsPerLID is created with
// `false` and the two input arrays with `true` in the same argument position.
1050 auto num_packets_per_lid_d =
1052 numPacketsPerLID.getRawPtr (),
1053 numPacketsPerLID.size (),
false,
1054 "num_packets_per_lid");
1058 auto export_lids_d =
1060 exportLIDs.getRawPtr (),
1061 exportLIDs.size (),
true,
1065 auto export_pids_d =
1067 sourcePIDs.getRawPtr (),
1068 sourcePIDs.size (),
true,
1070 constexpr
bool pack_pids =
true;
1072 PackCrsMatrixImpl::packCrsMatrix
1073 (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1074 export_pids_d, constantNumPackets, pack_pids, distor);
// Describe a failure of the pack call on std::cerr.
1076 catch (std::exception& e) {
1078 std::ostringstream os;
1079 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw: "
1080 << e.what () << std::endl;
1081 std::cerr << os.str ();
1087 std::ostringstream os;
1088 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw an exception "
1089 "not a subclass of std::exception" << std::endl;
1090 std::cerr << os.str ();
// Only when numPacketsPerLID is nonempty: wrap the caller's array in a host
// view (the messages below name Kokkos::deep_copy as the operation guarded
// by this try block).
1095 if (numPacketsPerLID.size () != 0) {
1099 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1100 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1103 catch (std::exception& e) {
1105 std::ostringstream os;
1106 os << *prefix <<
"Kokkos::deep_copy threw: " << e.what () << std::endl;
1107 std::cerr << os.str ();
1113 std::ostringstream os;
1114 os << *prefix <<
"Kokkos::deep_copy threw an exception not a subclass "
1115 "of std::exception" << std::endl;
1116 std::cerr << os.str ();
1123 std::ostringstream os;
1124 os << *prefix <<
"done" << std::endl;
1125 std::cerr << os.str ();
// Macro taking a scalar, local ordinal, global ordinal and node type; its body
// names Details::packCrsMatrix, Details::packCrsMatrixNew and
// Details::packCrsMatrixWithOwningPIDs for that type combination.
// NOTE(review): some continuation lines of the macro are missing from this
// listing, so the exact form of each declaration cannot be confirmed here.
1132 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1134 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1135 Teuchos::Array<char>&, \
1136 const Teuchos::ArrayView<size_t>&, \
1137 const Teuchos::ArrayView<const LO>&, \
1141 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1142 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1143 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1144 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1148 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1149 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1150 const Teuchos::ArrayView<size_t>&, \
1151 const Teuchos::ArrayView<const LO>&, \
1152 const Teuchos::ArrayView<const int>&, \
1156 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Declaration of the Tpetra::CrsMatrix class.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
int getError() const
Host function for getting the error.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Compute the number of packets and offsets for the pack procedure.
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
Kokkos::View< const value_type *, BufferDeviceType, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Base class for distributed Tpetra objects that support data redistribution.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.