42 #ifndef TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
43 #define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
55 #include "Kokkos_Core.hpp"
82 #ifndef DOXYGEN_SHOULD_SKIP_THIS
85 #endif // DOXYGEN_SHOULD_SKIP_THIS
92 namespace UnpackAndCombineCrsMatrixImpl {
// unpackRow: decodes one packed matrix row from the `imports` byte buffer
// into gids_out, (optionally) pids_out, and vals_out.
// NOTE(review): this excerpt omits lines of the original definition (return
// type, some parameters, braces and several statements). The comments below
// describe only what the visible lines establish.
106 template<
class ST,
class LO,
class GO,
class DT,
class BDT>
108 unpackRow(
typename PackTraits<GO, DT>::output_array_type& gids_out,
110 typename PackTraits<ST, DT>::output_array_type& vals_out,
111 const Kokkos::View<const char*, BDT>& imports,
114 const size_t num_ent,
115 const size_t num_bytes_per_value)
// PIDs are unpacked only when the caller passed a non-empty pids_out.
121 bool unpack_pids = pids_out.size() > 0;
// Byte layout of the row, starting at `offset`: the entry count first, then
// the GIDs, then the PIDs, then the values. Each *_beg is the previous
// section's begin plus its length.
123 const size_t num_ent_beg = offset;
126 const size_t gids_beg = num_ent_beg + num_ent_len;
127 const size_t gids_len =
130 const size_t pids_beg = gids_beg + gids_len;
// The PID section length when unpack_pids is false is not visible here;
// presumably 0 -- TODO confirm.
131 const size_t pids_len = unpack_pids ?
132 size_t (num_ent * PackTraits<int, BDT>::packValueCount (
int (0))) :
135 const size_t vals_beg = gids_beg + gids_len + pids_len;
136 const size_t vals_len = num_ent * num_bytes_per_value;
// Raw input pointers to the start of each section inside `imports`.
138 const char*
const num_ent_in = imports.data () + num_ent_beg;
139 const char*
const gids_in = imports.data () + gids_beg;
// pids_in is NULL when PIDs are not being unpacked.
140 const char*
const pids_in = unpack_pids ? imports.data () + pids_beg : NULL;
141 const char*
const vals_in = imports.data () + vals_beg;
// Running total built from p.second after each of three steps. The calls
// that fill `p` are not visible; presumably they are the per-section unpack
// calls and p.second is the number of bytes consumed -- TODO confirm.
143 size_t num_bytes_out = 0;
// Consistency check: the entry count read from the buffer must equal the
// num_ent passed in by the caller (the failure branch is not visible).
146 if (static_cast<size_t> (num_ent_out) != num_ent) {
151 Kokkos::pair<int, size_t> p;
156 num_bytes_out += p.second;
163 num_bytes_out += p.second;
170 num_bytes_out += p.second;
// Final check: the accumulated byte total must equal the sum of all four
// section lengths (the failure branch is not visible).
173 const size_t expected_num_bytes = num_ent_len + gids_len + pids_len + vals_len;
174 if (num_bytes_out != expected_num_bytes) {
// Type aliases and data members of the unpack-and-combine reduction functor
// (instantiated elsewhere in this file as UnpackCrsMatrixAndCombineFunctor).
// NOTE(review): the struct header line and some member declarations are not
// visible in this excerpt.
190 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
192 typedef LocalMatrix local_matrix_type;
// Scalar, ordinal and device types are taken from the local matrix and the
// local map (the local_map_type alias itself is not visible here).
195 typedef typename local_matrix_type::value_type ST;
196 typedef typename local_map_type::local_ordinal_type LO;
197 typedef typename local_map_type::global_ordinal_type GO;
198 typedef typename local_map_type::device_type DT;
199 typedef typename DT::execution_space XS;
// Input views: per-LID packet sizes, the packed byte buffer and the import
// LIDs use BufferDeviceType; the offsets use the map's device type DT.
201 typedef Kokkos::View<const size_t*, BufferDeviceType>
202 num_packets_per_lid_type;
203 typedef Kokkos::View<const size_t*, DT> offsets_type;
204 typedef Kokkos::View<const char*, BufferDeviceType> input_buffer_type;
205 typedef Kokkos::View<const LO*, BufferDeviceType> import_lids_type;
// Scratch views that operator() slices per acquired token.
207 typedef Kokkos::View<LO*, DT> lids_scratch_type;
208 typedef Kokkos::View<GO*, DT> gids_scratch_type;
209 typedef Kokkos::View<int*,DT> pids_scratch_type;
210 typedef Kokkos::View<ST*, DT> vals_scratch_type;
// Reduction result: operator() stores (error code, row index i) here.
212 typedef Kokkos::pair<int, LO> value_type;
// The map's local ordinal type must match the matrix's ordinal type.
214 static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
215 "LocalMap::local_ordinal_type and "
216 "LocalMatrix::ordinal_type must be the same.");
218 local_matrix_type local_matrix;
220 input_buffer_type imports;
221 num_packets_per_lid_type num_packets_per_lid;
222 import_lids_type import_lids;
223 offsets_type offsets;
227 size_t num_bytes_per_value;
// Token pool over execution space XS with Global scope; operator() acquires
// a token and uses it to pick a private slice of each scratch view.
229 Kokkos::Experimental::UniqueToken<XS, Kokkos::Experimental::UniqueTokenScope::Global> tokens;
230 lids_scratch_type lids_scratch;
231 gids_scratch_type gids_scratch;
232 pids_scratch_type pids_scratch;
233 vals_scratch_type vals_scratch;
// Constructor of the unpack-and-combine functor: copies each argument into
// the member of the same name (minus the _in suffix) and allocates the
// scratch views.
// NOTE(review): the constructor name line, some parameters and the
// initializer that consumes atomic_in are not visible in this excerpt.
236 const local_matrix_type& local_matrix_in,
238 const input_buffer_type& imports_in,
239 const num_packets_per_lid_type& num_packets_per_lid_in,
240 const import_lids_type& import_lids_in,
241 const offsets_type& offsets_in,
243 const size_t max_num_ent_in,
244 const bool unpack_pids_in,
245 const size_t num_bytes_per_value_in,
246 const bool atomic_in) :
247 local_matrix (local_matrix_in),
248 local_col_map (local_col_map_in),
249 imports (imports_in),
250 num_packets_per_lid (num_packets_per_lid_in),
251 import_lids (import_lids_in),
252 offsets (offsets_in),
253 combine_mode (combine_mode_in),
254 max_num_ent (max_num_ent_in),
255 unpack_pids (unpack_pids_in),
256 num_bytes_per_value (num_bytes_per_value_in),
// Each scratch view holds tokens.size() * max_num_ent elements, i.e. one
// slice of max_num_ent entries per token, and is allocated with
// Kokkos::WithoutInitializing.
259 lids_scratch (Kokkos::view_alloc(
"lids_scratch", Kokkos::WithoutInitializing), tokens.size() * max_num_ent),
260 gids_scratch (Kokkos::view_alloc(
"gids_scratch", Kokkos::WithoutInitializing), tokens.size() * max_num_ent),
261 pids_scratch (Kokkos::view_alloc(
"pids_scratch", Kokkos::WithoutInitializing), tokens.size() * max_num_ent),
262 vals_scratch (Kokkos::view_alloc(
"vals_scratch", Kokkos::WithoutInitializing), tokens.size() * max_num_ent)
// init: sets the reduction value to its starting state -- error code 0
// paired with the invalid-LO sentinel as the row index.
265 KOKKOS_INLINE_FUNCTION
void init(value_type& dst)
const
267 using Tpetra::Details::OrdinalTraits;
268 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// join: merges partial reduction result `src` into `dst`. It acts only when
// src carries a valid row index (src.second is not the invalid sentinel),
// and then only if dst has no valid row index yet or src's index is smaller.
// The statements executed in that case are not visible in this excerpt;
// presumably dst takes src's (error code, row) pair so that the lowest
// failing row is reported -- TODO confirm.
271 KOKKOS_INLINE_FUNCTION
void
272 join (
volatile value_type& dst,
const volatile value_type& src)
const
278 using Tpetra::Details::OrdinalTraits;
279 if (src.second != OrdinalTraits<LO>::invalid ()) {
284 if (dst.second == OrdinalTraits<LO>::invalid () ||
285 src.second < dst.second) {
// operator(): per-row body of the unpack-and-combine reduction. For import
// index i it validates the packed row, unpacks it into token-private scratch
// space and combines the values into local_matrix according to combine_mode.
// On failure it writes (error code, i) into dst. Codes visible here:
//   1          -- expected_num_bytes exceeds the bytes recorded for the row
//   2          -- offset / offset + num_bytes falls outside the imports buffer
//   unpack_err -- nonzero value forwarded from the unpackRow step
//   4          -- combine_mode is neither ADD nor REPLACE
// NOTE(review): many statements (returns, closing braces, the computation of
// num_ent_LO, expected_num_bytes and unpack_err, and the body of the k loop)
// are not visible in this excerpt.
291 KOKKOS_INLINE_FUNCTION
292 void operator()(
const LO i, value_type& dst)
const
295 using Kokkos::subview;
296 using Kokkos::MemoryUnmanaged;
297 typedef typename XS::size_type size_type;
298 typedef typename Kokkos::pair<size_type, size_type> slice;
299 typedef BufferDeviceType BDT;
// Unmanaged view types used for the per-token scratch slices.
301 typedef View<LO*, DT, MemoryUnmanaged> lids_out_type;
302 typedef View<int*,DT, MemoryUnmanaged> pids_out_type;
303 typedef View<GO*, DT, MemoryUnmanaged> gids_out_type;
304 typedef View<ST*, DT, MemoryUnmanaged> vals_out_type;
306 const size_t num_bytes = num_packets_per_lid(i);
// Rows with zero packed bytes are special-cased (branch body not visible;
// presumably an early return -- TODO confirm).
309 if (num_bytes == 0) {
314 const LO import_lid = import_lids[i];
315 const size_t buf_size = imports.size();
316 const size_t offset = offsets(i);
320 const char*
const in_buf = imports.data () + offset;
322 const size_t num_ent =
static_cast<size_t> (num_ent_LO);
325 size_t expected_num_bytes = 0;
335 if (expected_num_bytes > num_bytes) {
336 dst = Kokkos::make_pair (1, i);
340 if (offset > buf_size || offset + num_bytes > buf_size) {
341 dst = Kokkos::make_pair (2, i);
// Acquire a token and carve the half-open range [a, b) out of each scratch
// view, where a = token * max_num_ent and b = a + num_ent.
348 const size_type token = tokens.acquire();
349 const size_t a =
static_cast<size_t>(token) * max_num_ent;
350 const size_t b = a + num_ent;
351 lids_out_type lids_out = subview(lids_scratch, slice(a, b));
352 gids_out_type gids_out = subview(gids_scratch, slice(a, b));
// When PIDs are not unpacked the slice is [a, a), i.e. empty.
353 pids_out_type pids_out = subview(pids_scratch, slice(a, (unpack_pids ? b : a)));
354 vals_out_type vals_out = subview(vals_scratch, slice(a, b));
358 unpackRow<ST,LO,GO,DT,BDT>(gids_out, pids_out, vals_out,
359 imports, offset, num_bytes,
360 num_ent, num_bytes_per_value);
// The token is released on this error path before leaving.
361 if (unpack_err != 0) {
362 dst = Kokkos::make_pair (unpack_err, i);
363 tokens.release (token);
// Loop over the row's entries; the body is not visible -- presumably it
// fills lids_out from gids_out via local_col_map -- TODO confirm.
370 for (
size_t k = 0; k < num_ent; ++k) {
// Read-only raw pointers handed to the matrix combine routines.
375 const LO*
const lids_raw =
const_cast<const LO*
> (lids_out.data ());
376 const ST*
const vals_raw =
const_cast<const ST*
> (vals_out.data ());
// ADD sums the unpacked values into row import_lid; REPLACE overwrites them.
378 if (combine_mode ==
ADD) {
380 local_matrix.sumIntoValues (import_lid, lids_raw, num_ent,
381 vals_raw,
false, atomic);
383 else if (combine_mode ==
REPLACE) {
385 local_matrix.replaceValues (import_lid, lids_raw, num_ent,
386 vals_raw,
false, atomic);
// Any other combine mode is reported as error 4; the token is released here
// as well as on the normal path below.
389 dst = Kokkos::make_pair (4, i);
390 tokens.release (token);
394 tokens.release (token);
// Empty tag types. In this file they appear as the work tag of a
// Kokkos::RangePolicy and as the first parameter of the tagged operator()
// and join overloads, selecting the "maximum entries" or "total entries"
// reduction respectively.
398 struct MaxNumEntTag {};
399 struct TotNumEntTag {};
// Reduction functor over packed rows (instantiated elsewhere in this file as
// NumEntriesFunctor<LO, DT, BDT>). It provides two tagged reductions over a
// size_t value: the maximum entry count of any row (MaxNumEntTag) and the
// sum of all entry counts (TotNumEntTag).
// NOTE(review): the class header line, part of the constructor and the code
// that reads num_ent_LO from the buffer are not visible in this excerpt.
409 template<
class LO,
class DT,
class BDT>
412 typedef Kokkos::View<const size_t*, BDT> num_packets_per_lid_type;
413 typedef Kokkos::View<const size_t*, DT> offsets_type;
414 typedef Kokkos::View<const char*, BDT> input_buffer_type;
// Both reductions accumulate into a single size_t.
417 typedef size_t value_type;
420 typedef Kokkos::pair<size_t,size_t> slice;
422 num_packets_per_lid_type num_packets_per_lid;
423 offsets_type offsets;
424 input_buffer_type imports;
// Constructor: stores the given views in the members of the same name.
428 const offsets_type& offsets_in,
429 const input_buffer_type& imports_in) :
430 num_packets_per_lid (num_packets_per_lid_in),
431 offsets (offsets_in),
// MaxNumEntTag body: raise `update` to row i's entry count if that is larger.
435 KOKKOS_INLINE_FUNCTION
void
436 operator() (
const MaxNumEntTag,
const LO i, value_type& update)
const {
438 const size_t num_bytes = num_packets_per_lid(i);
// The row's packed data starts at byte offsets(i) within imports.
441 const char*
const in_buf = imports.data () + offsets(i);
443 const size_t num_ent =
static_cast<size_t> (num_ent_LO);
445 update = (update < num_ent) ? num_ent : update;
// MaxNumEntTag join: keep the larger of the two partial maxima in dst.
449 KOKKOS_INLINE_FUNCTION
void
450 join (
const MaxNumEntTag,
451 volatile value_type& dst,
452 const volatile value_type& src)
const
454 if (dst < src) dst = src;
// TotNumEntTag body: add row i's entry count to the running total.
457 KOKKOS_INLINE_FUNCTION
void
458 operator() (
const TotNumEntTag,
const LO i, value_type& tot_num_ent)
const {
460 const size_t num_bytes = num_packets_per_lid(i);
463 const char*
const in_buf = imports.data () + offsets(i);
465 tot_num_ent +=
static_cast<size_t> (num_ent_LO);
// compute_maximum_num_entries: runs a MaxNumEntTag-tagged parallel_reduce
// over all rows to unpack (one per element of num_packets_per_lid) and
// accumulates the result in max_num_ent.
// NOTE(review): the return type, the construction of `functor` and the
// return statement are not visible in this excerpt.
477 template<
class LO,
class DT,
class BDT>
479 compute_maximum_num_entries (
480 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
481 const Kokkos::View<const size_t*, DT>& offsets,
482 const Kokkos::View<const char*, BDT>& imports)
484 typedef typename DT::execution_space XS;
// Range policy on XS, indexed by LO, dispatching to the MaxNumEntTag overloads.
485 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>,
486 MaxNumEntTag> range_policy;
// The row count is narrowed from the view extent to LO.
490 const LO numRowsToUnpack =
491 static_cast<LO
> (num_packets_per_lid.extent (0));
492 size_t max_num_ent = 0;
493 Kokkos::parallel_reduce (
"Max num entries in CRS",
494 range_policy (0, numRowsToUnpack),
495 functor, max_num_ent);
// compute_total_num_entries: runs a TotNumEntTag-tagged parallel_reduce with
// a NumEntriesFunctor over all rows to unpack (one per element of
// num_packets_per_lid) and accumulates the result in tot_num_ent.
// NOTE(review): the return type, the tail of the functor construction and
// the return statement are not visible in this excerpt.
506 template<
class LO,
class DT,
class BDT>
508 compute_total_num_entries (
509 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
510 const Kokkos::View<const size_t*, DT>& offsets,
511 const Kokkos::View<const char*, BDT>& imports)
513 typedef typename DT::execution_space XS;
// Range policy on XS, indexed by LO, dispatching to the TotNumEntTag overload.
514 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>, TotNumEntTag> range_policy;
515 size_t tot_num_ent = 0;
516 NumEntriesFunctor<LO, DT, BDT> functor (num_packets_per_lid, offsets,
// The row count is narrowed from the view extent to LO.
518 const LO numRowsToUnpack =
519 static_cast<LO
> (num_packets_per_lid.extent (0));
520 Kokkos::parallel_reduce (
"Total num entries in CRS to unpack",
521 range_policy (0, numRowsToUnpack),
522 functor, tot_num_ent);
// unpackAndCombineIntoCrsMatrix: validates the combine mode and input sizes,
// then runs UnpackCrsMatrixAndCombineFunctor over every import LID to unpack
// the packed rows in `imports` and combine them into local_matrix.
// Throws std::invalid_argument for the ABSMAX and INSERT combine modes, for
// any mode other than ADD or REPLACE, and when import_lids and
// num_packets_per_lid differ in length. Throws std::runtime_error when the
// functor reports a nonzero error code.
// NOTE(review): the return type, some parameters (combine_mode,
// num_bytes_per_value, atomic), the early-exit body and the code that fills
// `offsets` are not visible in this excerpt.
533 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
535 unpackAndCombineIntoCrsMatrix(
536 const LocalMatrix& local_matrix,
537 const LocalMap& local_map,
538 const Kokkos::View<const char*, BufferDeviceType>& imports,
539 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
540 const typename PackTraits<typename LocalMap::local_ordinal_type, BufferDeviceType>::input_array_type import_lids,
542 const bool unpack_pids,
545 typedef typename LocalMatrix::value_type ST;
546 typedef typename LocalMap::local_ordinal_type LO;
547 typedef typename LocalMap::device_type DT;
548 typedef typename DT::execution_space XS;
549 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO> > range_policy;
550 typedef UnpackCrsMatrixAndCombineFunctor<LocalMatrix, LocalMap, BufferDeviceType> unpack_functor_type;
// Prefix prepended to every exception message raised below.
552 const char prefix[] =
553 "Tpetra::Details::UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix: ";
// No import LIDs is special-cased (branch body not visible; presumably an
// early return -- TODO confirm).
555 const size_t num_import_lids =
static_cast<size_t>(import_lids.extent(0));
556 if (num_import_lids == 0) {
// Combine-mode validation: ABSMAX and INSERT are rejected with their own
// messages; anything other than ADD or REPLACE is rejected after that.
563 TEUCHOS_TEST_FOR_EXCEPTION(combine_mode ==
ABSMAX,
564 std::invalid_argument,
565 prefix <<
"ABSMAX combine mode is not yet implemented for a matrix that has a "
566 "static graph (i.e., was constructed with the CrsMatrix constructor "
567 "that takes a const CrsGraph pointer).");
569 TEUCHOS_TEST_FOR_EXCEPTION(combine_mode ==
INSERT,
570 std::invalid_argument,
571 prefix <<
"INSERT combine mode is not allowed if the matrix has a static graph "
572 "(i.e., was constructed with the CrsMatrix constructor that takes a "
573 "const CrsGraph pointer).");
576 TEUCHOS_TEST_FOR_EXCEPTION(!(combine_mode ==
ADD || combine_mode ==
REPLACE),
577 std::invalid_argument,
578 prefix <<
"Invalid combine mode; should never get "
579 "here! Please report this bug to the Tpetra developers.");
// There must be exactly one packet-size entry per import LID.
582 bool bad_num_import_lids =
583 num_import_lids !=
static_cast<size_t>(num_packets_per_lid.extent(0));
584 TEUCHOS_TEST_FOR_EXCEPTION(bad_num_import_lids,
585 std::invalid_argument,
586 prefix <<
"importLIDs.size() (" << num_import_lids <<
") != "
587 "numPacketsPerLID.size() (" << num_packets_per_lid.extent(0) <<
").");
// One offset per import LID plus one extra trailing slot.
591 Kokkos::View<size_t*, DT> offsets(
"offsets", num_import_lids+1);
// The largest per-row entry count is passed to the functor, which uses it to
// size its per-token scratch slices.
597 size_t max_num_ent = compute_maximum_num_entries<LO,DT>(
598 num_packets_per_lid, offsets, imports);
605 unpack_functor_type f(local_matrix, local_map,
606 imports, num_packets_per_lid, import_lids, offsets, combine_mode,
607 max_num_ent, unpack_pids, num_bytes_per_value, atomic);
// The reduction result x is an (error code, row index) pair; a nonzero code
// is reported as std::runtime_error together with that row.
609 typename unpack_functor_type::value_type x;
610 Kokkos::parallel_reduce(range_policy(0, static_cast<LO>(num_import_lids)), f, x);
611 auto x_h = x.to_std_pair();
612 TEUCHOS_TEST_FOR_EXCEPTION(x_h.first != 0, std::runtime_error,
613 prefix <<
"UnpackCrsMatrixAndCombineFunctor reported error code "
614 << x_h.first <<
" for the first bad row " << x_h.second);
// Counts matrix entries from three sources: the first num_same_ids rows of
// local_matrix, the rows listed in permute_from_lids, and the packed rows in
// `imports` (via compute_total_num_entries).
// NOTE(review): the function's name line, return type, the variables that
// receive each partial count and the return statement are not visible in
// this excerpt; presumably the three counts are summed -- TODO confirm.
619 template<
class LocalMatrix,
class BufferDeviceType>
622 const LocalMatrix& local_matrix,
623 const typename PackTraits<typename LocalMatrix::ordinal_type, typename LocalMatrix::device_type>::input_array_type permute_from_lids,
624 const Kokkos::View<const char*, BufferDeviceType>& imports,
625 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
626 const size_t num_same_ids)
628 using Kokkos::parallel_reduce;
629 typedef typename LocalMatrix::ordinal_type LO;
630 typedef typename LocalMatrix::device_type device_type;
631 typedef typename device_type::execution_space XS;
632 typedef typename Kokkos::View<LO*, device_type>::size_type size_type;
633 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO> > range_policy;
634 typedef BufferDeviceType BDT;
// Same IDs: sum the row lengths row_map[lid+1] - row_map[lid] for
// lid in [0, num_same_ids).
640 num_items =
static_cast<LO
>(num_same_ids);
643 parallel_reduce(range_policy(0, num_items),
644 KOKKOS_LAMBDA(
const LO lid,
size_t& update) {
645 update +=
static_cast<size_t>(local_matrix.graph.row_map[lid+1]
646 -local_matrix.graph.row_map[lid]);
// Permuted IDs: the same row-length sum, over the rows named by
// permute_from_lids.
652 num_items =
static_cast<LO
>(permute_from_lids.extent(0));
655 parallel_reduce(range_policy(0, num_items),
656 KOKKOS_LAMBDA(
const LO i,
size_t& update) {
657 const LO lid = permute_from_lids(i);
658 update +=
static_cast<size_t> (local_matrix.graph.row_map[lid+1]
659 - local_matrix.graph.row_map[lid]);
// Remote rows: an offsets view with one slot per packet plus one is
// allocated (its fill is not visible), then the packed entries are counted.
666 const size_type np = num_packets_per_lid.extent(0);
667 Kokkos::View<size_t*, device_type> offsets(
"offsets", np+1);
670 compute_total_num_entries<LO, device_type, BDT> (num_packets_per_lid,
// unpackRowCount: reads the entry count stored at byte `offset` of `imports`
// and returns it as size_t. Returns OrdinalTraits<size_t>::invalid() when
// p_num_bytes exceeds the row's num_bytes.
// NOTE(review): the return type, the `offset` parameter and the definitions
// of num_ent_LO and p_num_bytes are not visible in this excerpt;
// p_num_bytes is presumably the packed size of one LO -- TODO confirm.
677 template<
class LO,
class DT,
class BDT>
678 KOKKOS_INLINE_FUNCTION
680 unpackRowCount(
const Kokkos::View<const char*, BDT>& imports,
682 const size_t num_bytes)
687 if (p_num_bytes > num_bytes) {
688 return OrdinalTraits<size_t>::invalid();
690 const char*
const in_buf = imports.data () + offset;
// The return value of unpackValue is deliberately discarded.
691 (void) PackTraits<LO,DT>::unpackValue(num_ent_LO, in_buf);
693 return static_cast<size_t>(num_ent_LO);
// setupRowPointersForRemotes: for every packed row i, reads its entry count
// with unpackRowCount and atomically adds that count to
// tgt_rowptr(import_lids(i)). An int error value (k_error) is reduced across
// the rows.
// NOTE(review): the return type, the range policy argument, the body of the
// InvalidNum branch and the return statement are not visible in this excerpt.
697 template<
class LO,
class DT,
class BDT>
699 setupRowPointersForRemotes(
700 const typename PackTraits<size_t, DT>::output_array_type& tgt_rowptr,
701 const typename PackTraits<LO, DT>::input_array_type& import_lids,
702 const Kokkos::View<const char*, BDT>& imports,
703 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
704 const typename PackTraits<size_t, DT>::input_array_type& offsets)
706 using Kokkos::parallel_reduce;
707 typedef typename DT::execution_space XS;
708 typedef typename PackTraits<size_t,DT>::input_array_type::size_type size_type;
709 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
// Sentinel that unpackRowCount returns for a row it cannot read.
711 const size_t InvalidNum = OrdinalTraits<size_t>::invalid();
712 const size_type N = num_packets_per_lid.extent(0);
715 parallel_reduce (
"Setup row pointers for remotes",
717 KOKKOS_LAMBDA (
const size_t i,
int& k_error) {
// Element type of tgt_rowptr, so the atomic add operand has a matching type.
718 typedef typename std::remove_reference< decltype( tgt_rowptr(0) ) >::type atomic_incr_type;
719 const size_t num_bytes = num_packets_per_lid(i);
720 const size_t offset = offsets(i);
721 const size_t num_ent = unpackRowCount<LO, DT, BDT> (imports, offset, num_bytes);
// Handling of an unreadable row is not visible; presumably k_error is set.
722 if (num_ent == InvalidNum) {
// Atomic because the add targets tgt_rowptr at an index taken from
// import_lids rather than at the loop index.
725 Kokkos::atomic_fetch_add (&tgt_rowptr (import_lids(i)), atomic_incr_type(num_ent));
// makeCrsRowPtrFromLengths: runs a parallel_scan over the N entries of
// new_start_row. The visible statements overwrite tgt_rowptr(i) with the
// scan value `update` and copy that value into new_start_row(i).
// NOTE(review): the template header, return type and some lambda statements
// are not visible in this excerpt. Presumably the writes are guarded by
// `final` and `update` is advanced by cur_val, turning per-row lengths into
// row start offsets -- TODO confirm.
733 makeCrsRowPtrFromLengths(
734 const typename PackTraits<size_t,DT>::output_array_type& tgt_rowptr,
735 const Kokkos::View<size_t*,DT>& new_start_row)
737 using Kokkos::parallel_scan;
738 typedef typename DT::execution_space XS;
739 typedef typename Kokkos::View<size_t*,DT>::size_type size_type;
740 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
741 const size_type N = new_start_row.extent(0);
742 parallel_scan(range_policy(0, N),
743 KOKKOS_LAMBDA(
const size_t& i,
size_t& update,
const bool&
final) {
// Read the current value before tgt_rowptr(i) is overwritten below.
744 auto cur_val = tgt_rowptr(i);
746 tgt_rowptr(i) = update;
747 new_start_row(i) = tgt_rowptr(i);
// Copies the first num_same_ids rows of local_matrix into the target CRS
// arrays; the source and target local row indices are both i. For each
// entry it writes the value, the global column index looked up through
// local_col_map, and the column's source PID, storing -1 instead when that
// PID equals my_pid. It also advances new_start_row(tgt_lid) by the row
// length.
// NOTE(review): the function's name line (called as copyDataFromSameIDs
// elsewhere in this file), its return type and the my_pid parameter are not
// visible in this excerpt.
754 template<
class LocalMatrix,
class LocalMap>
757 const typename PackTraits<typename LocalMap::global_ordinal_type, typename LocalMap::device_type>::output_array_type& tgt_colind,
758 const typename PackTraits<int, typename LocalMap::device_type>::output_array_type& tgt_pids,
759 const typename PackTraits<typename LocalMatrix::value_type, typename LocalMap::device_type>::output_array_type& tgt_vals,
760 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
761 const typename PackTraits<size_t, typename LocalMap::device_type>::output_array_type& tgt_rowptr,
762 const typename PackTraits<int, typename LocalMap::device_type>::input_array_type& src_pids,
763 const LocalMatrix& local_matrix,
764 const LocalMap& local_col_map,
765 const size_t num_same_ids,
768 using Kokkos::parallel_for;
769 typedef typename LocalMap::device_type DT;
770 typedef typename LocalMap::local_ordinal_type LO;
771 typedef typename DT::execution_space XS;
772 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t> > range_policy;
774 parallel_for(range_policy(0, num_same_ids),
775 KOKKOS_LAMBDA(
const size_t i) {
// Element type of new_start_row, so the atomic add operand has a matching type.
776 typedef typename std::remove_reference< decltype( new_start_row(0) ) >::type atomic_incr_type;
// src_row: index of the row's first entry in the source entries/values arrays.
778 const LO src_lid =
static_cast<LO
>(i);
779 size_t src_row = local_matrix.graph.row_map(src_lid);
// tgt_row: index of the row's first entry in the target arrays.
781 const LO tgt_lid =
static_cast<LO
>(i);
782 const size_t tgt_row = tgt_rowptr(tgt_lid);
// nsr: number of entries in the source row.
784 const size_t nsr = local_matrix.graph.row_map(src_lid+1)
785 - local_matrix.graph.row_map(src_lid);
786 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
// Copy entry j of the source row to position tgt_row + (j - src_row).
788 for (
size_t j=local_matrix.graph.row_map(src_lid);
789 j<local_matrix.graph.row_map(src_lid+1); ++j) {
790 LO src_col = local_matrix.graph.entries(j);
791 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
792 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
793 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
// copyDataFromPermuteIDs: for each i, copies source row permute_from_lids(i)
// of local_matrix into target row permute_to_lids(i) of the target CRS
// arrays. For each entry it writes the value, the global column index looked
// up through local_col_map, and the column's source PID, storing -1 instead
// when that PID equals my_pid. It also advances new_start_row(tgt_lid) by
// the row length.
// NOTE(review): the return type and the my_pid parameter are not visible in
// this excerpt.
799 template<
class LocalMatrix,
class LocalMap>
801 copyDataFromPermuteIDs(
802 const typename PackTraits<typename LocalMap::global_ordinal_type, typename LocalMap::device_type>::output_array_type& tgt_colind,
803 const typename PackTraits<int, typename LocalMap::device_type>::output_array_type& tgt_pids,
804 const typename PackTraits<typename LocalMatrix::value_type, typename LocalMap::device_type>::output_array_type& tgt_vals,
805 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
806 const typename PackTraits<size_t, typename LocalMap::device_type>::output_array_type& tgt_rowptr,
807 const typename PackTraits<int, typename LocalMap::device_type>::input_array_type& src_pids,
808 const typename PackTraits<typename LocalMap::local_ordinal_type, typename LocalMap::device_type>::input_array_type& permute_to_lids,
809 const typename PackTraits<typename LocalMap::local_ordinal_type, typename LocalMap::device_type>::input_array_type& permute_from_lids,
810 const LocalMatrix& local_matrix,
811 const LocalMap& local_col_map,
814 using Kokkos::parallel_for;
815 typedef typename LocalMap::device_type DT;
816 typedef typename LocalMap::local_ordinal_type LO;
817 typedef typename DT::execution_space XS;
818 typedef typename PackTraits<LO,DT>::input_array_type::size_type size_type;
819 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
// The iteration count comes from permute_to_lids; permute_from_lids is
// indexed with the same i.
821 const size_type num_permute_to_lids = permute_to_lids.extent(0);
823 parallel_for(range_policy(0, num_permute_to_lids),
824 KOKKOS_LAMBDA(
const size_t i) {
// Element type of new_start_row, so the atomic add operand has a matching type.
825 typedef typename std::remove_reference< decltype( new_start_row(0) ) >::type atomic_incr_type;
// src_row: index of the row's first entry in the source entries/values arrays.
827 const LO src_lid = permute_from_lids(i);
828 const size_t src_row = local_matrix.graph.row_map(src_lid);
// tgt_row: index of the row's first entry in the target arrays.
830 const LO tgt_lid = permute_to_lids(i);
831 const size_t tgt_row = tgt_rowptr(tgt_lid);
// nsr: number of entries in the source row.
833 size_t nsr = local_matrix.graph.row_map(src_lid+1)
834 - local_matrix.graph.row_map(src_lid);
835 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
// Copy entry j of the source row to position tgt_row + (j - src_row).
837 for (
size_t j=local_matrix.graph.row_map(src_lid);
838 j<local_matrix.graph.row_map(src_lid+1); ++j) {
839 LO src_col = local_matrix.graph.entries(j);
840 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
841 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
842 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
// unpackAndCombineIntoCrsArrays2: unpacks every packed row in `imports`
// directly into the target CRS arrays. For row i it reads the entry count,
// reserves [start_row, end_row) in the target row by atomically advancing
// new_start_row(import_lids(i)), unpacks GIDs, PIDs and values into that
// range with unpackRow, and rewrites any PID equal to my_pid to -1. The
// values returned by unpackRow are summed into the reduction value k_error.
// NOTE(review): the return type, some parameters (including my_pid), the
// bodies of the num_bytes == 0 and InvalidNum branches, and the return
// statement are not visible in this excerpt.
848 template<
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
850 unpackAndCombineIntoCrsArrays2(
851 const typename PackTraits<typename LocalMap::global_ordinal_type, typename LocalMap::device_type>::output_array_type& tgt_colind,
852 const typename PackTraits<int, typename LocalMap::device_type>::output_array_type& tgt_pids,
853 const typename PackTraits<typename LocalMatrix::value_type, typename LocalMap::device_type>::output_array_type& tgt_vals,
854 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
855 const typename PackTraits<size_t, typename LocalMap::device_type>::input_array_type& offsets,
856 const typename PackTraits<typename LocalMap::local_ordinal_type, typename LocalMap::device_type>::input_array_type& import_lids,
857 const Kokkos::View<const char*, BufferDeviceType>& imports,
858 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
862 const size_t num_bytes_per_value)
865 using Kokkos::subview;
866 using Kokkos::MemoryUnmanaged;
867 using Kokkos::parallel_reduce;
868 using Kokkos::atomic_fetch_add;
870 typedef typename LocalMap::device_type DT;
871 typedef typename LocalMap::local_ordinal_type LO;
872 typedef typename LocalMap::global_ordinal_type GO;
873 typedef typename LocalMatrix::value_type ST;
874 typedef typename DT::execution_space XS;
875 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
876 typedef typename Kokkos::pair<size_type, size_type> slice;
877 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
878 typedef BufferDeviceType BDT;
// Unmanaged view types for the slices of the target arrays.
880 typedef View<int*,DT, MemoryUnmanaged> pids_out_type;
881 typedef View<GO*, DT, MemoryUnmanaged> gids_out_type;
882 typedef View<ST*, DT, MemoryUnmanaged> vals_out_type;
// Sentinel that unpackRowCount returns for a row it cannot read.
884 const size_t InvalidNum = OrdinalTraits<size_t>::invalid();
887 const size_type num_import_lids = import_lids.size();
890 parallel_reduce (
"Unpack and combine into CRS",
891 range_policy (0, num_import_lids),
892 KOKKOS_LAMBDA (
const size_t i,
int& k_error) {
// Element type of new_start_row, so the atomic add operand has a matching type.
893 typedef typename std::remove_reference< decltype( new_start_row(0) ) >::type atomic_incr_type;
894 const size_t num_bytes = num_packets_per_lid(i);
895 const size_t offset = offsets(i);
// Rows with no packed bytes are special-cased (branch body not visible).
896 if (num_bytes == 0) {
900 size_t num_ent = unpackRowCount<LO,DT,BDT>(imports, offset, num_bytes);
// Handling of an unreadable row is not visible in this excerpt.
901 if (num_ent == InvalidNum) {
// atomic_fetch_add returns the value before the add, so start_row is this
// row's first free slot and the next writer to lcl_row starts at end_row.
905 const LO lcl_row = import_lids(i);
906 const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
907 const size_t end_row = start_row + num_ent;
909 gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
910 vals_out_type vals_out = subview(tgt_vals, slice(start_row, end_row));
911 pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
913 k_error += unpackRow<ST,LO,GO,DT,BDT>(gids_out, pids_out, vals_out,
914 imports, offset, num_bytes,
915 num_ent, num_bytes_per_value);
// Replace PIDs equal to my_pid with -1.
918 for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
919 const int pid = pids_out(j);
920 pids_out(j) = (pid != my_pid) ? pid : -1;
// Builds the target CRS arrays (tgt_rowptr, tgt_colind, tgt_vals, tgt_pids)
// from three sources: the first num_same_ids rows of local_matrix, the
// permuted rows, and the packed remote rows in `imports`.
// Visible steps, in order:
//   1. store per-row lengths in tgt_rowptr for the same and permuted rows;
//   2. add the remote row lengths (setupRowPointersForRemotes);
//   3. call makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
//   4. copy the same and permuted rows;
//   5. unpack the remote rows (unpackAndCombineIntoCrsArrays2).
// Throws std::logic_error when the remote row-pointer setup or the unpack
// loop reports an error (and, under HAVE_TPETRA_DEBUG, when the final offset
// differs from imports.extent(0)); throws std::invalid_argument when the
// last row pointer differs from tgt_num_nonzeros.
// NOTE(review): the function's name line (taken here from the `prefix`
// string), return type, several lambda bodies and the code that fills
// `offsets` are not visible in this excerpt.
927 template<
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
930 const LocalMatrix & local_matrix,
931 const LocalMap & local_col_map,
932 const typename PackTraits<typename LocalMap::local_ordinal_type, typename LocalMap::device_type>::input_array_type& import_lids,
933 const Kokkos::View<const char*, BufferDeviceType>& imports,
934 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
935 const typename PackTraits<typename LocalMap::local_ordinal_type, typename LocalMap::device_type>::input_array_type& permute_to_lids,
936 const typename PackTraits<typename LocalMap::local_ordinal_type, typename LocalMap::device_type>::input_array_type& permute_from_lids,
937 const typename PackTraits<size_t, typename LocalMap::device_type>::output_array_type& tgt_rowptr,
938 const typename PackTraits<typename LocalMap::global_ordinal_type, typename LocalMap::device_type>::output_array_type& tgt_colind,
939 const typename PackTraits<typename LocalMatrix::value_type, typename LocalMap::device_type>::output_array_type& tgt_vals,
940 const typename PackTraits<int, typename LocalMap::device_type>::input_array_type& src_pids,
941 const typename PackTraits<int, typename LocalMap::device_type>::output_array_type& tgt_pids,
942 const size_t num_same_ids,
943 const size_t tgt_num_rows,
944 const size_t tgt_num_nonzeros,
945 const int my_tgt_pid,
946 const size_t num_bytes_per_value)
949 using Kokkos::subview;
950 using Kokkos::parallel_for;
951 using Kokkos::MemoryUnmanaged;
953 typedef typename LocalMap::device_type DT;
954 typedef typename LocalMap::local_ordinal_type LO;
955 typedef typename DT::execution_space XS;
956 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
957 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t> > range_policy;
958 typedef BufferDeviceType BDT;
// Prefix prepended to every exception message raised below.
960 const char prefix[] =
"unpackAndCombineIntoCrsArrays: ";
// Local aliases for the target row count, nonzero count and PID.
962 const size_t N = tgt_num_rows;
963 const size_t mynnz = tgt_num_nonzeros;
967 const int my_pid = my_tgt_pid;
// Pass over N+1 indices; the lambda body is not visible -- presumably it
// zeroes tgt_rowptr -- TODO confirm.
970 parallel_for(range_policy(0, N+1),
971 KOKKOS_LAMBDA(
const size_t i) {
// Same IDs: store each source row's length in tgt_rowptr at the same index.
977 parallel_for(range_policy(0, num_same_ids),
978 KOKKOS_LAMBDA(
const size_t i) {
979 const LO tgt_lid =
static_cast<LO
>(i);
980 const LO src_lid =
static_cast<LO
>(i);
981 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid+1)
982 - local_matrix.graph.row_map(src_lid);
// Permuted IDs: store the length of source row permute_from_lids(i) at
// target row permute_to_lids(i).
987 const size_type num_permute_to_lids = permute_to_lids.extent(0);
988 parallel_for(range_policy(0, num_permute_to_lids),
989 KOKKOS_LAMBDA(
const size_t i) {
990 const LO tgt_lid = permute_to_lids(i);
991 const LO src_lid = permute_from_lids(i);
992 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid+1)
993 - local_matrix.graph.row_map(src_lid);
// One offset per import LID plus one extra trailing slot.
998 const size_type num_import_lids = import_lids.extent(0);
999 View<size_t*, DT> offsets(
"offsets", num_import_lids+1);
// Debug-only check: the last offset must equal the size of the imports buffer.
1002 #ifdef HAVE_TPETRA_DEBUG
1004 auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
1005 const bool condition =
1006 nth_offset_h !=
static_cast<size_t>(imports.extent (0));
1007 TEUCHOS_TEST_FOR_EXCEPTION
1008 (condition, std::logic_error, prefix
1009 <<
"The final offset in bytes " << nth_offset_h
1010 <<
" != imports.size() = " << imports.extent(0)
1011 <<
". Please report this bug to the Tpetra developers.");
1013 #endif // HAVE_TPETRA_DEBUG
// Add the remote row lengths to tgt_rowptr. The assignment to k_error is not
// visible; presumably it receives this call's result -- TODO confirm.
1017 setupRowPointersForRemotes<LO,DT,BDT>(tgt_rowptr,
1018 import_lids, imports, num_packets_per_lid, offsets);
1019 TEUCHOS_TEST_FOR_EXCEPTION(k_error != 0, std::logic_error, prefix
1020 <<
" Error transferring data to target row pointers. "
1021 "Please report this bug to the Tpetra developers.");
// new_start_row has N+1 entries; the copy and unpack steps below advance it
// per target row as they write entries.
1025 View<size_t*, DT> new_start_row (
"new_start_row", N+1);
1028 makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
// Sanity check: tgt_rowptr(N) must equal the expected number of nonzeros.
1030 auto nth_tgt_rowptr_h = getEntryOnHost(tgt_rowptr, N);
1031 bool condition = nth_tgt_rowptr_h != mynnz;
1032 TEUCHOS_TEST_FOR_EXCEPTION(condition, std::invalid_argument,
1033 prefix <<
"CRS_rowptr[last] = " <<
1034 nth_tgt_rowptr_h <<
"!= mynnz = " << mynnz <<
".");
// Copy the locally available rows: same IDs first, then permuted IDs.
1038 copyDataFromSameIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1039 tgt_rowptr, src_pids, local_matrix, local_col_map, num_same_ids, my_pid);
1041 copyDataFromPermuteIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1042 tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
1043 local_matrix, local_col_map, my_pid);
// An empty imports buffer is special-cased (branch body not visible;
// presumably an early return -- TODO confirm).
1045 if (imports.extent(0) <= 0) {
// Unpack the remote rows; any nonzero result is reported as a logic error.
1049 int unpack_err = unpackAndCombineIntoCrsArrays2(tgt_colind, tgt_pids,
1050 tgt_vals, new_start_row, offsets, import_lids, imports, num_packets_per_lid,
1051 local_matrix, local_col_map, my_pid, num_bytes_per_value);
1052 TEUCHOS_TEST_FOR_EXCEPTION(
1053 unpack_err != 0, std::logic_error, prefix <<
"unpack loop failed. This "
1054 "should never happen. Please report this bug to the Tpetra developers.");
// ArrayView-based wrapper: obtains device-side objects for numPacketsPerLID,
// importLIDs and imports, fetches the local column map of sourceMatrix, and
// forwards to UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix
// with `false` as the argument following combineMode (that function's
// unpack_pids parameter).
// NOTE(review): the function's name line, return type, remaining parameters
// and the helper that produces the *_d objects are not visible in this
// excerpt.
1100 template<
typename ST,
typename LO,
typename GO,
typename Node>
1104 const Teuchos::ArrayView<const char>& imports,
1105 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
1106 const Teuchos::ArrayView<const LO>& importLIDs,
1113 typedef typename Node::device_type device_type;
// The Node's device type must match the local matrix's device type.
1115 static_assert (std::is_same<device_type, typename local_matrix_type::device_type>::value,
1116 "Node::device_type and LocalMatrix::device_type must be the same.");
1119 typedef typename device_type::execution_space XS;
1122 typename XS::device_type outputDevice;
// Each *_d object is built from the matching ArrayView's size, the flag
// `true` and a label. The call producing it is not visible; presumably it
// creates a device copy of the host data -- TODO confirm.
1127 auto num_packets_per_lid_d =
1129 numPacketsPerLID.size(),
true,
"num_packets_per_lid");
1131 auto import_lids_d =
1133 importLIDs.size(),
true,
"import_lids");
1137 imports.size(),
true,
"imports");
1140 auto local_col_map = sourceMatrix.
getColMap()->getLocalMap();
1143 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix(
1144 local_matrix, local_col_map, imports_d, num_packets_per_lid_d,
1145 import_lids_d, combineMode,
false, atomic);
// DualView-based wrapper: syncs the numPacketsPerLID and imports buffers to
// the device, asserts that importLIDs needs no device sync, takes the device
// views, and forwards to
// UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix with `false`
// as the argument following combineMode (that function's unpack_pids
// parameter).
// NOTE(review): the function's name line, return type, remaining parameters,
// the declarations of the *_nc objects and the explicit template arguments
// of the forwarded call are not visible in this excerpt.
1150 template<
typename ST,
typename LO,
typename GO,
typename NT>
1153 const Kokkos::DualView<
const char*,
1155 const Kokkos::DualView<
const size_t*,
1157 const Kokkos::DualView<
const LO*,
1168 using device_type =
typename crs_matrix_type::device_type;
1169 using local_matrix_type =
typename crs_matrix_type::local_matrix_type;
1170 using buffer_device_type =
typename dist_object_type::buffer_device_type;
// The matrix's device type must match its local matrix's device type.
1173 (std::is_same<device_type, typename local_matrix_type::device_type>::value,
1174 "crs_matrix_type::device_type and local_matrix_type::device_type "
1175 "must be the same.");
// The sync goes through numPacketsPerLID_nc; presumably a non-const alias of
// numPacketsPerLID, since its declaration is not visible -- TODO confirm.
1179 numPacketsPerLID_nc.sync_device ();
1181 auto num_packets_per_lid_d = numPacketsPerLID.view_device ();
// importLIDs is not synced here; it is only asserted to be current on device.
1183 TEUCHOS_ASSERT( ! importLIDs.need_sync_device () );
1184 auto import_lids_d = importLIDs.view_device ();
1188 imports_nc.sync_device ();
1190 auto imports_d = imports.view_device ();
1193 auto local_col_map = sourceMatrix.
getColMap ()->getLocalMap ();
1194 typedef decltype (local_col_map) local_map_type;
1197 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix<
1201 > (local_matrix, local_col_map, imports_d, num_packets_per_lid_d,
1202 import_lids_d, combineMode, false, atomic);
// unpackAndCombineWithOwningPIDsCount (name taken from the `prefix` string
// below; the declaration line itself is missing from this listing).
//
// Validates its arguments, wraps the host arrays for the computation, and
// forwards (local_matrix, permute_from_lids_d, imports_d,
// num_packets_per_lid_d, numSameIDs) to a callee whose name is not visible.
//
// Throws std::invalid_argument (via TEUCHOS_TEST_FOR_EXCEPTION) when:
//   - permuteToLIDs.size() != permuteFromLIDs.size();
//   - sourceMatrix is not locally indexed;
//   - importLIDs.size() != numPacketsPerLID.size().
1260 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1263 const
CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & sourceMatrix,
1264 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
1265 const Teuchos::ArrayView<const
char> &imports,
1266 const Teuchos::ArrayView<const
size_t>& numPacketsPerLID,
1271 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
1272 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
1274 using Kokkos::MemoryUnmanaged;
1276 typedef typename Node::device_type DT;
// Prepended to every exception message raised below.
1278 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
// The two permutation lists must have the same length.
1280 TEUCHOS_TEST_FOR_EXCEPTION
1281 (permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
1282 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size () <<
" != "
1283 "permuteFromLIDs.size() = " << permuteFromLIDs.size() <<
".");
// Only locally indexed source matrices are accepted.
1286 const bool locallyIndexed = sourceMatrix.isLocallyIndexed ();
1287 TEUCHOS_TEST_FOR_EXCEPTION
1288 (! locallyIndexed, std::invalid_argument, prefix <<
"The input "
1289 "CrsMatrix 'sourceMatrix' must be locally indexed.");
// One packet count is required per imported local index.
1290 TEUCHOS_TEST_FOR_EXCEPTION
1291 (importLIDs.size () != numPacketsPerLID.size (), std::invalid_argument,
1292 prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != "
1293 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
".");
1295 auto local_matrix = sourceMatrix.getLocalMatrix ();
// The groups below pass (raw pointer, size, copy = true, label).  The head
// of each call is missing from this listing; presumably
// create_mirror_view_from_raw_host_array -- TODO confirm.
1296 auto permute_from_lids_d =
1298 permuteFromLIDs.getRawPtr (),
1299 permuteFromLIDs.size (),
true,
1300 "permute_from_lids");
1303 imports.getRawPtr (),
1304 imports.size (),
true,
1306 auto num_packets_per_lid_d =
1308 numPacketsPerLID.getRawPtr (),
1309 numPacketsPerLID.size (),
true,
1310 "num_packets_per_lid");
// Arguments of the final call (callee name not visible in this listing).
1313 local_matrix, permute_from_lids_d, imports_d,
1314 num_packets_per_lid_d, numSameIDs);
// Tpetra::Details::unpackAndCombineIntoCrsArrays (name taken from the
// `prefix` string below; the declaration line, several parameters, and the
// braces are missing from this listing).
//
// Host-side wrapper: validates array sizes, wraps the caller's host arrays
// for the computation, works out a process-wide bytes-per-value figure, and
// forwards everything to an implementation routine whose name is not
// visible here.
//
// Throws std::invalid_argument (via TEUCHOS_TEST_FOR_EXCEPTION) when:
//   - CRS_rowptr.size() != TargetNumRows + 1;
//   - permuteToLIDs.size() != permuteFromLIDs.size();
//   - importLIDs.size() != numPacketsPerLID.size().
//
// TargetPids is an in/out argument: it is sized to TargetNumNonzeros and
// filled with -1 before the unpack.
1331 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1335 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
1336 const Teuchos::ArrayView<const char>& imports,
1337 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
1341 const size_t numSameIDs,
1342 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
1343 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
1344 size_t TargetNumRows,
1345 size_t TargetNumNonzeros,
1346 const int MyTargetPID,
1347 const Teuchos::ArrayView<size_t>& CRS_rowptr,
1348 const Teuchos::ArrayView<GlobalOrdinal>& CRS_colind,
1350 const Teuchos::ArrayView<const int>& SourcePids,
1351 Teuchos::Array<int>& TargetPids)
1358 using Teuchos::ArrayView;
1359 using Teuchos::outArg;
1360 using Teuchos::REDUCE_MAX;
1361 using Teuchos::reduceAll;
1363 typedef LocalOrdinal LO;
1365 typedef typename Node::device_type DT;
1366 typedef typename DT::execution_space XS;
1369 typedef typename matrix_type::impl_scalar_type ST;
1370 typedef typename ArrayView<const LO>::size_type size_type;
// Prepended to every exception message raised below.
1372 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays: ";
// The row-pointer array needs one more entry than there are target rows.
// NOTE(review): the message has no space before "!=", so it prints e.g.
// "CRS_rowptr.size() = 5!= TargetNumRows+1 = 6.".
1374 TEUCHOS_TEST_FOR_EXCEPTION(
1375 TargetNumRows + 1 != static_cast<size_t> (CRS_rowptr.size ()),
1376 std::invalid_argument, prefix <<
"CRS_rowptr.size() = " <<
1377 CRS_rowptr.size () <<
"!= TargetNumRows+1 = " << TargetNumRows+1 <<
".");
// The two permutation lists must have the same length.
// NOTE(review): same missing space before "!=" as in the check above.
1379 TEUCHOS_TEST_FOR_EXCEPTION(
1380 permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
1381 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size ()
1382 <<
"!= permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
".");
1383 const size_type numImportLIDs = importLIDs.size ();
// One packet count is required per imported local index.
1385 TEUCHOS_TEST_FOR_EXCEPTION(
1386 numImportLIDs != numPacketsPerLID.size (), std::invalid_argument,
1387 prefix <<
"importLIDs.size() = " << numImportLIDs <<
" != "
1388 "numPacketsPerLID.size() = " << numPacketsPerLID.size() <<
".");
// Size TargetPids to the number of target nonzeros and fill it with -1.
// NOTE(review): if Teuchos::Array::assign behaves like std::vector::assign
// it already sets the size, which would make the resize redundant -- confirm.
1391 if (static_cast<size_t> (TargetPids.size ()) != TargetNumNonzeros) {
1392 TargetPids.resize (TargetNumNonzeros);
1394 TargetPids.assign (TargetNumNonzeros, -1);
// Local (per-process) view of the column Map.
1398 auto local_col_map = sourceMatrix.
getColMap()->getLocalMap();
// Default-constructed device tag; presumably the output-device argument of
// the mirror-view helper calls below (call heads not visible).
1401 typename XS::device_type outputDevice;
// Each group below passes (size, copy = true, label).  The head of each call
// is missing from this listing; presumably
// create_mirror_view_from_raw_host_array -- TODO confirm.
1402 auto import_lids_d =
1404 importLIDs.size(),
true,
"import_lids");
1408 imports.size(),
true,
"imports");
1410 auto num_packets_per_lid_d =
1412 numPacketsPerLID.size(),
true,
"num_packets_per_lid");
1414 auto permute_from_lids_d =
1416 permuteFromLIDs.size(),
true,
"permute_from_lids");
1418 auto permute_to_lids_d =
1420 permuteToLIDs.size(),
true,
"permute_to_lids");
1424 CRS_rowptr.size(),
true,
"crs_rowptr");
1428 CRS_colind.size(),
true,
"crs_colidx");
// When complex<double> is instantiated, check at compile time that the value
// type seen here is not std::complex<double> (the message gives the reason).
1430 #ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1431 static_assert (! std::is_same<
1432 typename std::remove_const<
1433 typename std::decay<
1437 std::complex<double> >::value,
1438 "CRS_vals::value_type is std::complex<double>; this should never happen"
1439 ", since std::complex does not work in Kokkos::View objects.");
1440 #endif // HAVE_TPETRA_INST_COMPLEX_DOUBLE
1444 CRS_vals.size(),
true,
"crs_vals");
// Same compile-time check, applied to the value type of crs_vals_d.
1446 #ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1447 static_assert (! std::is_same<
1448 typename decltype (crs_vals_d)::non_const_value_type,
1449 std::complex<double> >::value,
1450 "crs_vals_d::non_const_value_type is std::complex<double>; this should "
1451 "never happen, since std::complex does not work in Kokkos::View objects.");
1452 #endif // HAVE_TPETRA_INST_COMPLEX_DOUBLE
1456 SourcePids.size(),
true,
"src_pids");
1460 TargetPids.size(),
true,
"tgt_pids");
// Bytes per packed value: each process derives a local figure from a sample
// value -- local_matrix.values(0) when the local matrix has values, otherwise
// crs_vals_d(0); the else-branch structure and the size computation are not
// visible here -- then the maximum over sourceMatrix's communicator is used.
1462 size_t num_bytes_per_value = 0;
1476 size_t num_bytes_per_value_l = 0;
1477 if (local_matrix.values.extent(0) > 0) {
1478 const ST& val = local_matrix.values(0);
1481 const ST& val = crs_vals_d(0);
1484 Teuchos::reduceAll<int, size_t>(*(sourceMatrix.
getComm()),
1485 Teuchos::REDUCE_MAX,
1486 num_bytes_per_value_l,
1487 outArg(num_bytes_per_value));
// Third repetition of the complex<double> compile-time check.
1490 #ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1491 static_assert (! std::is_same<
1492 typename decltype (crs_vals_d)::non_const_value_type,
1493 std::complex<double> >::value,
1494 "crs_vals_d::non_const_value_type is std::complex<double>; this should "
1495 "never happen, since std::complex does not work in Kokkos::View objects.");
1496 #endif // HAVE_TPETRA_INST_COMPLEX_DOUBLE
// Arguments of the implementation call (callee name not visible here).
1499 local_matrix, local_col_map, import_lids_d, imports_d,
1500 num_packets_per_lid_d, permute_to_lids_d, permute_from_lids_d,
1501 crs_rowptr_d, crs_colind_d, crs_vals_d, src_pids_d, tgt_pids_d,
1502 numSameIDs, TargetNumRows, TargetNumNonzeros, MyTargetPID,
1503 num_bytes_per_value);
// Host views constructed directly over the caller's output arrays (raw
// pointer + size); presumably the destinations for copying the results back
// (the copy statements are not visible in this listing).
1506 typename decltype(crs_rowptr_d)::HostMirror crs_rowptr_h(
1507 CRS_rowptr.getRawPtr(), CRS_rowptr.size());
1510 typename decltype(crs_colind_d)::HostMirror crs_colind_h(
1511 CRS_colind.getRawPtr(), CRS_colind.size());
1514 typename decltype(crs_vals_d)::HostMirror crs_vals_h(
1515 CRS_vals.getRawPtr(), CRS_vals.size());
1518 typename decltype(tgt_pids_d)::HostMirror tgt_pids_h(
1519 TargetPids.getRawPtr(), TargetPids.size());
// Macro taking a (ST, LO, GO, NT) type combination and naming four Details::
// function templates with those arguments: unpackCrsMatrixAndCombine,
// unpackCrsMatrixAndCombineNew, unpackAndCombineIntoCrsArrays and
// unpackAndCombineWithOwningPIDsCount.
// NOTE(review): presumably these are explicit instantiations; the
// `template <return-type>` lines and some parameter lines are missing from
// this listing -- confirm.
1527 #define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT( ST, LO, GO, NT ) \
1529 Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT> ( \
1530 const CrsMatrix<ST, LO, GO, NT>&, \
1531 const Teuchos::ArrayView<const char>&, \
1532 const Teuchos::ArrayView<const size_t>&, \
1533 const Teuchos::ArrayView<const LO>&, \
1539 Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT> ( \
1540 const CrsMatrix<ST, LO, GO, NT>&, \
1541 const Kokkos::DualView<const char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1542 const Kokkos::DualView<const size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1543 const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1546 const CombineMode, \
1549 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT> ( \
1550 const CrsMatrix<ST, LO, GO, NT> &, \
1551 const Teuchos::ArrayView<const LO>&, \
1552 const Teuchos::ArrayView<const char>&, \
1553 const Teuchos::ArrayView<const size_t>&, \
1556 const CombineMode, \
1558 const Teuchos::ArrayView<const LO>&, \
1559 const Teuchos::ArrayView<const LO>&, \
1563 const Teuchos::ArrayView<size_t>&, \
1564 const Teuchos::ArrayView<GO>&, \
1565 const Teuchos::ArrayView<CrsMatrix<ST, LO, GO, NT>::impl_scalar_type>&, \
1566 const Teuchos::ArrayView<const int>&, \
1567 Teuchos::Array<int>&); \
1569 Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT> ( \
1570 const CrsMatrix<ST, LO, GO, NT> &, \
1571 const Teuchos::ArrayView<const LO> &, \
1572 const Teuchos::ArrayView<const char> &, \
1573 const Teuchos::ArrayView<const size_t>&, \
1578 const Teuchos::ArrayView<const LO>&, \
1579 const Teuchos::ArrayView<const LO>&);
1581 #endif // TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
Kokkos::parallel_reduce functor to determine the number of entries (to unpack) in a KokkosSparse::Crs...
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
KOKKOS_INLINE_FUNCTION LocalOrdinal getLocalElement(const GlobalOrdinal globalIndex) const
Get the local index corresponding to the given global index.
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the gi...
Declaration of the Tpetra::CrsMatrix class.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
"Local" part of Map suitable for Kokkos kernels.
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Insert new values that don't currently exist.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(T &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > unpackArray(value_type outBuf[], const char inBuf[], const size_t numEnt)
Unpack numEnt value_type entries from the given input buffer of bytes, to the given output buffer of ...
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Replace old value with maximum of magnitudes of old and new values.
local_matrix_type getLocalMatrix() const
The local sparse matrix.
Replace existing values with new values.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Kokkos::View< value_type *, DT, Kokkos::MemoryUnmanaged > output_array_type
The type of an output array of value_type.
void unpackCrsMatrixAndCombine(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &importLIDs, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, const bool atomic)
Unpack the imported column indices and values, and combine into matrix.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Base class for distributed Tpetra objects that support data redistribution.
Unpacks and combines a single row of the CrsMatrix.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...