42 #ifndef TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP 
   43 #define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP 
   45 #include "TpetraCore_config.h" 
   46 #include "Teuchos_Array.hpp" 
   47 #include "Teuchos_ArrayView.hpp" 
   56 #include "Kokkos_Core.hpp" 
   81 #ifndef DOXYGEN_SHOULD_SKIP_THIS 
   84 #endif // DOXYGEN_SHOULD_SKIP_THIS 
   91 namespace UnpackAndCombineCrsGraphImpl {
 
  // unpackRow: copies one packed row out of `imports` into the caller's views.
  // Reads num_ent entries starting at imports(offset) into gids_out; when
  // pids_out is non-empty it also reads the next num_ent entries into pids_out,
  // converting each to int.
  // Callers in this file store the result in an int error code; the return
  // type and return statements are not visible in this listing.
  // NOTE(review): this listing elides some original lines (e.g. the one that
  // presumably declares `offset`); confirm against the full header.
  102 template<
class Packet, 
class GO, 
class Device, 
class BufferDevice>
 
  104 unpackRow (
const Kokkos::View<GO*,Device,Kokkos::MemoryUnmanaged>& gids_out,
 
  105            const Kokkos::View<int*,Device,Kokkos::MemoryUnmanaged>& pids_out,
 
  106            const Kokkos::View<const Packet*,BufferDevice>& imports,
 
  108            const size_t num_ent)
 
  110   using size_type = 
typename Kokkos::View<GO*,Device>::size_type;
 
  // Global indices occupy the first num_ent slots of this row's span.
  118   for (size_type k=0; k<num_ent; k++)
 
  119     gids_out(k) = imports(offset+k);
 
  // A zero-length pids_out means the caller does not want PIDs unpacked.
  // When present, PIDs are read from the num_ent slots right after the GIDs.
  122   if (pids_out.size() > 0) {
 
  123     for (size_type k=0; k<num_ent; k++) {
 
  124       pids_out(k) = 
static_cast<int>(imports(offset+num_ent+k));
 
  // Type aliases and data members of the unpack-and-combine reduction functor
  // (instantiated elsewhere in this file as
  // UnpackAndCombineFunctor<LO, Packet, RowView, IndicesView, BufferDevice>).
  // NOTE(review): the remaining template parameters, the class head, and the
  // declarations of max_num_ent / unpack_pids are on lines elided from this
  // listing; confirm against the full header.
  141 template<
class LocalOrdinal,
 
  148   using LO = LocalOrdinal;
 
  149   using GO = 
typename IndicesView::value_type;
 
  150   using packet_type = Packet;
 
  151   using row_ptrs_type = RowView;
 
  152   using indices_type = IndicesView;
 
  153   using buffer_device_type = BufferDevice;
 
  // Execution happens on the device that owns the indices view.
  155   using device_type = 
typename IndicesView::device_type;
 
  156   using execution_space = 
typename device_type::execution_space;
 
  // View types for the inputs. Packet counts, the packed buffer and the import
  // LIDs live on the buffer device; offsets live on the compute device.
  158   using num_packets_per_lid_type = Kokkos::View<const size_t*, buffer_device_type>;
 
  159   using offsets_type = Kokkos::View<const size_t*, device_type>;
 
  160   using input_buffer_type = Kokkos::View<const packet_type*, buffer_device_type>;
 
  161   using import_lids_type = Kokkos::View<const LO*, buffer_device_type>;
 
  // Scratch storage that operator() slices per acquired token.
  163   using gids_scratch_type = Kokkos::View<GO*, device_type>;
 
  164   using pids_scratch_type = Kokkos::View<int*,device_type>;
 
  // Row begin/end pointers and the index array that operator() appends into.
  166   row_ptrs_type row_ptrs_beg;
 
  167   row_ptrs_type row_ptrs_end;
 
  168   indices_type indices;
 
  // Packed input: the buffer, the packet count per import, the target LID of
  // each import, and the start offset of each import's span in `imports`.
  169   input_buffer_type imports;
 
  170   num_packets_per_lid_type num_packets_per_lid;
 
  171   import_lids_type import_lids;
 
  172   offsets_type offsets;
 
  // Token pool used by operator() to pick a private slice of the scratch views.
  175   Kokkos::Experimental::UniqueToken<execution_space,
 
  176                                     Kokkos::Experimental::UniqueTokenScope::Global> tokens;
 
  177   gids_scratch_type gids_scratch;
 
  178   pids_scratch_type pids_scratch;
 
  // Reduction value: init() sets it to (0, invalid LO); operator() overwrites
  // it with (error code, loop index i) when it detects a problem.
  181   using value_type = Kokkos::pair<int, LO>;
 
  // Constructor parameter list and member initializers of the functor.
  // Stores the input views and sizes, then allocates the scratch views with
  // tokens.size() * max_num_ent entries each, so that operator() can give
  // every token its own slice of max_num_ent entries.
  // NOTE(review): the constructor name line and the initializers for
  // `indices`, `imports` and `offsets` are on lines elided from this listing.
  184       const row_ptrs_type& row_ptrs_beg_in,
 
  185       const row_ptrs_type& row_ptrs_end_in,
 
  186       const indices_type& indices_in,
 
  187       const input_buffer_type& imports_in,
 
  188       const num_packets_per_lid_type& num_packets_per_lid_in,
 
  189       const import_lids_type& import_lids_in,
 
  190       const offsets_type& offsets_in,
 
  191       const size_t max_num_ent_in,
 
  192       const bool unpack_pids_in) :
 
  193     row_ptrs_beg(row_ptrs_beg_in),
 
  194     row_ptrs_end(row_ptrs_end_in),
 
  197     num_packets_per_lid(num_packets_per_lid_in),
 
  198     import_lids(import_lids_in),
 
  200     max_num_ent(max_num_ent_in),
 
  201     unpack_pids(unpack_pids_in),
 
  // `tokens` is initialized before the scratch views because their sizes
  // are computed from tokens.size().
  202     tokens(execution_space()),
 
  203     gids_scratch(
"gids_scratch", tokens.size() * max_num_ent),
 
  204     pids_scratch(
"pids_scratch", tokens.size() * max_num_ent)
 
  // Reduction identity: error code 0 paired with the invalid local ordinal,
  // i.e. "no error, no offending row".
  207   KOKKOS_INLINE_FUNCTION 
void init(value_type& dst)
 const 
  209     using Tpetra::Details::OrdinalTraits;
 
  210     dst = Kokkos::make_pair(0, OrdinalTraits<LO>::invalid());
 
  // Combines two partial reduction results.
  // Only acts when `src` carries a valid row (second != invalid), and then
  // only when `dst` has no row yet or `src` names a smaller row.
  // NOTE(review): the statement executed in that case is on lines elided from
  // this listing; presumably it copies src into dst so that the smallest
  // offending row wins. Confirm against the full header.
  213   KOKKOS_INLINE_FUNCTION 
void 
  214   join(
volatile value_type& dst, 
const volatile value_type& src)
 const 
  220     using Tpetra::Details::OrdinalTraits;
 
  221     if (src.second != OrdinalTraits<LO>::invalid()) {
 
  226       if (dst.second == OrdinalTraits<LO>::invalid() ||
 
  227           src.second < dst.second) {
 
  // Unpacks the i-th import into scratch storage and appends its global
  // indices to the target row import_lids(i).
  // On failure, `dst` is set to (error code, i):
  //   1 = PIDs requested but the packet count for this import is odd,
  //   2 = the import's span would extend past the end of `imports`,
  //   3 = set after the unpackRow call (the guarding condition is elided).
  // NOTE(review): the early returns after each error assignment and the
  // condition on `err` are on lines elided from this listing.
  233   KOKKOS_INLINE_FUNCTION
 
  234   void operator()(
const LO i, value_type& dst)
 const 
  237     using Kokkos::subview;
 
  238     using Kokkos::MemoryUnmanaged;
 
  239     using size_type = 
typename execution_space::size_type;
 
  240     using slice = 
typename Kokkos::pair<size_type, size_type>;
 
  242     using pids_out_type = View<int*,device_type, MemoryUnmanaged>;
 
  243     using gids_out_type = View<GO*, device_type, MemoryUnmanaged>;
 
  // With PIDs, each entry takes two packets (one GID and one PID), so the
  // entry count is half the packet count; without PIDs it is the packet count.
  245     const size_t num_packets_this_lid = num_packets_per_lid(i);
 
  246     const size_t num_ent = (unpack_pids) ? num_packets_this_lid/2
 
  247                                          : num_packets_this_lid;
 
  // Error 1: an odd packet count cannot be split into GID/PID pairs.
  248     if (unpack_pids && num_packets_this_lid%2 != 0) {
 
  251       dst = Kokkos::make_pair(1, i);
 
  261     const size_t buf_size = imports.size();
 
  262     const size_t offset = offsets(i);
 
  // Error 2: the span [offset, offset + num_packets_this_lid) must fit
  // inside the import buffer.
  264     if (offset > buf_size || offset + num_packets_this_lid > buf_size) {
 
  265       dst = Kokkos::make_pair(2, i); 
 
  // Acquire a token and use it to select the slice [a, b) of the scratch
  // views; the slice start is token * max_num_ent.
  272     const size_type token = tokens.acquire();
 
  273     const size_t a = 
static_cast<size_t>(token) * max_num_ent;
 
  274     const size_t b = a + num_ent;
 
  275     gids_out_type gids_out = subview(gids_scratch, slice(a, b));
 
  // When PIDs are not unpacked the PID slice is empty ([a, a)), which makes
  // unpackRow skip its PID loop.
  276     pids_out_type pids_out = subview(pids_scratch, slice(a, (unpack_pids ? b : a)));
 
  278     const int err = unpackRow (gids_out, pids_out, imports, offset, num_ent);
 
  // Error 3: record the failure and give the token back before leaving.
  281       dst = Kokkos::make_pair(3, i);
 
  282       tokens.release(token);
 
  // Append each unpacked global index at the current end of the target row
  // and advance that row's end pointer by one.
  // NOTE(review): the increment is a plain (non-atomic) +=; this assumes no
  // two concurrent iterations share the same import_lid -- confirm.
  286     auto import_lid = import_lids(i);
 
  287     for (
size_t k = 0; k < num_ent; ++k) {
 
  288       indices(row_ptrs_end(import_lid)) = gids_out(k);
 
  290       row_ptrs_end(import_lid) += 1;
 
  293     tokens.release(token);
 
  // Builds a map from import LID to the number of entries that import will
  // add: num_packets_per_lid(i)/2 when PIDs are packed alongside the indices,
  // otherwise num_packets_per_lid(i).
  // Throws std::runtime_error if the map reports a failed insert.
  // NOTE(review): the return statement and part of the return type are on
  // lines elided from this listing; the caller in this file assigns the
  // result to a `padding` variable.
  298 template<
class NumPackets, 
class ImportL
ids, 
class Device>
 
  299 Kokkos::UnorderedMap<
typename ImportLids::non_const_value_type,
 
  300                      typename NumPackets::non_const_value_type,
 
  302 computeCrsPadding(
const NumPackets& num_packets_per_lid,
 
  303                   const ImportLids& import_lids,
 
  304                   const bool unpack_pids)
 
  308   using key_type = 
typename ImportLids::non_const_value_type;
 
  309   using val_type = 
typename NumPackets::non_const_value_type;
 
  // The map is constructed with import_lids.size() as its capacity argument
  // (one slot per import).
  310   Kokkos::UnorderedMap<key_type, val_type, Device> padding(import_lids.size());
 
  311   auto policy = Kokkos::RangePolicy<typename Device::execution_space>(0, import_lids.size());
 
  312   Kokkos::parallel_for(
"Fill padding", policy,
 
  313       KOKKOS_LAMBDA(
typename ImportLids::size_type i) {
 
  314         auto how_much_padding = (unpack_pids) ? num_packets_per_lid(i)/2
 
  315                                               : num_packets_per_lid(i);
 
  // NOTE(review): each import is inserted on its own; amounts for a LID that
  // appears more than once in import_lids are not summed here. Confirm how
  // UnorderedMap::insert treats a repeated key and whether repeats can occur.
  316         padding.insert(import_lids(i), how_much_padding);
 
  319     TEUCHOS_TEST_FOR_EXCEPTION(padding.failed_insert(), std::runtime_error,
 
  320       "computeCrsPadding: failed to insert one or more indices in to padding map");
 
  // Driver for unpacking imported rows into an existing graph's arrays
  // (the `prefix` string below names it ...::unpackAndCombine).
  // Visible steps:
  //   1. stop early when there are no import LIDs (body elided),
  //   2. compute per-row padding and pad the CRS arrays,
  //   3. allocate `offsets` with num_import_lids+1 entries,
  //   4. max-reduce the per-import entry count into max_num_ent,
  //   5. run UnpackAndCombineFunctor as a parallel_reduce,
  //   6. throw std::runtime_error if the functor reported a nonzero error code.
  // NOTE(review): the function-name line, the early return, the declaration of
  // `padding` / `max_num_ent`, and the code that fills `offsets` are on lines
  // elided from this listing.
  330 template<
class LocalOrdinal, 
class Packet, 
class RowView,
 
  331          class IndicesView, 
class BufferDevice>
 
  334 (
const RowView& row_ptrs_beg,
 
  335  const RowView& row_ptrs_end,
 
  336  IndicesView& indices,
 
  337  const Kokkos::View<const Packet*, BufferDevice, Kokkos::MemoryUnmanaged>& imports,
 
  338  const Kokkos::View<const size_t*, BufferDevice, Kokkos::MemoryUnmanaged>& num_packets_per_lid,
 
  339  const Kokkos::View<const LocalOrdinal*, BufferDevice, Kokkos::MemoryUnmanaged>& import_lids,
 
  340  const bool unpack_pids)
 
  343   using ImportLidsView =
 
  344     Kokkos::View<const LocalOrdinal*, BufferDevice, Kokkos::MemoryUnmanaged>;
 
  345   using NumPacketsView =
 
  346     Kokkos::View<const size_t*, BufferDevice, Kokkos::MemoryUnmanaged>;
 
  347   using LO = LocalOrdinal;
 
  // Kernels in this function are launched on the buffer device's execution space.
  348   using execution_space = 
typename BufferDevice::execution_space;
 
  350     Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
 
  351   using unpack_functor_type =
 
  352     UnpackAndCombineFunctor<LO, Packet, RowView, IndicesView, BufferDevice>;
 
  354   const char prefix[] =
 
  355     "Tpetra::Details::UnpackAndCombineCrsGraphImpl::unpackAndCombine: ";
 
  357   const size_t num_import_lids = 
static_cast<size_t>(import_lids.extent(0));
 
  // Nothing to unpack when no LIDs were imported.
  358   if (num_import_lids == 0) {
 
  363   using device_type = 
typename IndicesView::device_type;
 
  // Per-row padding is computed from the import counts and passed to
  // padCrsArrays together with the row pointers and the index array.
  367     computeCrsPadding<NumPacketsView, ImportLidsView, device_type>
 
  368       (num_packets_per_lid, import_lids, unpack_pids);
 
  369   padCrsArrays<RowView, IndicesView, decltype (padding) > (row_ptrs_beg, row_ptrs_end, indices, padding);
 
  // One extra slot beyond the number of imports.
  372   Kokkos::View<size_t*, device_type> offsets(
"offsets", num_import_lids+1);
 
  // Max-reduction over the per-import entry counts; the result sizes the
  // functor's per-token scratch slices.
  379   Kokkos::parallel_reduce
 
  381      range_policy (0, LO (num_packets_per_lid.size ())),
 
  382      KOKKOS_LAMBDA (
const LO i, 
size_t& running_max_num_ent) {
 
  383        const size_t num_packets_this_lid = num_packets_per_lid(i);
 
  384        const size_t num_ent = (unpack_pids) ? num_packets_this_lid/2 :
 
  385          num_packets_this_lid;
 
  386        if (num_ent > running_max_num_ent) {
 
  387          running_max_num_ent = num_ent;
 
  389      }, Kokkos::Max<size_t> (max_num_ent));
 
  392   unpack_functor_type f (row_ptrs_beg, row_ptrs_end, indices, imports,
 
  393                          num_packets_per_lid, import_lids, offsets,
 
  394                          max_num_ent, unpack_pids);
 
  // x receives (error code, row) from the functor's reduction.
  396   typename unpack_functor_type::value_type x;
 
  397   Kokkos::parallel_reduce(range_policy(0, static_cast<LO>(num_import_lids)), f, x);
 
  398   auto x_h = x.to_std_pair();
 
  399   TEUCHOS_TEST_FOR_EXCEPTION(x_h.first != 0, std::runtime_error,
 
  400       prefix << 
"UnpackAndCombineFunctor reported error code " 
  401              << x_h.first << 
" for the first bad row " << x_h.second);
 
  // Counts graph entries from three sources:
  //   - the first num_same_ids rows of local_graph,
  //   - the rows of local_graph named by permute_from_lids,
  //   - the imports, at num_packets_per_lid(i) / 2 entries each.
  // The packed buffer parameter is unnamed and therefore unused here.
  // NOTE(review): the function-name line, the declarations of `count` and
  // `num_items`, the parallel_reduce call heads for the first two loops, and
  // the return statement are on lines elided from this listing. Presumably
  // each partial sum is added to `count`, which is returned -- confirm.
  404 template<
class Packet, 
class LocalGraph, 
class BufferDevice>
 
  407   const LocalGraph& local_graph,
 
  408   const Kokkos::View<
const typename LocalGraph::data_type*,
 
  409                      typename LocalGraph::device_type,
 
  410                      Kokkos::MemoryUnmanaged> permute_from_lids,
 
  411   const Kokkos::View<const Packet*, BufferDevice>& ,
 
  412   const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
 
  413   const size_t num_same_ids)
 
  415   using Kokkos::parallel_reduce;
 
  416   using local_graph_type = LocalGraph;
 
  417   using LO = 
typename local_graph_type::data_type;
 
  418   using device_type = 
typename local_graph_type::device_type;
 
  419   using execution_space = 
typename device_type::execution_space;
 
  420   using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
 
  // Pass 1: rows [0, num_same_ids); each adds row_map[lid+1] - row_map[lid].
  426   num_items = 
static_cast<LO
>(num_same_ids);
 
  430       range_policy(0, num_items),
 
  431       KOKKOS_LAMBDA(
const LO lid, 
size_t& update) {
 
  432         update += 
static_cast<size_t>(local_graph.row_map[lid+1]
 
  433                                      -local_graph.row_map[lid]);
 
  // Pass 2: the same row-length sum, over the rows listed in permute_from_lids.
  439   num_items = 
static_cast<LO
>(permute_from_lids.extent(0));
 
  443       range_policy(0, num_items),
 
  444       KOKKOS_LAMBDA(
const LO i, 
size_t& update) {
 
  445         const LO lid = permute_from_lids(i);
 
  446         update += 
static_cast<size_t>(local_graph.row_map[lid+1]
 
  447                                      - local_graph.row_map[lid]);
 
  // Pass 3: imported entries. Each import contributes half its packet count,
  // matching the GID+PID packing used by the unpack routines in this file.
  454     size_t tot_num_ent = 0;
 
  455     parallel_reduce(
"SumReduce",
 
  456         num_packets_per_lid.size(),
 
  457         KOKKOS_LAMBDA(
const int& i, 
size_t& lsum) {
 
  458           lsum += num_packets_per_lid(i) / 2;
 
  459         }, Kokkos::Sum<size_t>(tot_num_ent));
 
  460     count += tot_num_ent;
 
  // For every import i, atomically adds num_packets_per_lid(i) / 2 to
  // tgt_rowptr(import_lids(i)), i.e. accumulates the number of imported
  // entries destined for each target row.
  // The packed buffer parameter is unnamed and therefore unused here.
  // NOTE(review): the return type and the range-policy argument of the
  // parallel_for are on lines elided from this listing; N (the number of
  // packet counts) is presumably the loop bound.
  467 template<
class Packet, 
class LO, 
class Device, 
class BufferDevice>
 
  469 setupRowPointersForRemotes(
 
  470   const Kokkos::View<size_t*, Device>& tgt_rowptr,
 
  471   const Kokkos::View<const LO*, BufferDevice>& import_lids,
 
  472   const Kokkos::View<const Packet*, BufferDevice>& ,
 
  473   const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid)
 
  475   using Kokkos::parallel_reduce;
 
  476   using device_type = Device;
 
  477   using execution_space = 
typename device_type::execution_space;
 
  478   using size_type = 
typename Kokkos::View<size_t*,device_type>::size_type;
 
  479   using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
 
  481   const size_type N = num_packets_per_lid.extent(0);
 
  482   parallel_for(
"Setup row pointers for remotes",
 
  484     KOKKOS_LAMBDA(
const size_t i){
 
  // The increment is converted to the element type of tgt_rowptr before the
  // atomic add.
  485       using atomic_incr_type = 
typename std::remove_reference<decltype(tgt_rowptr(0))>::type;
 
  486       const size_t num_packets_this_lid = num_packets_per_lid(i);
 
  487       const size_t num_ent = num_packets_this_lid / 2;
 
  // Atomic because several imports may target the same row.
  488       Kokkos::atomic_fetch_add(&tgt_rowptr(import_lids(i)), atomic_incr_type(num_ent));
 
  // Runs a parallel scan over tgt_rowptr. In the visible part of the scan body
  // the running value `update` is written to tgt_rowptr(i) and the same value
  // is copied to new_start_row(i).
  // NOTE(review): the parallel_scan call head, the `final` guard, and the line
  // that folds cur_val into `update` are elided from this listing. Presumably
  // this is an exclusive prefix sum that turns per-row lengths into row
  // offsets -- confirm against the full header.
  493 template<
class Device>
 
  495 makeCrsRowPtrFromLengths(
 
  496     const Kokkos::View<size_t*,Device,Kokkos::MemoryUnmanaged>& tgt_rowptr,
 
  497     const Kokkos::View<size_t*,Device>& new_start_row)
 
  499   using Kokkos::parallel_scan;
 
  500   using device_type = Device;
 
  501   using execution_space = 
typename device_type::execution_space;
 
  502   using size_type = 
typename Kokkos::View<size_t*,device_type>::size_type;
 
  503   using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
 
  // The scan length is taken from new_start_row, not from tgt_rowptr.
  504   const size_type N = new_start_row.extent(0);
 
  507     KOKKOS_LAMBDA(
const size_t& i, 
size_t& update, 
const bool& 
final) {
 
  // Read the old value before it is overwritten below.
  508       auto cur_val = tgt_rowptr(i);
 
  510         tgt_rowptr(i) = update;
 
  511         new_start_row(i) = tgt_rowptr(i);
 
  // Copies rows [0, num_same_ids) of local_graph into the target arrays; the
  // source and target LID are both i.
  // For each entry of a row, the local column index is converted with
  // local_col_map.getGlobalElement and stored in tgt_colind, and tgt_pids
  // receives src_pids(src_col), or -1 when that PID equals my_pid.
  // new_start_row(tgt_lid) is advanced atomically by the row length.
  // NOTE(review): the function-name line, the `my_pid` parameter and the
  // parallel_for call head are on lines elided from this listing; the caller
  // in this file invokes copyDataFromSameIDs with a matching argument list.
  518 template<
class LocalGraph, 
class LocalMap>
 
  521     const Kokkos::View<
typename LocalMap::global_ordinal_type*,
 
  522                        typename LocalMap::device_type>& tgt_colind,
 
  523     const Kokkos::View<int*, typename LocalMap::device_type>& tgt_pids,
 
  524     const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
 
  525     const Kokkos::View<size_t*, typename LocalMap::device_type>& tgt_rowptr,
 
  526     const Kokkos::View<const int*, typename LocalMap::device_type>& src_pids,
 
  527     const LocalGraph& local_graph,
 
  528     const LocalMap& local_col_map,
 
  529     const size_t num_same_ids,
 
  532   using Kokkos::parallel_for;
 
  533   using device_type = 
typename LocalMap::device_type;
 
  534   using LO = 
typename LocalMap::local_ordinal_type;
 
  535   using execution_space = 
typename device_type::execution_space;
 
  536   using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_t>>;
 
  539     range_policy(0, num_same_ids),
 
  540     KOKKOS_LAMBDA(
const size_t i) {
 
  541       using atomic_incr_type =
typename std::remove_reference<decltype(new_start_row(0))>::type;
 
  // src_row: offset of the first entry of the source row.
  543       const LO src_lid    = 
static_cast<LO
>(i);
 
  544       size_t src_row = local_graph.row_map(src_lid);
 
  // tgt_row: offset of the first slot of the target row.
  546       const LO tgt_lid      = 
static_cast<LO
>(i);
 
  547       const size_t tgt_row = tgt_rowptr(tgt_lid);
 
  // nsr: number of entries in the source row.
  549       const size_t nsr = local_graph.row_map(src_lid+1)
 
  550                        - local_graph.row_map(src_lid);
 
  551       Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
 
  // Entry j of the source row lands at tgt_row + (j - src_row).
  553       for (
size_t j=local_graph.row_map(src_lid);
 
  554                   j<local_graph.row_map(src_lid+1); ++j) {
 
  555         LO src_col = local_graph.entries(j);
 
  556         tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
 
  557         tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
 
  // Copies permuted rows: for each i, source row permute_from_lids(i) of
  // local_graph is copied into target row permute_to_lids(i).
  // Column indices are converted with local_col_map.getGlobalElement;
  // tgt_pids receives src_pids(src_col), or -1 when that PID equals my_pid.
  // new_start_row(tgt_lid) is advanced atomically by the row length.
  // NOTE(review): the return type, the `my_pid` parameter and the
  // parallel_for call head are on lines elided from this listing.
  563 template<
class LocalGraph, 
class LocalMap, 
class BufferDevice>
 
  565 copyDataFromPermuteIDs(
 
  566     const Kokkos::View<
typename LocalMap::global_ordinal_type*,
 
  567                        typename LocalMap::device_type>& tgt_colind,
 
  568     const Kokkos::View<
int*,
 
  569                        typename LocalMap::device_type>& tgt_pids,
 
  570     const Kokkos::View<
size_t*,
 
  571                        typename LocalMap::device_type>& new_start_row,
 
  572     const Kokkos::View<
size_t*,
 
  573                        typename LocalMap::device_type>& tgt_rowptr,
 
  574     const Kokkos::View<
const int*,
 
  575                        typename LocalMap::device_type>& src_pids,
 
  576     const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
 
  577       BufferDevice, Kokkos::MemoryUnmanaged>& permute_to_lids,
 
  578     const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
 
  579       BufferDevice, Kokkos::MemoryUnmanaged>& permute_from_lids,
 
  580     const LocalGraph& local_graph,
 
  581     const LocalMap& local_col_map,
 
  584   using Kokkos::parallel_for;
 
  585   using device_type = 
typename LocalMap::device_type;
 
  586   using LO = 
typename LocalMap::local_ordinal_type;
 
  587   using execution_space = 
typename device_type::execution_space;
 
  588   using size_type = 
typename Kokkos::View<LO*,device_type>::size_type;
 
  589   using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
 
  // The loop count comes from permute_to_lids; permute_from_lids is indexed
  // with the same i.
  591   const size_type num_permute_to_lids = permute_to_lids.extent(0);
 
  594     range_policy(0, num_permute_to_lids),
 
  595     KOKKOS_LAMBDA(
const size_t i) {
 
  596       using atomic_incr_type = 
typename std::remove_reference<decltype(new_start_row(0))>::type;
 
  598       const LO src_lid = permute_from_lids(i);
 
  599       const size_t src_row = local_graph.row_map(src_lid);
 
  601       const LO tgt_lid = permute_to_lids(i);
 
  602       const size_t tgt_row = tgt_rowptr(tgt_lid);
 
  // nsr: number of entries in the source row.
  604       size_t nsr = local_graph.row_map(src_lid+1)
 
  605                  - local_graph.row_map(src_lid);
 
  606       Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
 
  // Entry j of the source row lands at tgt_row + (j - src_row).
  608       for (
size_t j=local_graph.row_map(src_lid);
 
  609                   j<local_graph.row_map(src_lid+1); ++j) {
 
  610         LO src_col = local_graph.entries(j);
 
  611         tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
 
  612         tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
 
  // Unpacks every import directly into the target CRS arrays.
  // For import i: reserves num_ent = num_packets_per_lid(i) / 2 slots in row
  // import_lids(i) by atomically advancing new_start_row, unpacks the GIDs and
  // PIDs into those slots with unpackRow, then replaces any PID equal to
  // my_pid with -1. unpackRow's return values are summed into `err`.
  // Throws std::invalid_argument when gbl_err_count is nonzero.
  // NOTE(review): the return type, several parameters (including `my_pid`)
  // and the declaration of gbl_err_count are on lines elided from this
  // listing. The exception text speaks of an odd num_ent, while the visible
  // error source is the sum of unpackRow results -- confirm the message is
  // still accurate.
  618 template<
class Packet, 
class LocalGraph, 
class LocalMap, 
class BufferDevice>
 
  620 unpackAndCombineIntoCrsArrays2(
 
  621     const Kokkos::View<typename LocalMap::global_ordinal_type*, typename LocalMap::device_type>& tgt_colind,
 
  622     const Kokkos::View<int*, typename LocalMap::device_type>& tgt_pids,
 
  623     const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
 
  624     const Kokkos::View<const size_t*, typename LocalMap::device_type>& offsets,
 
  626       const typename LocalMap::local_ordinal_type*,
 
  628       Kokkos::MemoryUnmanaged>& import_lids,
 
  629     const Kokkos::View<const Packet*, BufferDevice>& imports,
 
  630     const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
 
  636   using Kokkos::subview;
 
  637   using Kokkos::MemoryUnmanaged;
 
  638   using Kokkos::parallel_reduce;
 
  639   using Kokkos::atomic_fetch_add;
 
  641   using device_type = 
typename LocalMap::device_type;
 
  642   using LO = 
typename LocalMap::local_ordinal_type;
 
  643   using GO = 
typename LocalMap::global_ordinal_type;
 
  644   using execution_space = 
typename device_type::execution_space;
 
  645   using size_type = 
typename Kokkos::View<LO*, device_type>::size_type;
 
  646   using slice = 
typename Kokkos::pair<size_type, size_type>;
 
  647   using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
 
  649   using pids_out_type = View<int*,device_type, MemoryUnmanaged>;
 
  650   using gids_out_type = View<GO*, device_type, MemoryUnmanaged>;
 
  652   const size_type num_import_lids = import_lids.size();
 
  653   const char prefix[] = 
"UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays2: ";
 
  657   parallel_reduce(
"Unpack and combine into CRS",
 
  658     range_policy(0, num_import_lids),
 
  659     KOKKOS_LAMBDA(
const size_t i, 
int& err) {
 
  660       using atomic_incr_type = 
typename std::remove_reference< decltype( new_start_row(0) )>::type;
 
  // Two packets (GID and PID) per entry.
  661       const size_t num_packets_this_lid = num_packets_per_lid(i);
 
  662       const size_t num_ent = num_packets_this_lid / 2;
 
  663       const size_t offset = offsets(i);
 
  664       const LO lcl_row = import_lids(i);
 
  // atomic_fetch_add returns the value before the add, so start_row is the
  // first slot reserved for this import and the row cursor moves past it.
  665       const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
 
  666       const size_t end_row = start_row + num_ent;
 
  // Unmanaged windows onto the reserved slots of the target arrays.
  668       gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
 
  669       pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
 
  671       err += unpackRow (gids_out, pids_out, imports, offset, num_ent);
 
  // Entries owned by this process are marked with -1 instead of my_pid.
  674       for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
 
  675         const int pid = pids_out(j);
 
  676         pids_out(j) = (pid != my_pid) ? pid : -1;
 
  680   TEUCHOS_TEST_FOR_EXCEPTION(gbl_err_count != 0,
 
  681       std::invalid_argument, prefix <<
 
  682       "Attempting to unpack PIDs, but num_ent is not even; this should never " 
  683       "happen!  Please report this bug to the Tpetra developers.");
 
  // Builds the target CRS arrays (tgt_rowptr, tgt_colind, tgt_pids) from the
  // local graph plus the imported data. Visible steps:
  //   1. a loop over [0, N+1) (body elided; presumably clears tgt_rowptr),
  //   2. store row lengths for the first num_same_ids rows,
  //   3. store row lengths for permuted rows,
  //   4. allocate `offsets`, and in debug builds check its last entry against
  //      imports.extent(0),
  //   5. add the imported entry counts (setupRowPointersForRemotes),
  //   6. turn lengths into row pointers (makeCrsRowPtrFromLengths) and check
  //      the last row pointer against tgt_num_nonzeros,
  //   7. copy same-ID rows, then permuted rows,
  //   8. skip the final unpack when `imports` is empty (body elided),
  //   9. unpack the imports (unpackAndCombineIntoCrsArrays2).
  // NOTE(review): the function-name line, the parallel_for call heads, the
  // code filling `offsets`, and several parameter-type lines are elided from
  // this listing; confirm against the full header.
  688 template<
class Packet, 
class LocalGraph, 
class LocalMap, 
class BufferDevice>
 
  691     const LocalGraph & local_graph,
 
  692     const LocalMap & local_col_map,
 
  693     const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
 
  695                        Kokkos::MemoryUnmanaged>& import_lids,
 
  696     const Kokkos::View<const Packet*, BufferDevice>& imports,
 
  697     const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
 
  698     const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
 
  700                        Kokkos::MemoryUnmanaged>& permute_to_lids,
 
  701     const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
 
  703                        Kokkos::MemoryUnmanaged>& permute_from_lids,
 
  704     const Kokkos::View<
size_t*,
 
  705                        typename LocalMap::device_type,
 
  706                        Kokkos::MemoryUnmanaged>& tgt_rowptr,
 
  707     const Kokkos::View<
typename LocalMap::global_ordinal_type*,
 
  708                        typename LocalMap::device_type,
 
  709                        Kokkos::MemoryUnmanaged>& tgt_colind,
 
  710     const Kokkos::View<
const int*,
 
  711                        typename LocalMap::device_type,
 
  712                        Kokkos::MemoryUnmanaged>& src_pids,
 
  713     const Kokkos::View<
int*,
 
  714                        typename LocalMap::device_type,
 
  715                        Kokkos::MemoryUnmanaged>& tgt_pids,
 
  716     const size_t num_same_ids,
 
  717     const size_t tgt_num_rows,
 
  718     const size_t tgt_num_nonzeros,
 
  719     const int my_tgt_pid)
 
  722   using Kokkos::subview;
 
  723   using Kokkos::parallel_for;
 
  724   using Kokkos::MemoryUnmanaged;
 
  725   using packet_type = Packet;
 
  726   using local_map_type = LocalMap;
 
  727   using local_graph_type = LocalGraph;
 
  728   using buffer_device_type = BufferDevice;
 
  729   using device_type = 
typename LocalMap::device_type;
 
  730   using LO = 
typename LocalMap::local_ordinal_type;
 
  731   using execution_space = 
typename device_type::execution_space;
 
  732   using size_type = 
typename Kokkos::View<LO*, device_type>::size_type;
 
  733   using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_t>>;
 
  735   const char prefix[] = 
"UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays: ";
 
  // Short local names for the target sizes and the calling process's rank.
  737   const size_t N = tgt_num_rows;
 
  738   const size_t mynnz = tgt_num_nonzeros;
 
  742   const int my_pid = my_tgt_pid;
 
  // Step 1: loop over all N+1 row-pointer slots (body elided in this listing).
  751     range_policy(0, N+1),
 
  752     KOKKOS_LAMBDA(
const size_t i) {
 
  // Step 2: same IDs -- target row i gets the length of source row i.
  759     range_policy(0, num_same_ids),
 
  760     KOKKOS_LAMBDA(
const size_t i) {
 
  761       const LO tgt_lid = 
static_cast<LO
>(i);
 
  762       const LO src_lid = 
static_cast<LO
>(i);
 
  763       tgt_rowptr(tgt_lid) = local_graph.row_map(src_lid+1)
 
  764                           - local_graph.row_map(src_lid);
 
  // Step 3: permuted IDs -- target row permute_to_lids(i) gets the length of
  // source row permute_from_lids(i).
  769   const size_type num_permute_to_lids = permute_to_lids.extent(0);
 
  771     range_policy(0, num_permute_to_lids),
 
  772     KOKKOS_LAMBDA(
const size_t i) {
 
  773       const LO tgt_lid = permute_to_lids(i);
 
  774       const LO src_lid = permute_from_lids(i);
 
  775       tgt_rowptr(tgt_lid) = local_graph.row_map(src_lid+1)
 
  776                           - local_graph.row_map(src_lid);
 
  // Step 4: per-import offsets into `imports`, with one extra trailing slot.
  781   const size_type num_import_lids = import_lids.extent(0);
 
  782   View<size_t*, device_type> offsets(
"offsets", num_import_lids+1);
 
  // Debug-only consistency check: the last offset must equal the buffer length.
  785 #ifdef HAVE_TPETRA_DEBUG 
  787     auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
 
  788     const bool condition =
 
  789       nth_offset_h != 
static_cast<size_t>(imports.extent(0));
 
  790     TEUCHOS_TEST_FOR_EXCEPTION
 
  791       (condition, std::logic_error, prefix
 
  792        << 
"The final offset in bytes " << nth_offset_h
 
  793        << 
" != imports.size() = " << imports.extent(0)
 
  794        << 
".  Please report this bug to the Tpetra developers.");
 
  796 #endif // HAVE_TPETRA_DEBUG 
  // Step 5: add the number of imported entries to each target row's length.
  799   setupRowPointersForRemotes<packet_type,LO,device_type,buffer_device_type>(
 
  800       tgt_rowptr, import_lids, imports, num_packets_per_lid);
 
  // Step 6: new_start_row tracks the next free slot of every row while the
  // copy/unpack steps below fill the target arrays.
  804   View<size_t*, device_type> new_start_row(
"new_start_row", N+1);
 
  807   makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
 
  // The last row pointer must equal the expected number of nonzeros.
  809     auto nth_tgt_rowptr_h = getEntryOnHost(tgt_rowptr, N);
 
  810     bool condition = nth_tgt_rowptr_h != mynnz;
 
  811     TEUCHOS_TEST_FOR_EXCEPTION(condition, std::invalid_argument,
 
  812       prefix << 
"CRS_rowptr[last] = " <<
 
  813       nth_tgt_rowptr_h << 
"!= mynnz = " << mynnz << 
".");
 
  // Step 7: copy locally available rows (same IDs first, then permuted IDs).
  817   copyDataFromSameIDs<LocalGraph,LocalMap>(tgt_colind, tgt_pids, new_start_row,
 
  818       tgt_rowptr, src_pids, local_graph, local_col_map, num_same_ids, my_pid);
 
  820   copyDataFromPermuteIDs<LocalGraph,LocalMap>(tgt_colind, tgt_pids, new_start_row,
 
  821       tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
 
  822       local_graph, local_col_map, my_pid);
 
  // Step 8: nothing to unpack when the import buffer is empty.
  // NOTE(review): if extent() returns an unsigned type, `<= 0` is the same
  // as `== 0`; confirm and consider simplifying.
  824   if (imports.extent(0) <= 0) {
 
  // Step 9: unpack the imported rows behind the locally copied data.
  828   unpackAndCombineIntoCrsArrays2<
 
  829     packet_type,local_graph_type,local_map_type,buffer_device_type>(
 
  830         tgt_colind, tgt_pids, new_start_row, offsets, import_lids, imports,
 
  831         num_packets_per_lid, local_graph, local_col_map, my_pid);
 
  // Host-facing wrapper that unpacks imported rows into a globally indexed
  // graph. Visible steps:
  //   - copy the ArrayView inputs into device views (names ending in _d),
  //   - allocate `indices` and `row_ptrs_beg` sized like the graph's arrays,
  //   - fill row_ptrs_end either from row_ptrs_beg(i) + num_row_entries(i) or
  //     from row_ptrs_beg(i+1),
  //   - call Impl::unpackAndCombine with Packet = GO and unpack_pids = false,
  //   - optionally refill the per-row entry counts afterwards.
  // NOTE(review): the function-name line, the `graph` and `imports`
  // parameters, the copy calls that produce the *_d views, the branch
  // conditions and the final loop body are on lines elided from this listing.
  // The name used for this block is an assumption.
  871 template<
class LO, 
class GO, 
class Node>
 
  876     const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
 
  877     const Teuchos::ArrayView<const LO>& importLIDs,
 
  // Tail of a precondition check whose head is elided.
  884       "Graph must be globally indexed!");
 
  888   using UnpackAndCombineCrsGraphImpl::unpackAndCombine;
 
  890   using device_type = 
typename Node::device_type;
 
  891   using buffer_device_type = 
typename graph_type::buffer_device_type;
 
  892   using execution_space = 
typename device_type::execution_space;
 
  // NOTE(review): this policy indexes with LO, while the lambdas below take
  // `const size_t i`; confirm the mixed index types are intended.
  893   using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
 
  894   using row_ptrs_type = 
typename graph_type::local_graph_type::row_map_type::non_const_type;
 
  895   using indices_type = 
typename graph_type::t_GlobalOrdinal_1D;
 
  899   buffer_device_type bufferOutputDevice;
 
  // Arguments of the calls that create the device-side copies of the inputs.
  906         imports.getRawPtr(), imports.size(),
 
  909   auto num_packets_per_lid_d =
 
  911         numPacketsPerLID.getRawPtr(), numPacketsPerLID.size(),
 
  912         true, 
"num_packets_per_lid");
 
  916         importLIDs.getRawPtr(), importLIDs.size(),
 
  917         true, 
"import_lids");
 
  // Working arrays sized like the graph's global-index and row-pointer arrays.
  921   indices_type indices(
"indices", graph.
k_gblInds1D_.extent(0));
 
  924   row_ptrs_type row_ptrs_beg(
"row_ptrs_beg", graph.
k_rowPtrs_.extent(0));
 
  // N is the row count: one less than the row-pointer length, or 0 if empty.
  927   const size_t N = (row_ptrs_beg.extent(0) == 0 ? 0 : row_ptrs_beg.extent(0) - 1);
 
  928   row_ptrs_type row_ptrs_end(
"row_ptrs_end", N);
 
  // Set when per-row entry counts are in use and must be recomputed after
  // unpacking.
  930   bool refill_num_row_entries = 
false;
 
  933     refill_num_row_entries = 
true;
 
  // Variant A: row end = row begin + stored entry count.
  935     Kokkos::parallel_for(
"Fill end row pointers", range_policy(0, N),
 
  936         KOKKOS_LAMBDA(
const size_t i){
 
  937           row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
 
  // Variant B: row end = begin of the next row.
  945     Kokkos::parallel_for(
"Fill end row pointers",
 
  946         range_policy(0, N), KOKKOS_LAMBDA(
const size_t i){
 
  947         row_ptrs_end(i) = row_ptrs_beg(i+1);
 
  // Packets are global ordinals; the final `false` is unpack_pids.
  952   unpackAndCombine<LO, GO, row_ptrs_type, indices_type, buffer_device_type>
 
  953     (row_ptrs_beg, row_ptrs_end, indices, imports_d,
 
  954      num_packets_per_lid_d, import_lids_d, 
false);
 
  958   if (refill_num_row_entries) {
 
  959     Kokkos::parallel_for(
"Fill num entries",
 
  960         range_policy(0, N), KOKKOS_LAMBDA(
const size_t i){
 
  // DualView-based variant of the graph unpack wrapper.
  // Its first visible statement raises std::logic_error ("METHOD NOT
  // COMPLETE") with a test argument that is literally `true`, so the code
  // below it is presumably never executed.
  // NOTE(review): parts of the parameter list, the sync guards and the
  // static_assert tail are on lines elided from this listing.
  970 template<
class LO, 
class GO, 
class Node>
 
  972 unpackCrsGraphAndCombineNew(
 
  976     const Kokkos::DualView<
const size_t*,
 
  978     const Kokkos::DualView<
const LO*,
 
  984   TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, 
"METHOD NOT COMPLETE");
 
  986   using UnpackAndCombineCrsGraphImpl::unpackAndCombine;
 
  989   using device_type = 
typename Node::device_type;
 
  991   using packet_type = 
typename graph_type::packet_type;
 
  992   using local_graph_type = 
typename graph_type::local_graph_type;
 
  993   using buffer_device_type = 
typename graph_type::buffer_device_type;
 
  994   using buffer_memory_space = 
typename buffer_device_type::memory_space;
 
  995   using memory_space = 
typename device_type::memory_space;
 
  997   using row_ptrs_type = 
typename graph_type::local_graph_type::row_map_type::non_const_type;
 
  998   using execution_space = 
typename device_type::execution_space;
 
  999   using indices_type =  Kokkos::View<GO*, execution_space>;
 
 1001   static_assert(std::is_same<device_type, typename local_graph_type::device_type>::value,
 
 1002                 "Node::device_type and LocalGraph::device_type must be " 
  // Bring the packet counts up to date on the device before taking the view.
 1007     numPacketsPerLID_nc.sync_device ();
 
 1009   auto num_packets_per_lid_d = numPacketsPerLID.view_device ();
 
  // importLIDs is required to be current on the device already.
 1011   TEUCHOS_ASSERT( ! importLIDs.need_sync_device () );
 
 1012   auto import_lids_d = importLIDs.view_device ();
 
 1016     imports_nc.sync_device ();
 
 1018   auto imports_d = imports.view_device ();
 
  // These views are default-constructed and passed on without being filled
  // in the visible code.
 1022   indices_type indices;
 
 1023   row_ptrs_type row_ptrs_beg;
 
 1024   row_ptrs_type row_ptrs_end;
 
  // NOTE(review): six template arguments are passed here, while the template
  // in this file whose prefix string names unpackAndCombine declares five
  // parameters (LocalOrdinal, Packet, RowView, IndicesView, BufferDevice).
  // This would not compile if instantiated -- confirm and fix when the
  // method is completed.
 1025   unpackAndCombine<LO,packet_type,row_ptrs_type,indices_type,device_type,buffer_device_type>(
 
 1026       row_ptrs_beg, row_ptrs_end, indices, imports_d,
 
 1027       num_packets_per_lid_d, import_lids_d, 
false);
 
 // unpackAndCombineWithOwningPIDsCount<LocalOrdinal, GlobalOrdinal, Node>.
 // NOTE(review): the name is taken from the error prefix string below; the declarator
 // line and several parameter lines are omitted from this listing (1080 -> 1084 -> 1086).
 1080 template<
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1084     const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
 
 1086     const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
 
 1091     const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
 
 1092     const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
 
 1094   using Kokkos::MemoryUnmanaged;
 
 1096   using device_type = 
typename Node::device_type;
 
 // Prepended to every error message raised by the checks below.
 1100   const char prefix[] = 
"unpackAndCombineWithOwningPIDsCount: ";
 
 // Argument checks; each failure throws std::invalid_argument:
 //   1. permuteToLIDs and permuteFromLIDs must have the same length;
 //   2. locallyIndexed (computed on an omitted line) must be true;
 //   3. importLIDs and numPacketsPerLID must have the same length.
 1102   TEUCHOS_TEST_FOR_EXCEPTION
 
 1103     (permuteToLIDs.size() != permuteFromLIDs.size(), std::invalid_argument,
 
 1104      prefix << 
"permuteToLIDs.size() = " << permuteToLIDs.size() << 
" != " 
 1105      "permuteFromLIDs.size() = " << permuteFromLIDs.size() << 
".");
 
 1109   TEUCHOS_TEST_FOR_EXCEPTION
 
 1110     (! locallyIndexed, std::invalid_argument, prefix << 
"The input " 
 1111     "CrsGraph 'sourceGraph' must be locally indexed.");
 
 1112   TEUCHOS_TEST_FOR_EXCEPTION
 
 1113     (importLIDs.size() != numPacketsPerLID.size(), std::invalid_argument,
 
 1114      prefix << 
"importLIDs.size() = " << importLIDs.size() << 
" != " 
 1115      "numPacketsPerLID.size() = " << numPacketsPerLID.size() << 
".");
 
 // Build the *_d inputs from the raw host pointers and lengths of the ArrayViews.
 // NOTE(review): the callee lines are omitted; presumably this is
 // create_mirror_view_from_raw_host_array, whose (device, ptr, size, copy, label)
 // parameter order matches the visible arguments - in that case `true` is the copy
 // flag. Confirm against the full file.
 1118   auto permute_from_lids_d =
 
 1120                                            permuteFromLIDs.getRawPtr(),
 
 1121                                            permuteFromLIDs.size(), 
true,
 
 1122                                            "permute_from_lids");
 
 1125                                            imports.getRawPtr(),
 
 1126                                            imports.size(), 
true,
 
 1128   auto num_packets_per_lid_d =
 
 1130                                            numPacketsPerLID.getRawPtr(),
 
 1131                                            numPacketsPerLID.size(), 
true,
 
 1132                                            "num_packets_per_lid");
 
 // Call into the implementation routine (its name is on an omitted line) with the
 // local graph, the three *_d views built above and numSameIDs.
 1135     packet_type,local_graph_type,buffer_device_type>(
 
 1136       local_graph, permute_from_lids_d, imports_d, num_packets_per_lid_d, numSameIDs);
 
 // unpackAndCombineIntoCrsArrays<LocalOrdinal, GlobalOrdinal, Node>.
 // NOTE(review): the name is taken from the error prefix string below; the declarator
 // line and some parameter lines are omitted from this listing (1152 -> 1156 -> 1158 -> 1162).
 // Visible outputs: CRS_rowptr, CRS_colind (non-const ArrayViews) and TargetPids
 // (non-const Teuchos::Array).
 1152 template<
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1156     const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
 
 1158     const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
 
 1162     const size_t numSameIDs,
 
 1163     const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
 
 1164     const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
 
 1165     size_t TargetNumRows,
 
 1166     size_t TargetNumNonzeros,
 
 1167     const int MyTargetPID,
 
 1168     const Teuchos::ArrayView<size_t>& CRS_rowptr,
 
 1169     const Teuchos::ArrayView<GlobalOrdinal>& CRS_colind,
 
 1170     const Teuchos::ArrayView<const int>& SourcePids,
 
 1171     Teuchos::Array<int>& TargetPids)
 
 1175   using Teuchos::outArg;
 
 1176   using Teuchos::REDUCE_MAX;
 
 1177   using Teuchos::reduceAll;
 
 1178   using LO = LocalOrdinal;
 
 1179   using GO = GlobalOrdinal;
 
 // crs_graph_type is declared on an omitted line (1180).
 1181   using packet_type = 
typename crs_graph_type::packet_type;
 
 1182   using local_graph_type = 
typename crs_graph_type::local_graph_type;
 
 1183   using buffer_device_type = 
typename crs_graph_type::buffer_device_type;
 
 1184   using device_type = 
typename Node::device_type;
 
 1185   using size_type = 
typename Teuchos::ArrayView<const LO>::size_type;
 
 // Prepended to every error message raised by the checks below.
 1187   const char prefix[] = 
"Tpetra::Details::unpackAndCombineIntoCrsArrays: ";
 
 // Argument checks; each failure throws std::invalid_argument:
 //   1. CRS_rowptr must hold exactly TargetNumRows + 1 entries;
 //   2. permuteToLIDs and permuteFromLIDs must have the same length;
 //   3. importLIDs and numPacketsPerLID must have the same length.
 // NOTE(review): the first two messages have no space before `!=`, so the printed
 // size runs straight into the operator; the third message does have the space.
 1189   TEUCHOS_TEST_FOR_EXCEPTION(
 
 1190     TargetNumRows + 1 != static_cast<size_t>(CRS_rowptr.size()),
 
 1191     std::invalid_argument, prefix << 
"CRS_rowptr.size() = " <<
 
 1192     CRS_rowptr.size() << 
"!= TargetNumRows+1 = " << TargetNumRows+1 << 
".");
 
 1194   TEUCHOS_TEST_FOR_EXCEPTION(
 
 1195     permuteToLIDs.size() != permuteFromLIDs.size(), std::invalid_argument,
 
 1196     prefix << 
"permuteToLIDs.size() = " << permuteToLIDs.size()
 
 1197     << 
"!= permuteFromLIDs.size() = " << permuteFromLIDs.size() << 
".");
 
 1198   const size_type numImportLIDs = importLIDs.size();
 
 1200   TEUCHOS_TEST_FOR_EXCEPTION(
 
 1201     numImportLIDs != numPacketsPerLID.size(), std::invalid_argument,
 
 1202     prefix << 
"importLIDs.size() = " << numImportLIDs << 
" != " 
 1203     "numPacketsPerLID.size() = " << numPacketsPerLID.size() << 
".");
 
 // Make TargetPids exactly TargetNumNonzeros long, then set every entry to -1.
 // NOTE(review): assign(n, value) presumably sets the length on its own, which would
 // make the preceding resize redundant - confirm before simplifying.
 1206   if (static_cast<size_t>(TargetPids.size()) != TargetNumNonzeros) {
 
 1207     TargetPids.resize(TargetNumNonzeros);
 
 1209   TargetPids.assign(TargetNumNonzeros, -1);
 
 // Local map object obtained from the column Map of sourceGraph; passed to the
 // implementation call further down.
 1213   auto local_col_map = sourceGraph.
getColMap()->getLocalMap();
 
 // Default-constructed device objects. bufferOutputDevice is the first argument of
 // the two fully visible calls below; outputDevice is presumably used on omitted lines.
 1216   device_type outputDevice;
 
 1217   buffer_device_type bufferOutputDevice;
 
 // Build the *_d views from the raw host pointers and lengths of the input arrays.
 // The five communication inputs are typed on buffer_device_type.
 // NOTE(review): the callee lines are omitted; presumably this is
 // create_mirror_view_from_raw_host_array, whose (device, ptr, size, copy, label)
 // parameter order matches the visible arguments - in that case `true` is the copy flag.
 1219   Kokkos::View<const LO*, buffer_device_type> import_lids_d =
 
 1221       (bufferOutputDevice, importLIDs.getRawPtr(),
 
 1222        importLIDs.size(), 
true, 
"import_lids");
 
 1224   Kokkos::View<const packet_type*, buffer_device_type> imports_d =
 
 1226       (bufferOutputDevice, imports.getRawPtr(),
 
 1227        imports.size(), 
true, 
"imports");
 
 1229   Kokkos::View<const size_t*, buffer_device_type> num_packets_per_lid_d =
 
 1231       numPacketsPerLID.getRawPtr(), numPacketsPerLID.size(),
 
 1232       true, 
"num_packets_per_lid");
 
 1234   Kokkos::View<const LO*, buffer_device_type> permute_to_lids_d =
 
 1236       permuteToLIDs.getRawPtr(), permuteToLIDs.size(),
 
 1237       true, 
"permute_to_lids");
 
 1239   Kokkos::View<const LO*, buffer_device_type> permute_from_lids_d =
 
 1241       permuteFromLIDs.getRawPtr(), permuteFromLIDs.size(),
 
 1242       true, 
"permute_from_lids");
 
 // The CRS arrays and the PID arrays are typed on device_type rather than
 // buffer_device_type.
 // NOTE(review): the label for crs_colind_d reads `crs_colidx`, not `crs_colind`;
 // possibly an unintended mismatch.
 1244   Kokkos::View<size_t*, device_type> crs_rowptr_d =
 
 1246       CRS_rowptr.getRawPtr(), CRS_rowptr.size(),
 
 1247       true, 
"crs_rowptr");
 
 1249   Kokkos::View<GO*, device_type> crs_colind_d =
 
 1251       CRS_colind.getRawPtr(), CRS_colind.size(),
 
 1252       true, 
"crs_colidx");
 
 1254   Kokkos::View<const int*, device_type> src_pids_d =
 
 1256       SourcePids.getRawPtr(), SourcePids.size(),
 
 1259   Kokkos::View<int*, device_type> tgt_pids_d =
 
 1261       TargetPids.getRawPtr(), TargetPids.size(),
 
 // Call into the implementation routine (its name is on an omitted line) with the
 // local graph, the local column map, all *_d views and the scalar arguments.
 1264   using local_map_type = decltype(local_col_map);
 
 1266     packet_type,local_graph_type,local_map_type,buffer_device_type>(
 
 1267       local_graph, local_col_map, import_lids_d, imports_d, num_packets_per_lid_d,
 
 1268       permute_to_lids_d, permute_from_lids_d, crs_rowptr_d, crs_colind_d, src_pids_d,
 
 1269       tgt_pids_d, numSameIDs, TargetNumRows, TargetNumNonzeros, MyTargetPID);
 
 // Host views constructed directly from the caller-provided pointers and lengths of
 // CRS_rowptr, CRS_colind and TargetPids.
 // NOTE(review): presumably these are the destinations of deep_copy calls on omitted
 // lines that bring the device results back to the caller - confirm.
 1274   typename decltype(crs_rowptr_d)::HostMirror crs_rowptr_h(
 
 1275       CRS_rowptr.getRawPtr(), CRS_rowptr.size());
 
 1278   typename decltype(crs_colind_d)::HostMirror crs_colind_h(
 
 1279       CRS_colind.getRawPtr(), CRS_colind.size());
 
 1282   typename decltype(tgt_pids_d)::HostMirror tgt_pids_h(
 
 1283       TargetPids.getRawPtr(), TargetPids.size());
 
 // Instantiation macro taking (LO, GO, NT). Its body names four Details:: function
 // templates with those arguments: unpackCrsGraphAndCombine, unpackCrsGraphAndCombineNew,
 // unpackAndCombineIntoCrsArrays and unpackAndCombineWithOwningPIDsCount.
 // NOTE(review): presumably each entry is an explicit instantiation; the lead-in line
 // of each entry and some parameter lines are omitted from this listing (the embedded
 // numbers jump, e.g. 1291 -> 1293 and 1297 -> 1302).
 // NOTE(review): the DualView parameters spell CrsGraph<LO, GO, NT>::packet_type and
 // ::buffer_device_type without `typename`, unlike the ArrayView parameters.
 1291 #define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_INSTANT( LO, GO, NT ) \ 
 1293   Details::unpackCrsGraphAndCombine<LO, GO, NT>( \ 
 1294     CrsGraph<LO, GO, NT>&, \ 
 1295     const Teuchos::ArrayView<const typename CrsGraph<LO,GO,NT>::packet_type>&, \ 
 1296     const Teuchos::ArrayView<const size_t>&, \ 
 1297     const Teuchos::ArrayView<const LO>&, \ 
 1302   Details::unpackCrsGraphAndCombineNew<LO, GO, NT>( \ 
 1303     CrsGraph<LO, GO, NT>&, \ 
 1304     const Kokkos::DualView<const CrsGraph<LO, GO, NT>::packet_type*, \ 
 1305                            CrsGraph<LO, GO, NT>::buffer_device_type>&, \ 
 1306     const Kokkos::DualView<const size_t*, \ 
 1307                            CrsGraph<LO, GO, NT>::buffer_device_type>&, \ 
 1308     const Kokkos::DualView<const LO*, \ 
 1309                            CrsGraph<LO, GO, NT>::buffer_device_type>&, \ 
 1312     const CombineMode); \ 
 1314   Details::unpackAndCombineIntoCrsArrays<LO, GO, NT>( \ 
 1315     const CrsGraph<LO, GO, NT> &, \ 
 1316     const Teuchos::ArrayView<const LO>&, \ 
 1317     const Teuchos::ArrayView<const typename CrsGraph<LO,GO,NT>::packet_type>&, \ 
 1318     const Teuchos::ArrayView<const size_t>&, \ 
 1321     const CombineMode, \ 
 1323     const Teuchos::ArrayView<const LO>&, \ 
 1324     const Teuchos::ArrayView<const LO>&, \ 
 1328     const Teuchos::ArrayView<size_t>&, \ 
 1329     const Teuchos::ArrayView<GO>&, \ 
 1330     const Teuchos::ArrayView<const int>&, \ 
 1331     Teuchos::Array<int>&); \ 
 1333   Details::unpackAndCombineWithOwningPIDsCount<LO, GO, NT>( \ 
 1334     const CrsGraph<LO, GO, NT> &, \ 
 1335     const Teuchos::ArrayView<const LO> &, \ 
 1336     const Teuchos::ArrayView<const typename CrsGraph<LO,GO,NT>::packet_type> &, \ 
 1337     const Teuchos::ArrayView<const size_t>&, \ 
 1342     const Teuchos::ArrayView<const LO>&, \ 
 1343     const Teuchos::ArrayView<const LO>&); 
 1345 #endif // TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP 
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input. 
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph. 
t_GlobalOrdinal_1D k_gblInds1D_
Global column indices for all rows. 
Declaration of the Tpetra::CrsGraph class. 
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices. 
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst. 
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row. 
Functions for manipulating CRS arrays. 
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process. 
Sets up and executes a communication plan for a Tpetra DistObject. 
local_graph_type::row_map_type::const_type k_rowPtrs_
Row offsets for "1-D" storage. 
CombineMode
Rule for combining data in an Import or Export. 
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays 
Unpacks and combines a single row of the CrsGraph. 
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers. 
A distributed graph accessed by rows (adjacency lists) and stored sparsely. 
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices. 
void unpackCrsGraphAndCombine(CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &importLIDs, size_t constantNumPackets, Distributor &distor, CombineMode combineMode)
Unpack the imported column indices and combine into graph. 
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts. 
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView. 
local_graph_type getLocalGraph() const 
Get the local graph. 
Declaration and definition of Tpetra::Details::getEntryOnHost. 
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer. 
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.