40 #ifndef TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP
41 #define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
54 #include "Kokkos_Core.hpp"
79 #ifndef DOXYGEN_SHOULD_SKIP_THIS
82 #endif // DOXYGEN_SHOULD_SKIP_THIS
89 namespace UnpackAndCombineCrsGraphImpl {
100 template<
class Packet,
class GO,
class Device,
class BufferDevice>
102 unpackRow (
const Kokkos::View<GO*,Device,Kokkos::MemoryUnmanaged>& gids_out,
103 const Kokkos::View<int*,Device,Kokkos::MemoryUnmanaged>& pids_out,
104 const Kokkos::View<const Packet*,BufferDevice>& imports,
106 const size_t num_ent)
108 using size_type =
typename Kokkos::View<GO*,Device>::size_type;
116 for (size_type k=0; k<num_ent; k++)
117 gids_out(k) = imports(offset+k);
120 if (pids_out.size() > 0) {
121 for (size_type k=0; k<num_ent; k++) {
122 pids_out(k) =
static_cast<int>(imports(offset+num_ent+k));
139 template<
class LocalOrdinal,
146 using LO = LocalOrdinal;
147 using GO =
typename IndicesView::value_type;
148 using packet_type = Packet;
149 using row_ptrs_type = RowView;
150 using indices_type = IndicesView;
151 using buffer_device_type = BufferDevice;
153 using device_type =
typename IndicesView::device_type;
154 using execution_space =
typename device_type::execution_space;
156 using num_packets_per_lid_type = Kokkos::View<const size_t*, buffer_device_type>;
157 using offsets_type = Kokkos::View<const size_t*, device_type>;
158 using input_buffer_type = Kokkos::View<const packet_type*, buffer_device_type>;
159 using import_lids_type = Kokkos::View<const LO*, buffer_device_type>;
161 using gids_scratch_type = Kokkos::View<GO*, device_type>;
162 using pids_scratch_type = Kokkos::View<int*,device_type>;
164 row_ptrs_type row_ptrs_beg;
165 row_ptrs_type row_ptrs_end;
166 indices_type indices;
167 input_buffer_type imports;
168 num_packets_per_lid_type num_packets_per_lid;
169 import_lids_type import_lids;
170 offsets_type offsets;
173 Kokkos::Experimental::UniqueToken<execution_space,
174 Kokkos::Experimental::UniqueTokenScope::Global> tokens;
175 gids_scratch_type gids_scratch;
176 pids_scratch_type pids_scratch;
179 using value_type = Kokkos::pair<int, LO>;
182 const row_ptrs_type& row_ptrs_beg_in,
183 const row_ptrs_type& row_ptrs_end_in,
184 const indices_type& indices_in,
185 const input_buffer_type& imports_in,
186 const num_packets_per_lid_type& num_packets_per_lid_in,
187 const import_lids_type& import_lids_in,
188 const offsets_type& offsets_in,
189 const size_t max_num_ent_in,
190 const bool unpack_pids_in) :
191 row_ptrs_beg(row_ptrs_beg_in),
192 row_ptrs_end(row_ptrs_end_in),
195 num_packets_per_lid(num_packets_per_lid_in),
196 import_lids(import_lids_in),
198 max_num_ent(max_num_ent_in),
199 unpack_pids(unpack_pids_in),
200 tokens(execution_space()),
201 gids_scratch(
"gids_scratch", tokens.size() * max_num_ent),
202 pids_scratch(
"pids_scratch", tokens.size() * max_num_ent)
205 KOKKOS_INLINE_FUNCTION
void init(value_type& dst)
const
207 using Tpetra::Details::OrdinalTraits;
208 dst = Kokkos::make_pair(0, OrdinalTraits<LO>::invalid());
211 KOKKOS_INLINE_FUNCTION
void
212 join(
volatile value_type& dst,
const volatile value_type& src)
const
218 using Tpetra::Details::OrdinalTraits;
219 if (src.second != OrdinalTraits<LO>::invalid()) {
224 if (dst.second == OrdinalTraits<LO>::invalid() ||
225 src.second < dst.second) {
231 KOKKOS_INLINE_FUNCTION
232 void operator()(
const LO i, value_type& dst)
const
235 using Kokkos::subview;
236 using Kokkos::MemoryUnmanaged;
237 using size_type =
typename execution_space::size_type;
238 using slice =
typename Kokkos::pair<size_type, size_type>;
240 using pids_out_type = View<int*,device_type, MemoryUnmanaged>;
241 using gids_out_type = View<GO*, device_type, MemoryUnmanaged>;
243 const size_t num_packets_this_lid = num_packets_per_lid(i);
244 const size_t num_ent = (unpack_pids) ? num_packets_this_lid/2
245 : num_packets_this_lid;
246 if (unpack_pids && num_packets_this_lid%2 != 0) {
249 dst = Kokkos::make_pair(1, i);
259 const size_t buf_size = imports.size();
260 const size_t offset = offsets(i);
262 if (offset > buf_size || offset + num_packets_this_lid > buf_size) {
263 dst = Kokkos::make_pair(2, i);
270 const size_type token = tokens.acquire();
271 const size_t a =
static_cast<size_t>(token) * max_num_ent;
272 const size_t b = a + num_ent;
273 gids_out_type gids_out = subview(gids_scratch, slice(a, b));
274 pids_out_type pids_out = subview(pids_scratch, slice(a, (unpack_pids ? b : a)));
276 const int err = unpackRow (gids_out, pids_out, imports, offset, num_ent);
279 dst = Kokkos::make_pair(3, i);
280 tokens.release(token);
284 auto import_lid = import_lids(i);
285 for (
size_t k = 0; k < num_ent; ++k) {
286 indices(row_ptrs_end(import_lid)) = gids_out(k);
288 row_ptrs_end(import_lid) += 1;
291 tokens.release(token);
302 template<
class LocalOrdinal,
class GlobalOrdinal,
class Node,
303 class RowView,
class IndicesView,
class BufferDevice>
306 (
const RowView& row_ptrs_beg,
307 const RowView& row_ptrs_end,
308 IndicesView& indices,
309 const Kokkos::View<
const GlobalOrdinal*, BufferDevice,
310 Kokkos::MemoryUnmanaged>& imports,
311 const Kokkos::View<
const size_t*, BufferDevice,
312 Kokkos::MemoryUnmanaged>& num_packets_per_lid,
313 const Kokkos::View<
const LocalOrdinal*, BufferDevice,
314 Kokkos::MemoryUnmanaged>& import_lids,
315 const typename CrsGraph<LocalOrdinal, GlobalOrdinal,
316 Node>::padding_type& padding,
317 const bool unpack_pids,
321 using LO = LocalOrdinal;
322 using GO = GlobalOrdinal;
323 using device_type =
typename Node::device_type;
324 using execution_space =
typename BufferDevice::execution_space;
326 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
327 using unpack_functor_type =
330 const char prefix[] =
331 "Tpetra::Details::UnpackAndCombineCrsGraphImpl::unpackAndCombine: ";
333 const size_t num_import_lids =
static_cast<size_t>(import_lids.extent(0));
334 if (num_import_lids == 0) {
340 padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding,
344 Kokkos::View<size_t*, device_type> offsets(
"offsets", num_import_lids+1);
351 Kokkos::parallel_reduce
353 range_policy (0, LO (num_packets_per_lid.size ())),
354 KOKKOS_LAMBDA (
const LO i,
size_t& running_max_num_ent) {
355 const size_t num_packets_this_lid = num_packets_per_lid(i);
356 const size_t num_ent = (unpack_pids) ? num_packets_this_lid/2 :
357 num_packets_this_lid;
358 if (num_ent > running_max_num_ent) {
359 running_max_num_ent = num_ent;
361 }, Kokkos::Max<size_t> (max_num_ent));
364 unpack_functor_type f (row_ptrs_beg, row_ptrs_end, indices, imports,
365 num_packets_per_lid, import_lids, offsets,
366 max_num_ent, unpack_pids);
368 typename unpack_functor_type::value_type x;
369 Kokkos::parallel_reduce(range_policy(0, static_cast<LO>(num_import_lids)), f, x);
370 auto x_h = x.to_std_pair();
371 TEUCHOS_TEST_FOR_EXCEPTION(x_h.first != 0, std::runtime_error,
372 prefix <<
"UnpackAndCombineFunctor reported error code "
373 << x_h.first <<
" for the first bad row " << x_h.second);
376 template<
class Packet,
class LocalGraph,
class BufferDevice>
379 const LocalGraph& local_graph,
380 const Kokkos::View<
const typename LocalGraph::data_type*,
381 typename LocalGraph::device_type,
382 Kokkos::MemoryUnmanaged> permute_from_lids,
383 const Kokkos::View<const Packet*, BufferDevice>& ,
384 const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
385 const size_t num_same_ids)
387 using Kokkos::parallel_reduce;
388 using local_graph_type = LocalGraph;
389 using LO =
typename local_graph_type::data_type;
390 using device_type =
typename local_graph_type::device_type;
391 using execution_space =
typename device_type::execution_space;
392 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
398 num_items =
static_cast<LO
>(num_same_ids);
402 range_policy(0, num_items),
403 KOKKOS_LAMBDA(
const LO lid,
size_t& update) {
404 update +=
static_cast<size_t>(local_graph.row_map[lid+1]
405 -local_graph.row_map[lid]);
411 num_items =
static_cast<LO
>(permute_from_lids.extent(0));
415 range_policy(0, num_items),
416 KOKKOS_LAMBDA(
const LO i,
size_t& update) {
417 const LO lid = permute_from_lids(i);
418 update +=
static_cast<size_t>(local_graph.row_map[lid+1]
419 - local_graph.row_map[lid]);
426 size_t tot_num_ent = 0;
427 parallel_reduce(
"SumReduce",
428 num_packets_per_lid.size(),
429 KOKKOS_LAMBDA(
const int& i,
size_t& lsum) {
430 lsum += num_packets_per_lid(i) / 2;
431 }, Kokkos::Sum<size_t>(tot_num_ent));
432 count += tot_num_ent;
439 template<
class Packet,
class LO,
class Device,
class BufferDevice>
441 setupRowPointersForRemotes(
442 const Kokkos::View<size_t*, Device>& tgt_rowptr,
443 const Kokkos::View<const LO*, BufferDevice>& import_lids,
444 const Kokkos::View<const Packet*, BufferDevice>& ,
445 const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid)
447 using Kokkos::parallel_reduce;
448 using device_type = Device;
449 using execution_space =
typename device_type::execution_space;
450 using size_type =
typename Kokkos::View<size_t*,device_type>::size_type;
451 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
453 const size_type N = num_packets_per_lid.extent(0);
454 parallel_for(
"Setup row pointers for remotes",
456 KOKKOS_LAMBDA(
const size_t i){
457 using atomic_incr_type =
typename std::remove_reference<decltype(tgt_rowptr(0))>::type;
458 const size_t num_packets_this_lid = num_packets_per_lid(i);
459 const size_t num_ent = num_packets_this_lid / 2;
460 Kokkos::atomic_fetch_add(&tgt_rowptr(import_lids(i)), atomic_incr_type(num_ent));
465 template<
class Device>
467 makeCrsRowPtrFromLengths(
468 const Kokkos::View<size_t*,Device,Kokkos::MemoryUnmanaged>& tgt_rowptr,
469 const Kokkos::View<size_t*,Device>& new_start_row)
471 using Kokkos::parallel_scan;
472 using device_type = Device;
473 using execution_space =
typename device_type::execution_space;
474 using size_type =
typename Kokkos::View<size_t*,device_type>::size_type;
475 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
476 const size_type N = new_start_row.extent(0);
479 KOKKOS_LAMBDA(
const size_t& i,
size_t& update,
const bool&
final) {
480 auto cur_val = tgt_rowptr(i);
482 tgt_rowptr(i) = update;
483 new_start_row(i) = tgt_rowptr(i);
490 template<
class LocalGraph,
class LocalMap>
493 const Kokkos::View<
typename LocalMap::global_ordinal_type*,
494 typename LocalMap::device_type>& tgt_colind,
495 const Kokkos::View<int*, typename LocalMap::device_type>& tgt_pids,
496 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
497 const Kokkos::View<size_t*, typename LocalMap::device_type>& tgt_rowptr,
498 const Kokkos::View<const int*, typename LocalMap::device_type>& src_pids,
499 const LocalGraph& local_graph,
500 const LocalMap& local_col_map,
501 const size_t num_same_ids,
504 using Kokkos::parallel_for;
505 using device_type =
typename LocalMap::device_type;
506 using LO =
typename LocalMap::local_ordinal_type;
507 using execution_space =
typename device_type::execution_space;
508 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_t>>;
511 range_policy(0, num_same_ids),
512 KOKKOS_LAMBDA(
const size_t i) {
513 using atomic_incr_type =
typename std::remove_reference<decltype(new_start_row(0))>::type;
515 const LO src_lid =
static_cast<LO
>(i);
516 size_t src_row = local_graph.row_map(src_lid);
518 const LO tgt_lid =
static_cast<LO
>(i);
519 const size_t tgt_row = tgt_rowptr(tgt_lid);
521 const size_t nsr = local_graph.row_map(src_lid+1)
522 - local_graph.row_map(src_lid);
523 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
525 for (
size_t j=local_graph.row_map(src_lid);
526 j<local_graph.row_map(src_lid+1); ++j) {
527 LO src_col = local_graph.entries(j);
528 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
529 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
535 template<
class LocalGraph,
class LocalMap,
class BufferDevice>
537 copyDataFromPermuteIDs(
538 const Kokkos::View<
typename LocalMap::global_ordinal_type*,
539 typename LocalMap::device_type>& tgt_colind,
540 const Kokkos::View<
int*,
541 typename LocalMap::device_type>& tgt_pids,
542 const Kokkos::View<
size_t*,
543 typename LocalMap::device_type>& new_start_row,
544 const Kokkos::View<
size_t*,
545 typename LocalMap::device_type>& tgt_rowptr,
546 const Kokkos::View<
const int*,
547 typename LocalMap::device_type>& src_pids,
548 const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
549 BufferDevice, Kokkos::MemoryUnmanaged>& permute_to_lids,
550 const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
551 BufferDevice, Kokkos::MemoryUnmanaged>& permute_from_lids,
552 const LocalGraph& local_graph,
553 const LocalMap& local_col_map,
556 using Kokkos::parallel_for;
557 using device_type =
typename LocalMap::device_type;
558 using LO =
typename LocalMap::local_ordinal_type;
559 using execution_space =
typename device_type::execution_space;
560 using size_type =
typename Kokkos::View<LO*,device_type>::size_type;
561 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
563 const size_type num_permute_to_lids = permute_to_lids.extent(0);
566 range_policy(0, num_permute_to_lids),
567 KOKKOS_LAMBDA(
const size_t i) {
568 using atomic_incr_type =
typename std::remove_reference<decltype(new_start_row(0))>::type;
570 const LO src_lid = permute_from_lids(i);
571 const size_t src_row = local_graph.row_map(src_lid);
573 const LO tgt_lid = permute_to_lids(i);
574 const size_t tgt_row = tgt_rowptr(tgt_lid);
576 size_t nsr = local_graph.row_map(src_lid+1)
577 - local_graph.row_map(src_lid);
578 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
580 for (
size_t j=local_graph.row_map(src_lid);
581 j<local_graph.row_map(src_lid+1); ++j) {
582 LO src_col = local_graph.entries(j);
583 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
584 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
590 template<
class Packet,
class LocalGraph,
class LocalMap,
class BufferDevice>
592 unpackAndCombineIntoCrsArrays2(
593 const Kokkos::View<typename LocalMap::global_ordinal_type*, typename LocalMap::device_type>& tgt_colind,
594 const Kokkos::View<int*, typename LocalMap::device_type>& tgt_pids,
595 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
596 const Kokkos::View<const size_t*, typename LocalMap::device_type>& offsets,
598 const typename LocalMap::local_ordinal_type*,
600 Kokkos::MemoryUnmanaged>& import_lids,
601 const Kokkos::View<const Packet*, BufferDevice>& imports,
602 const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
608 using Kokkos::subview;
609 using Kokkos::MemoryUnmanaged;
610 using Kokkos::parallel_reduce;
611 using Kokkos::atomic_fetch_add;
613 using device_type =
typename LocalMap::device_type;
614 using LO =
typename LocalMap::local_ordinal_type;
615 using GO =
typename LocalMap::global_ordinal_type;
616 using execution_space =
typename device_type::execution_space;
617 using size_type =
typename Kokkos::View<LO*, device_type>::size_type;
618 using slice =
typename Kokkos::pair<size_type, size_type>;
619 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
621 using pids_out_type = View<int*,device_type, MemoryUnmanaged>;
622 using gids_out_type = View<GO*, device_type, MemoryUnmanaged>;
624 const size_type num_import_lids = import_lids.size();
625 const char prefix[] =
"UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays2: ";
629 parallel_reduce(
"Unpack and combine into CRS",
630 range_policy(0, num_import_lids),
631 KOKKOS_LAMBDA(
const size_t i,
int& err) {
632 using atomic_incr_type =
typename std::remove_reference< decltype( new_start_row(0) )>::type;
633 const size_t num_packets_this_lid = num_packets_per_lid(i);
634 const size_t num_ent = num_packets_this_lid / 2;
635 const size_t offset = offsets(i);
636 const LO lcl_row = import_lids(i);
637 const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
638 const size_t end_row = start_row + num_ent;
640 gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
641 pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
643 err += unpackRow (gids_out, pids_out, imports, offset, num_ent);
646 for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
647 const int pid = pids_out(j);
648 pids_out(j) = (pid != my_pid) ? pid : -1;
652 TEUCHOS_TEST_FOR_EXCEPTION(gbl_err_count != 0,
653 std::invalid_argument, prefix <<
654 "Attempting to unpack PIDs, but num_ent is not even; this should never "
655 "happen! Please report this bug to the Tpetra developers.");
660 template<
class Packet,
class LocalGraph,
class LocalMap,
class BufferDevice>
663 const LocalGraph & local_graph,
664 const LocalMap & local_col_map,
665 const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
667 Kokkos::MemoryUnmanaged>& import_lids,
668 const Kokkos::View<const Packet*, BufferDevice>& imports,
669 const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
670 const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
672 Kokkos::MemoryUnmanaged>& permute_to_lids,
673 const Kokkos::View<
const typename LocalMap::local_ordinal_type*,
675 Kokkos::MemoryUnmanaged>& permute_from_lids,
676 const Kokkos::View<
size_t*,
677 typename LocalMap::device_type,
678 Kokkos::MemoryUnmanaged>& tgt_rowptr,
679 const Kokkos::View<
typename LocalMap::global_ordinal_type*,
680 typename LocalMap::device_type,
681 Kokkos::MemoryUnmanaged>& tgt_colind,
682 const Kokkos::View<
const int*,
683 typename LocalMap::device_type,
684 Kokkos::MemoryUnmanaged>& src_pids,
685 const Kokkos::View<
int*,
686 typename LocalMap::device_type,
687 Kokkos::MemoryUnmanaged>& tgt_pids,
688 const size_t num_same_ids,
689 const size_t tgt_num_rows,
690 const size_t tgt_num_nonzeros,
691 const int my_tgt_pid)
694 using Kokkos::subview;
695 using Kokkos::parallel_for;
696 using Kokkos::MemoryUnmanaged;
697 using packet_type = Packet;
698 using local_map_type = LocalMap;
699 using local_graph_type = LocalGraph;
700 using buffer_device_type = BufferDevice;
701 using device_type =
typename LocalMap::device_type;
702 using LO =
typename LocalMap::local_ordinal_type;
703 using execution_space =
typename device_type::execution_space;
704 using size_type =
typename Kokkos::View<LO*, device_type>::size_type;
705 using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_t>>;
707 const char prefix[] =
"UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays: ";
709 const size_t N = tgt_num_rows;
710 const size_t mynnz = tgt_num_nonzeros;
714 const int my_pid = my_tgt_pid;
723 range_policy(0, N+1),
724 KOKKOS_LAMBDA(
const size_t i) {
731 range_policy(0, num_same_ids),
732 KOKKOS_LAMBDA(
const size_t i) {
733 const LO tgt_lid =
static_cast<LO
>(i);
734 const LO src_lid =
static_cast<LO
>(i);
735 tgt_rowptr(tgt_lid) = local_graph.row_map(src_lid+1)
736 - local_graph.row_map(src_lid);
741 const size_type num_permute_to_lids = permute_to_lids.extent(0);
743 range_policy(0, num_permute_to_lids),
744 KOKKOS_LAMBDA(
const size_t i) {
745 const LO tgt_lid = permute_to_lids(i);
746 const LO src_lid = permute_from_lids(i);
747 tgt_rowptr(tgt_lid) = local_graph.row_map(src_lid+1)
748 - local_graph.row_map(src_lid);
753 const size_type num_import_lids = import_lids.extent(0);
754 View<size_t*, device_type> offsets(
"offsets", num_import_lids+1);
757 #ifdef HAVE_TPETRA_DEBUG
759 auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
760 const bool condition =
761 nth_offset_h !=
static_cast<size_t>(imports.extent(0));
762 TEUCHOS_TEST_FOR_EXCEPTION
763 (condition, std::logic_error, prefix
764 <<
"The final offset in bytes " << nth_offset_h
765 <<
" != imports.size() = " << imports.extent(0)
766 <<
". Please report this bug to the Tpetra developers.");
768 #endif // HAVE_TPETRA_DEBUG
771 setupRowPointersForRemotes<packet_type,LO,device_type,buffer_device_type>(
772 tgt_rowptr, import_lids, imports, num_packets_per_lid);
776 View<size_t*, device_type> new_start_row(
"new_start_row", N+1);
779 makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
781 auto nth_tgt_rowptr_h = getEntryOnHost(tgt_rowptr, N);
782 bool condition = nth_tgt_rowptr_h != mynnz;
783 TEUCHOS_TEST_FOR_EXCEPTION(condition, std::invalid_argument,
784 prefix <<
"CRS_rowptr[last] = " <<
785 nth_tgt_rowptr_h <<
"!= mynnz = " << mynnz <<
".");
789 copyDataFromSameIDs<LocalGraph,LocalMap>(tgt_colind, tgt_pids, new_start_row,
790 tgt_rowptr, src_pids, local_graph, local_col_map, num_same_ids, my_pid);
792 copyDataFromPermuteIDs<LocalGraph,LocalMap>(tgt_colind, tgt_pids, new_start_row,
793 tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
794 local_graph, local_col_map, my_pid);
796 if (imports.extent(0) <= 0) {
800 unpackAndCombineIntoCrsArrays2<
801 packet_type,local_graph_type,local_map_type,buffer_device_type>(
802 tgt_colind, tgt_pids, new_start_row, offsets, import_lids, imports,
803 num_packets_per_lid, local_graph, local_col_map, my_pid);
859 template<
class LocalOrdinal,
class GlobalOrdinal,
class Node>
863 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
865 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
870 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
871 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
873 using Kokkos::MemoryUnmanaged;
875 using device_type =
typename Node::device_type;
879 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
881 TEUCHOS_TEST_FOR_EXCEPTION
882 (permuteToLIDs.size() != permuteFromLIDs.size(), std::invalid_argument,
883 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size() <<
" != "
884 "permuteFromLIDs.size() = " << permuteFromLIDs.size() <<
".");
888 TEUCHOS_TEST_FOR_EXCEPTION
889 (! locallyIndexed, std::invalid_argument, prefix <<
"The input "
890 "CrsGraph 'sourceGraph' must be locally indexed.");
891 TEUCHOS_TEST_FOR_EXCEPTION
892 (importLIDs.size() != numPacketsPerLID.size(), std::invalid_argument,
893 prefix <<
"importLIDs.size() = " << importLIDs.size() <<
" != "
894 "numPacketsPerLID.size() = " << numPacketsPerLID.size() <<
".");
897 auto permute_from_lids_d =
899 permuteFromLIDs.getRawPtr(),
900 permuteFromLIDs.size(),
true,
901 "permute_from_lids");
905 imports.size(),
true,
907 auto num_packets_per_lid_d =
909 numPacketsPerLID.getRawPtr(),
910 numPacketsPerLID.size(),
true,
911 "num_packets_per_lid");
914 packet_type,local_graph_type,buffer_device_type>(
915 local_graph, permute_from_lids_d, imports_d, num_packets_per_lid_d, numSameIDs);
931 template<
class LocalOrdinal,
class GlobalOrdinal,
class Node>
935 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
937 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
941 const size_t numSameIDs,
942 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
943 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
944 size_t TargetNumRows,
945 size_t TargetNumNonzeros,
946 const int MyTargetPID,
947 const Teuchos::ArrayView<size_t>& CRS_rowptr,
948 const Teuchos::ArrayView<GlobalOrdinal>& CRS_colind,
949 const Teuchos::ArrayView<const int>& SourcePids,
950 Teuchos::Array<int>& TargetPids)
954 using Teuchos::outArg;
955 using Teuchos::REDUCE_MAX;
956 using Teuchos::reduceAll;
957 using LO = LocalOrdinal;
958 using GO = GlobalOrdinal;
960 using packet_type =
typename crs_graph_type::packet_type;
961 using local_graph_type =
typename crs_graph_type::local_graph_type;
962 using buffer_device_type =
typename crs_graph_type::buffer_device_type;
963 using device_type =
typename Node::device_type;
964 using size_type =
typename Teuchos::ArrayView<const LO>::size_type;
966 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays: ";
968 TEUCHOS_TEST_FOR_EXCEPTION(
969 TargetNumRows + 1 != static_cast<size_t>(CRS_rowptr.size()),
970 std::invalid_argument, prefix <<
"CRS_rowptr.size() = " <<
971 CRS_rowptr.size() <<
"!= TargetNumRows+1 = " << TargetNumRows+1 <<
".");
973 TEUCHOS_TEST_FOR_EXCEPTION(
974 permuteToLIDs.size() != permuteFromLIDs.size(), std::invalid_argument,
975 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size()
976 <<
"!= permuteFromLIDs.size() = " << permuteFromLIDs.size() <<
".");
977 const size_type numImportLIDs = importLIDs.size();
979 TEUCHOS_TEST_FOR_EXCEPTION(
980 numImportLIDs != numPacketsPerLID.size(), std::invalid_argument,
981 prefix <<
"importLIDs.size() = " << numImportLIDs <<
" != "
982 "numPacketsPerLID.size() = " << numPacketsPerLID.size() <<
".");
985 if (static_cast<size_t>(TargetPids.size()) != TargetNumNonzeros) {
986 TargetPids.resize(TargetNumNonzeros);
988 TargetPids.assign(TargetNumNonzeros, -1);
992 auto local_col_map = sourceGraph.
getColMap()->getLocalMap();
995 device_type outputDevice;
996 buffer_device_type bufferOutputDevice;
998 Kokkos::View<const LO*, buffer_device_type> import_lids_d =
1000 (bufferOutputDevice, importLIDs.getRawPtr(),
1001 importLIDs.size(),
true,
"import_lids");
1003 Kokkos::View<const packet_type*, buffer_device_type> imports_d =
1005 (bufferOutputDevice, imports.getRawPtr(),
1006 imports.size(),
true,
"imports");
1008 Kokkos::View<const size_t*, buffer_device_type> num_packets_per_lid_d =
1010 numPacketsPerLID.getRawPtr(), numPacketsPerLID.size(),
1011 true,
"num_packets_per_lid");
1013 Kokkos::View<const LO*, buffer_device_type> permute_to_lids_d =
1015 permuteToLIDs.getRawPtr(), permuteToLIDs.size(),
1016 true,
"permute_to_lids");
1018 Kokkos::View<const LO*, buffer_device_type> permute_from_lids_d =
1020 permuteFromLIDs.getRawPtr(), permuteFromLIDs.size(),
1021 true,
"permute_from_lids");
1023 Kokkos::View<size_t*, device_type> crs_rowptr_d =
1025 CRS_rowptr.getRawPtr(), CRS_rowptr.size(),
1026 true,
"crs_rowptr");
1028 Kokkos::View<GO*, device_type> crs_colind_d =
1030 CRS_colind.getRawPtr(), CRS_colind.size(),
1031 true,
"crs_colidx");
1033 Kokkos::View<const int*, device_type> src_pids_d =
1035 SourcePids.getRawPtr(), SourcePids.size(),
1038 Kokkos::View<int*, device_type> tgt_pids_d =
1040 TargetPids.getRawPtr(), TargetPids.size(),
1043 using local_map_type = decltype(local_col_map);
1045 packet_type,local_graph_type,local_map_type,buffer_device_type>(
1046 local_graph, local_col_map, import_lids_d, imports_d, num_packets_per_lid_d,
1047 permute_to_lids_d, permute_from_lids_d, crs_rowptr_d, crs_colind_d, src_pids_d,
1048 tgt_pids_d, numSameIDs, TargetNumRows, TargetNumNonzeros, MyTargetPID);
1053 typename decltype(crs_rowptr_d)::HostMirror crs_rowptr_h(
1054 CRS_rowptr.getRawPtr(), CRS_rowptr.size());
1057 typename decltype(crs_colind_d)::HostMirror crs_colind_h(
1058 CRS_colind.getRawPtr(), CRS_colind.size());
1061 typename decltype(tgt_pids_d)::HostMirror tgt_pids_h(
1062 TargetPids.getRawPtr(), TargetPids.size());
1070 #define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_INSTANT( LO, GO, NT ) \
1072 Details::unpackAndCombineIntoCrsArrays<LO, GO, NT>( \
1073 const CrsGraph<LO, GO, NT> &, \
1074 const Teuchos::ArrayView<const LO>&, \
1075 const Teuchos::ArrayView<const typename CrsGraph<LO,GO,NT>::packet_type>&, \
1076 const Teuchos::ArrayView<const size_t>&, \
1079 const CombineMode, \
1081 const Teuchos::ArrayView<const LO>&, \
1082 const Teuchos::ArrayView<const LO>&, \
1086 const Teuchos::ArrayView<size_t>&, \
1087 const Teuchos::ArrayView<GO>&, \
1088 const Teuchos::ArrayView<const int>&, \
1089 Teuchos::Array<int>&); \
1091 Details::unpackAndCombineWithOwningPIDsCount<LO, GO, NT>( \
1092 const CrsGraph<LO, GO, NT> &, \
1093 const Teuchos::ArrayView<const LO> &, \
1094 const Teuchos::ArrayView<const typename CrsGraph<LO,GO,NT>::packet_type> &, \
1095 const Teuchos::ArrayView<const size_t>&, \
1100 const Teuchos::ArrayView<const LO>&, \
1101 const Teuchos::ArrayView<const LO>&);
1103 #endif // TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types, into the Tpetra::Details namespace.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type > local_graph_type
The type of the part of the sparse graph on each MPI process.
Declaration of the Tpetra::CrsGraph class.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
Functions for manipulating CRS arrays.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsetsFromConstantCount.
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Unpacks and combines a single row of the CrsGraph.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
local_graph_type getLocalGraph() const
Get the local graph.
Declaration and definition of Tpetra::Details::getEntryOnHost.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.