17 #ifndef KOKKOS_STATICCRSGRAPH_HPP
18 #define KOKKOS_STATICCRSGRAPH_HPP
19 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
20 #define KOKKOS_IMPL_PUBLIC_INCLUDE
21 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STATICCRSGRAPH
24 #include <Kokkos_Macros.hpp>
26 #if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4)
27 #if defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS) && \
28 !defined(KOKKOS_IMPL_DO_NOT_WARN_INCLUDE_STATIC_CRS_GRAPH)
30 [[deprecated(
"Deprecated <Kokkos_StaticCrsGraph.hpp> header is included")]]
int
31 emit_warning_kokkos_static_crs_graph_deprecated() {
34 static auto do_not_include = emit_warning_kokkos_static_crs_graph_deprecated();
38 #error "Deprecated <Kokkos_StaticCrsGraph.hpp> header is included"
44 #include <Kokkos_View.hpp>
46 #include <Kokkos_Parallel_Reduce.hpp>
51 template <
class RowOffsetsType,
class RowBlockOffsetsType>
52 struct StaticCrsGraphBalancerFunctor {
53 using int_type =
typename RowOffsetsType::non_const_value_type;
54 RowOffsetsType row_offsets;
55 RowBlockOffsetsType row_block_offsets;
57 int_type cost_per_row, num_blocks;
59 StaticCrsGraphBalancerFunctor(RowOffsetsType row_offsets_,
60 RowBlockOffsetsType row_block_offsets_,
61 int_type cost_per_row_, int_type num_blocks_)
62 : row_offsets(row_offsets_),
63 row_block_offsets(row_block_offsets_),
64 cost_per_row(cost_per_row_),
65 num_blocks(num_blocks_) {}
67 KOKKOS_INLINE_FUNCTION
68 void operator()(
const int_type& iRow)
const {
69 const int_type num_rows = row_offsets.extent(0) - 1;
70 const int_type num_entries = row_offsets(num_rows);
71 const int_type total_cost = num_entries + num_rows * cost_per_row;
73 const double cost_per_workset = 1.0 * total_cost / num_blocks;
75 const int_type row_cost =
76 row_offsets(iRow + 1) - row_offsets(iRow) + cost_per_row;
78 int_type count = row_offsets(iRow + 1) + cost_per_row * iRow;
80 if (iRow == num_rows - 1) row_block_offsets(num_blocks) = num_rows;
83 int_type current_block =
84 (count - row_cost - cost_per_row) / cost_per_workset;
85 int_type end_block = count / cost_per_workset;
88 if (current_block >= num_blocks - 2) {
89 if ((current_block == num_blocks - 2) &&
90 (count >= (current_block + 1) * cost_per_workset)) {
92 int_type cc = count - row_cost - cost_per_row;
93 int_type block = cc / cost_per_workset;
94 while ((block > 0) && (block == current_block)) {
95 cc = row_offsets(row) + row * cost_per_row;
96 block = cc / cost_per_workset;
99 if ((count - cc - row_cost - cost_per_row) <
100 num_entries - row_offsets(iRow + 1)) {
101 row_block_offsets(current_block + 1) = iRow + 1;
103 row_block_offsets(current_block + 1) = iRow;
107 if ((count >= (current_block + 1) * cost_per_workset) ||
108 (iRow + 2 == int_type(row_offsets.extent(0)))) {
109 if (end_block > current_block + 1) {
110 int_type num_block = end_block - current_block;
111 row_block_offsets(current_block + 1) = iRow;
112 for (int_type block = current_block + 2; block <= end_block;
114 if ((block < current_block + 2 + (num_block - 1) / 2))
115 row_block_offsets(block) = iRow;
117 row_block_offsets(block) = iRow + 1;
119 row_block_offsets(current_block + 1) = iRow + 1;
163 template <
class GraphType>
188 KOKKOS_INLINE_FUNCTION
191 : colidx_(colidx_in), stride_(stride),
length(count) {}
205 template <
class OffsetType>
207 const typename GraphType::entries_type& colidx_in,
209 const OffsetType& idx,
210 const std::enable_if_t<std::is_integral_v<OffsetType>,
int>& = 0)
211 : colidx_(&colidx_in(idx)), stride_(stride),
length(count) {}
230 KOKKOS_INLINE_FUNCTION
232 return colidx_[i * stride_];
236 KOKKOS_INLINE_FUNCTION
273 template <
class DataType,
class Arg1Type,
class Arg2Type = void,
274 class Arg3Type = void,
275 typename SizeType =
typename ViewTraits<DataType*, Arg1Type, Arg2Type,
276 Arg3Type>::size_type>
279 using traits = ViewTraits<DataType*, Arg1Type, Arg2Type, Arg3Type>;
282 using data_type = DataType;
283 using array_layout =
typename traits::array_layout;
284 using execution_space =
typename traits::execution_space;
285 using device_type =
typename traits::device_type;
286 using memory_traits =
typename traits::memory_traits;
287 using size_type = SizeType;
292 typename traits::host_mirror_space,
293 memory_traits, size_type>;
296 View<const size_type*, array_layout, device_type, memory_traits>;
298 View<data_type*, array_layout, device_type, memory_traits>;
299 using row_block_type =
300 View<const size_type*, array_layout, device_type, memory_traits>;
302 entries_type entries;
303 row_map_type row_map;
304 row_block_type row_block_offsets;
307 KOKKOS_INLINE_FUNCTION
311 KOKKOS_INLINE_FUNCTION
313 : entries(rhs.entries),
314 row_map(rhs.row_map),
315 row_block_offsets(rhs.row_block_offsets) {}
317 template <
class EntriesType,
class RowMapType>
318 KOKKOS_INLINE_FUNCTION
StaticCrsGraph(
const EntriesType& entries_,
319 const RowMapType& row_map_)
320 : entries(entries_), row_map(row_map_), row_block_offsets() {}
326 KOKKOS_INLINE_FUNCTION
328 entries = rhs.entries;
329 row_map = rhs.row_map;
330 row_block_offsets = rhs.row_block_offsets;
337 KOKKOS_DEFAULTED_FUNCTION
342 KOKKOS_INLINE_FUNCTION
344 return (row_map.extent(0) != 0)
345 ? row_map.extent(0) -
static_cast<size_type
>(1)
346 : static_cast<size_type>(0);
349 KOKKOS_INLINE_FUNCTION constexpr
bool is_allocated()
const {
350 return (row_map.is_allocated() && entries.is_allocated());
371 KOKKOS_INLINE_FUNCTION
373 const size_type start = row_map(i);
376 const data_type count =
static_cast<data_type
>(row_map(i + 1) - start);
389 size_type fix_cost_per_row = 4) {
390 View<size_type*, array_layout, device_type> block_offsets(
391 "StatisCrsGraph::load_balance_offsets", num_blocks + 1);
393 Impl::StaticCrsGraphBalancerFunctor<
394 row_map_type, View<size_type*, array_layout, device_type> >
395 partitioner(row_map, block_offsets, fix_cost_per_row, num_blocks);
397 Kokkos::parallel_for(
"Kokkos::StaticCrsGraph::create_block_partitioning",
400 typename device_type::execution_space().fence(
401 "Kokkos::StaticCrsGraph::create_block_partitioning:: fence after "
404 row_block_offsets = block_offsets;
410 template <
class StaticCrsGraphType,
class InputSizeType>
411 typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
412 const std::string& label,
const std::vector<InputSizeType>& input);
414 template <
class StaticCrsGraphType,
class InputSizeType>
415 typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
416 const std::string& label,
417 const std::vector<std::vector<InputSizeType> >& input);
421 template <
class DataType,
class Arg1Type,
class Arg2Type,
class Arg3Type,
423 typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
424 SizeType>::HostMirror
425 create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
428 template <
class DataType,
class Arg1Type,
class Arg2Type,
class Arg3Type,
430 typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
431 SizeType>::HostMirror
432 create_mirror(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
440 #include <impl/Kokkos_StaticCrsGraph_factory.hpp>
448 template <
class GraphType>
449 struct StaticCrsGraphMaximumEntry {
450 using execution_space =
typename GraphType::execution_space;
451 using value_type =
typename GraphType::data_type;
453 const typename GraphType::entries_type entries;
455 StaticCrsGraphMaximumEntry(
const GraphType& graph) : entries(graph.entries) {}
457 KOKKOS_INLINE_FUNCTION
458 void operator()(
const unsigned i, value_type& update)
const {
459 if (update < entries(i)) update = entries(i);
462 KOKKOS_INLINE_FUNCTION
463 void init(value_type& update)
const { update = 0; }
465 KOKKOS_INLINE_FUNCTION
466 void join(value_type& update,
const value_type& input)
const {
467 if (update < input) update = input;
473 template <
class DataType,
class Arg1Type,
class Arg2Type,
class Arg3Type,
475 DataType maximum_entry(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type,
476 Arg3Type, SizeType>& graph) {
478 StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>;
479 using FunctorType = Impl::StaticCrsGraphMaximumEntry<GraphType>;
482 Kokkos::parallel_reduce(
"Kokkos::maximum_entry", graph.entries.extent(0),
483 FunctorType(graph), result);
492 #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STATICCRSGRAPH
493 #undef KOKKOS_IMPL_PUBLIC_INCLUDE
494 #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STATICCRSGRAPH
KOKKOS_INLINE_FUNCTION GraphRowViewConst< StaticCrsGraph > rowConst(const data_type i) const
Return a const view of row i of the graph.
KOKKOS_INLINE_FUNCTION StaticCrsGraph()
Construct an empty view.
const ordinal_type length
Number of entries in the row.
KOKKOS_INLINE_FUNCTION StaticCrsGraph(const StaticCrsGraph &rhs)
Copy constructor (shallow copy).
KOKKOS_INLINE_FUNCTION GraphRowViewConst(ordinal_type *const colidx_in, const ordinal_type &stride, const ordinal_type &count)
Constructor.
KOKKOS_INLINE_FUNCTION ordinal_type & colidx(const ordinal_type &i) const
(Const) reference to the column index of entry i in this row of the sparse matrix.
KOKKOS_DEFAULTED_FUNCTION ~StaticCrsGraph()=default
Destroy this view of the array. If the last view then allocated memory is deallocated.
const typename GraphType::data_type ordinal_type
The type of the column indices in the row.
KOKKOS_INLINE_FUNCTION ordinal_type & operator()(const ordinal_type &i) const
An alias for colidx.
void create_block_partitioning(size_type num_blocks, size_type fix_cost_per_row=4)
Create a row partitioning into a given number of blocks balancing non-zeros + a fixed cost per row...
Declaration of parallel operators.
KOKKOS_INLINE_FUNCTION size_type numRows() const
Return number of rows in the graph.
Execution policy for work over a range of an integral type.
Compressed row storage array.
View of a row of a sparse graph.
KOKKOS_INLINE_FUNCTION StaticCrsGraph & operator=(const StaticCrsGraph &rhs)
Assign to a view of the rhs array. If the old view is the last view then allocated memory is dealloca...
KOKKOS_INLINE_FUNCTION GraphRowViewConst(const typename GraphType::entries_type &colidx_in, const ordinal_type &stride, const ordinal_type &count, const OffsetType &idx, const std::enable_if_t< std::is_integral_v< OffsetType >, int > &=0)
Constructor with offset into colidx array.