Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_packCrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
41 #define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
42 
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
52 #include "Tpetra_CrsGraph_decl.hpp"
53 #include <memory>
54 #include <string>
55 
77 
78 namespace Tpetra {
79 
80 #ifndef DOXYGEN_SHOULD_SKIP_THIS
81 // Forward declaration of Distributor
82 class Distributor;
83 #endif // DOXYGEN_SHOULD_SKIP_THIS
84 
85 //
86 // Users must never rely on anything in the Details namespace.
87 //
88 namespace Details {
89 
90 namespace PackCrsGraphImpl {
98 template<class OutputOffsetsViewType,
99  class CountsViewType,
100  class InputOffsetsViewType,
101  class InputLocalRowIndicesViewType,
102  class InputLocalRowPidsViewType,
103  const bool debug =
104 #ifdef HAVE_TPETRA_DEBUG
105  true
106 #else
107  false
108 #endif // HAVE_TPETRA_DEBUG
109  >
111 public:
112  typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
113  typedef typename CountsViewType::non_const_value_type count_type;
114  typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
115  typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
116  typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
117  // output Views drive where execution happens.
118  typedef typename OutputOffsetsViewType::device_type device_type;
119  static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
120  typename device_type::execution_space>::value,
121  "OutputOffsetsViewType and CountsViewType must have the same execution space.");
122  static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
123  "OutputOffsetsViewType must be a Kokkos::View.");
124  static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
125  "OutputOffsetsViewType must be a nonconst Kokkos::View.");
126  static_assert (std::is_integral<output_offset_type>::value,
127  "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
128  static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
129  "CountsViewType must be a Kokkos::View.");
130  static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
131  "CountsViewType must be a nonconst Kokkos::View.");
132  static_assert (std::is_integral<count_type>::value,
133  "The type of each entry of CountsViewType must be a built-in integer type.");
134  static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
135  "InputOffsetsViewType must be a Kokkos::View.");
136  static_assert (std::is_integral<input_offset_type>::value,
137  "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
138  static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
139  "InputLocalRowIndicesViewType must be a Kokkos::View.");
140  static_assert (std::is_integral<local_row_index_type>::value,
141  "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
142 
143  NumPacketsAndOffsetsFunctor(const OutputOffsetsViewType& outputOffsets,
144  const CountsViewType& counts,
145  const InputOffsetsViewType& rowOffsets,
146  const InputLocalRowIndicesViewType& lclRowInds,
147  const InputLocalRowPidsViewType& lclRowPids) :
148  outputOffsets_ (outputOffsets),
149  counts_ (counts),
150  rowOffsets_ (rowOffsets),
151  lclRowInds_ (lclRowInds),
152  lclRowPids_ (lclRowPids),
153  error_ ("error") // don't forget this, or you'll get segfaults!
154  {
155  if (debug) {
156  const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
157 
158  if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
159  std::ostringstream os;
160  os << "lclRowInds.extent(0) = " << numRowsToPack
161  << " != counts.extent(0) = " << counts_.extent (0)
162  << ".";
163  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
164  }
165  if (static_cast<size_t> (numRowsToPack + 1) !=
166  static_cast<size_t> (outputOffsets_.extent (0))) {
167  std::ostringstream os;
168  os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
169  << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
170  << ".";
171  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
172  }
173  }
174  }
175 
176  KOKKOS_INLINE_FUNCTION void
177  operator() (const local_row_index_type& curInd,
178  output_offset_type& update,
179  const bool final) const
180  {
181  if (debug) {
182  if (curInd < static_cast<local_row_index_type> (0)) {
183  error_ () = 1;
184  return;
185  }
186  }
187 
188  if (final) {
189  if (debug) {
190  if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
191  error_ () = 2;
192  return;
193  }
194  }
195  outputOffsets_(curInd) = update;
196  }
197 
198  if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
199  const auto lclRow = lclRowInds_(curInd);
200  if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
201  static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
202  error_ () = 3;
203  return;
204  }
205  // count_type could differ from the type of each row offset.
206  // For example, row offsets might each be 64 bits, but if their
207  // difference always fits in 32 bits, we may then safely use a
208  // 32-bit count_type.
209  const count_type count =
210  static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
211 
212  // We pack first the global column indices and then pids (if any),
213  // However, if the number of entries in the row is zero, we pack nothing.
214  const count_type numEntToPack = (count == 0)
215  ? static_cast<count_type>(0)
216  : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
217 
218  if (final) {
219  counts_(curInd) = numEntToPack;
220  }
221  update += numEntToPack;
222  }
223  }
224 
225  // mfh 31 May 2017: Don't need init or join. If you have join, MUST
226  // have join both with and without volatile! Otherwise intrawarp
227  // joins are really slow on GPUs.
228 
230  int getError () const {
231  auto error_h = Kokkos::create_mirror_view (error_);
232  Kokkos::deep_copy (error_h, error_);
233  return error_h ();
234  }
235 
236 private:
237  OutputOffsetsViewType outputOffsets_;
238  CountsViewType counts_;
239  typename InputOffsetsViewType::const_type rowOffsets_;
240  typename InputLocalRowIndicesViewType::const_type lclRowInds_;
241  typename InputLocalRowPidsViewType::const_type lclRowPids_;
242  Kokkos::View<int, device_type> error_;
243 };
244 
254 template<class OutputOffsetsViewType,
255  class CountsViewType,
256  class InputOffsetsViewType,
257  class InputLocalRowIndicesViewType,
258  class InputLocalRowPidsViewType>
259 typename CountsViewType::non_const_value_type
260 computeNumPacketsAndOffsets(const OutputOffsetsViewType& outputOffsets,
261  const CountsViewType& counts,
262  const InputOffsetsViewType& rowOffsets,
263  const InputLocalRowIndicesViewType& lclRowInds,
264  const InputLocalRowPidsViewType& lclRowPids)
265 {
266  typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
267  CountsViewType, typename InputOffsetsViewType::const_type,
268  typename InputLocalRowIndicesViewType::const_type,
269  typename InputLocalRowPidsViewType::const_type> functor_type;
270  typedef typename CountsViewType::non_const_value_type count_type;
271  typedef typename OutputOffsetsViewType::size_type size_type;
272  typedef typename OutputOffsetsViewType::execution_space execution_space;
273  typedef typename functor_type::local_row_index_type LO;
274  typedef Kokkos::RangePolicy<execution_space, LO> range_type;
275  const char prefix[] = "computeNumPacketsAndOffsets: ";
276 
277  count_type count = 0;
278  const count_type numRowsToPack = lclRowInds.extent (0);
279 
280  if (numRowsToPack == 0) {
281  return count;
282  }
283  else {
284  TEUCHOS_TEST_FOR_EXCEPTION
285  (rowOffsets.extent (0) <= static_cast<size_type> (1),
286  std::invalid_argument, prefix << "There is at least one row to pack, "
287  "but the graph has no rows. lclRowInds.extent(0) = " <<
288  numRowsToPack << ", but rowOffsets.extent(0) = " <<
289  rowOffsets.extent (0) << " <= 1.");
290  TEUCHOS_TEST_FOR_EXCEPTION
291  (outputOffsets.extent (0) !=
292  static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
293  prefix << "Output dimension does not match number of rows to pack. "
294  << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
295  << " != lclRowInds.extent(0) + 1 = "
296  << static_cast<size_type> (numRowsToPack + 1) << ".");
297  TEUCHOS_TEST_FOR_EXCEPTION
298  (counts.extent (0) != numRowsToPack, std::invalid_argument,
299  prefix << "counts.extent(0) = " << counts.extent (0)
300  << " != numRowsToPack = " << numRowsToPack << ".");
301 
302  functor_type f (outputOffsets, counts, rowOffsets, lclRowInds, lclRowPids);
303  Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
304 
305  // At least in debug mode, this functor checks for errors.
306  const int errCode = f.getError ();
307  TEUCHOS_TEST_FOR_EXCEPTION
308  (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
309  << errCode << " != 0.");
310 
311 #if 0
312  size_t total = 0;
313  for (LO k = 0; k < numRowsToPack; ++k) {
314  total += counts[k];
315  }
316  if (outputOffsets(numRowsToPack) != total) {
317  if (errStr.get () == NULL) {
318  errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
319  }
320  std::ostringstream& os = *errStr;
321  os << prefix
322  << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
323  << outputOffsets(numRowsToPack) << " != sum of counts = "
324  << total << "." << std::endl;
325  if (numRowsToPack != 0) {
326  // Only print the array if it's not too long.
327  if (numRowsToPack < static_cast<LO> (10)) {
328  os << "outputOffsets: [";
329  for (LO i = 0; i <= numRowsToPack; ++i) {
330  os << outputOffsets(i);
331  if (static_cast<LO> (i + 1) <= numRowsToPack) {
332  os << ",";
333  }
334  }
335  os << "]" << std::endl;
336  os << "counts: [";
337  for (LO i = 0; i < numRowsToPack; ++i) {
338  os << counts(i);
339  if (static_cast<LO> (i + 1) < numRowsToPack) {
340  os << ",";
341  }
342  }
343  os << "]" << std::endl;
344  }
345  else {
346  os << "outputOffsets(" << (numRowsToPack-1) << ") = "
347  << outputOffsets(numRowsToPack-1) << "." << std::endl;
348  }
349  }
350  count = outputOffsets(numRowsToPack);
351  return {false, errStr};
352  }
353 #endif // HAVE_TPETRA_DEBUG
354 
355  // Get last entry of outputOffsets, which is the sum of the entries
356  // of counts. Don't assume UVM.
357  using Tpetra::Details::getEntryOnHost;
358  return static_cast<count_type> (getEntryOnHost (outputOffsets,
359  numRowsToPack));
360  }
361 }
362 
373 template<class Packet,
374  class LocalMapType,
375  class BufferDeviceType,
376  class InputLidsType,
377  class InputPidsType>
378 KOKKOS_FUNCTION
379 size_t
380 packRow(const LocalMapType& col_map,
381  const Kokkos::View<Packet*, BufferDeviceType>& exports,
382  const InputLidsType& lids_in,
383  const InputPidsType& pids_in,
384  const size_t offset,
385  const size_t num_ent,
386  const bool pack_pids)
387 {
388  using LO = typename LocalMapType::local_ordinal_type;
389  using GO = typename LocalMapType::global_ordinal_type;
390 
391  if (num_ent == 0) {
392  // Empty rows always take zero bytes, to ensure sparsity.
393  return static_cast<size_t>(0);
394  }
395 
396  size_t num_ent_packed = num_ent;
397  if (pack_pids) {
398  num_ent_packed += num_ent;
399  }
400 
401  // Copy column indices one at a time, so that we don't need
402  // temporary storage.
403  for (size_t k = 0; k < num_ent; ++k) {
404  const LO lid = lids_in[k];
405  const GO gid = col_map.getGlobalElement (lid);
406  exports(offset+k) = gid;
407  }
408  // Copy PIDs one at a time, so that we don't need temporary storage.
409  if (pack_pids) {
410  for (size_t k = 0; k < num_ent; ++k) {
411  const LO lid = lids_in[k];
412  const int pid = pids_in[lid];
413  exports(offset+num_ent+k) = static_cast<GO>(pid);
414  }
415  }
416 
417  return num_ent_packed;
418 }
419 
420 template<class Packet,
421  class LocalGraph,
422  class LocalMap,
423  class BufferDeviceType>
424 struct PackCrsGraphFunctor {
425  using local_graph_type = LocalGraph;
426  using local_map_type = LocalMap;
427  using LO = typename local_map_type::local_ordinal_type;
428  using GO = typename local_map_type::global_ordinal_type;
429 
430  using num_packets_per_lid_view_type =
431  Kokkos::View<const size_t*, BufferDeviceType>;
432  using offsets_view_type = Kokkos::View<const size_t*, BufferDeviceType>;
433  using exports_view_type = Kokkos::View<Packet*, BufferDeviceType>;
434  using export_lids_view_type =
436  using source_pids_view_type =
438 
439  using count_type =
440  typename num_packets_per_lid_view_type::non_const_value_type;
441  using offset_type = typename offsets_view_type::non_const_value_type;
442  using value_type = Kokkos::pair<int, LO>;
443 
444  static_assert (std::is_same<LO, typename local_graph_type::data_type>::value,
445  "local_map_type::local_ordinal_type and "
446  "local_graph_type::data_type must be the same.");
447 
448  local_graph_type local_graph;
449  local_map_type local_col_map;
450  exports_view_type exports;
451  num_packets_per_lid_view_type num_packets_per_lid;
452  export_lids_view_type export_lids;
453  source_pids_view_type source_pids;
454  offsets_view_type offsets;
455  bool pack_pids;
456 
457  PackCrsGraphFunctor(const local_graph_type& local_graph_in,
458  const local_map_type& local_col_map_in,
459  const exports_view_type& exports_in,
460  const num_packets_per_lid_view_type& num_packets_per_lid_in,
461  const export_lids_view_type& export_lids_in,
462  const source_pids_view_type& source_pids_in,
463  const offsets_view_type& offsets_in,
464  const bool pack_pids_in) :
465  local_graph (local_graph_in),
466  local_col_map (local_col_map_in),
467  exports (exports_in),
468  num_packets_per_lid (num_packets_per_lid_in),
469  export_lids (export_lids_in),
470  source_pids (source_pids_in),
471  offsets (offsets_in),
472  pack_pids (pack_pids_in)
473  {
474  const LO numRows = local_graph_in.numRows ();
475  const LO rowMapDim =
476  static_cast<LO> (local_graph.row_map.extent (0));
477  TEUCHOS_TEST_FOR_EXCEPTION
478  (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
479  std::logic_error, "local_graph.row_map.extent(0) = "
480  << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
481  }
482 
483  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
484  {
485  using ::Tpetra::Details::OrdinalTraits;
486  dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
487  }
488 
489  KOKKOS_INLINE_FUNCTION void
490  join (volatile value_type& dst, const volatile value_type& src) const
491  {
492  // `dst` should reflect the first (least) bad index and all other
493  // associated error codes and data, so prefer keeping it.
494  if (src.first != 0 && dst.first == 0) {
495  dst = src;
496  }
497  }
498 
499  KOKKOS_INLINE_FUNCTION
500  void operator() (const LO i, value_type& dst) const
501  {
502  const size_t offset = offsets[i];
503  const LO export_lid = export_lids[i];
504  const size_t buf_size = exports.size();
505  const size_t num_packets_this_lid = num_packets_per_lid(i);
506  const size_t num_ent =
507  static_cast<size_t> (local_graph.row_map[export_lid+1]
508  - local_graph.row_map[export_lid]);
509 
510  // Only pack this row's data if it has a nonzero number of
511  // entries. We can do this because receiving processes get the
512  // number of packets, and will know that zero packets means zero
513  // entries.
514  if (num_ent == 0) {
515  return;
516  }
517 
518  if (export_lid >= static_cast<LO>(local_graph.numRows())) {
519  if (dst.first != 0) { // keep only the first error
520  dst = Kokkos::make_pair (1, i); // invalid row
521  }
522  return;
523  }
524  else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
525  if (dst.first != 0) { // keep only the first error
526  dst = Kokkos::make_pair (2, i); // out of bounds
527  }
528  return;
529  }
530 
531  // We can now pack this row
532 
533  // Since the graph is locally indexed on the calling process, we
534  // have to use its column Map (which it _must_ have in this case)
535  // to convert to global indices.
536  const auto row_beg = local_graph.row_map[export_lid];
537  const auto row_end = local_graph.row_map[export_lid + 1];
538  auto lids_in = Kokkos::subview (local_graph.entries,
539  Kokkos::make_pair (row_beg, row_end));
540  size_t num_ent_packed_this_row =
541  packRow (local_col_map, exports, lids_in,
542  source_pids, offset, num_ent, pack_pids);
543  if (num_ent_packed_this_row != num_packets_this_lid) {
544  if (dst.first != 0) { // keep only the first error
545  dst = Kokkos::make_pair (3, i);
546  }
547  }
548  }
549 };
550 
558 template<class Packet,
559  class LocalGraph,
560  class LocalMap,
561  class BufferDeviceType>
562 void
563 do_pack(const LocalGraph& local_graph,
564  const LocalMap& local_map,
565  const Kokkos::View<Packet*, BufferDeviceType>& exports,
566  const typename PackTraits<
567  size_t
568  >::input_array_type& num_packets_per_lid,
569  const typename PackTraits<
570  typename LocalMap::local_ordinal_type
571  >::input_array_type& export_lids,
572  const typename PackTraits<
573  int
574  >::input_array_type& source_pids,
575  const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
576  const bool pack_pids)
577 {
578  using LO = typename LocalMap::local_ordinal_type;
579  using execution_space = typename LocalGraph::device_type::execution_space;
580  using range_type = Kokkos::RangePolicy<execution_space, LO>;
581  const char prefix[] = "Tpetra::Details::PackCrsGraphImpl::do_pack: ";
582 
583  if (export_lids.extent (0) != 0) {
584  TEUCHOS_TEST_FOR_EXCEPTION
585  (static_cast<size_t> (offsets.extent (0)) !=
586  static_cast<size_t> (export_lids.extent (0) + 1),
587  std::invalid_argument, prefix << "offsets.extent(0) = "
588  << offsets.extent (0) << " != export_lids.extent(0) (= "
589  << export_lids.extent (0) << ") + 1.");
590  TEUCHOS_TEST_FOR_EXCEPTION
591  (export_lids.extent (0) != num_packets_per_lid.extent (0),
592  std::invalid_argument, prefix << "export_lids.extent(0) = " <<
593  export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
594  << num_packets_per_lid.extent (0) << ".");
595  // If exports has nonzero length at this point, then the graph
596  // has at least one entry to pack. Thus, if packing process
597  // ranks, we had better have at least one process rank to pack.
598  TEUCHOS_TEST_FOR_EXCEPTION
599  (pack_pids && exports.extent (0) != 0 &&
600  source_pids.extent (0) == 0, std::invalid_argument, prefix <<
601  "pack_pids is true, and exports.extent(0) = " <<
602  exports.extent (0) << " != 0, meaning that we need to pack at "
603  "least one graph entry, but source_pids.extent(0) = 0.");
604  }
605 
606  using pack_functor_type =
607  PackCrsGraphFunctor<Packet, LocalGraph, LocalMap,
608  BufferDeviceType>;
609  pack_functor_type f (local_graph, local_map, exports,
610  num_packets_per_lid, export_lids,
611  source_pids, offsets, pack_pids);
612 
613  typename pack_functor_type::value_type result;
614  range_type range (0, num_packets_per_lid.extent (0));
615  Kokkos::parallel_reduce (range, f, result);
616 
617  if (result.first != 0) {
618  // We can't deep_copy from AnonymousSpace Views, so we can't
619  // print out any information from them in case of error.
620  std::ostringstream os;
621  if (result.first == 1) { // invalid local row index
622  os << "invalid local row index";
623  }
624  else if (result.first == 2) { // invalid offset
625  os << "invalid offset";
626  }
627  TEUCHOS_TEST_FOR_EXCEPTION
628  (true, std::runtime_error, prefix << "PackCrsGraphFunctor "
629  "reported error code " << result.first << " (" << os.str ()
630  << ") for the first bad row " << result.second << ".");
631  }
632 }
633 
// Pack the rows listed in export_lids of sourceGraph into the
// `exports` DualView, working on Kokkos Views.
//
// NOTE(review): the declarator line carrying this function's name
// (listing line 662) is absent from this listing.  The error prefix
// below suggests the name is packCrsGraph -- confirm against the
// original file.
//
// Parameters:
//   sourceGraph          [in]  Graph to pack; its local graph and the
//                              local Map of its column Map are used.
//   exports              [out] Pack buffer; replaced by a new DualView
//                              if too short, or by an empty one if
//                              there are no rows to pack.
//   num_packets_per_lid  [out] Per-row packet counts, filled by
//                              computeNumPacketsAndOffsets.
//   export_lids          [in]  Local indices of the rows to pack.
//   export_pids          [in]  Process ranks to pack (may be empty
//                              unless pack_pids is true).
//   constant_num_packets [out] Always set to zero here.
//   pack_pids            [in]  Whether to pack process ranks.
//   (unnamed Distributor)      Unused.
//
// Throws std::invalid_argument if the input extents are inconsistent.
660 template<typename LO, typename GO, typename NT>
661 void
663 (const CrsGraph<LO,GO,NT>& sourceGraph,
664  Kokkos::DualView<
665  typename CrsGraph<LO,GO,NT>::packet_type*,
666  typename CrsGraph<LO,GO,NT>::buffer_device_type
667  >& exports,
668  const Kokkos::View<
669  size_t*,
670  typename CrsGraph<LO,GO,NT>::buffer_device_type
671  >& num_packets_per_lid,
672  const Kokkos::View<
673  const LO*,
674  typename CrsGraph<LO, GO, NT>::buffer_device_type
675  >& export_lids,
676  const Kokkos::View<
677  const int*,
678  typename CrsGraph<LO, GO, NT>::buffer_device_type
679  >& export_pids,
680  size_t& constant_num_packets,
681  const bool pack_pids,
682  Distributor& /* dist */)
683 {
684  using Kokkos::View;
685  using crs_graph_type = CrsGraph<LO, GO, NT>;
686  using packet_type = typename crs_graph_type::packet_type;
687  using buffer_device_type = typename crs_graph_type::buffer_device_type;
688  using execution_space = typename buffer_device_type::execution_space;
689  using exports_view_type = Kokkos::DualView<packet_type*, buffer_device_type>;
690  using local_graph_type = typename crs_graph_type::local_graph_type;
691  using local_map_type = typename Tpetra::Map<LO, GO, NT>::local_map_type;
692  const char prefix[] = "Tpetra::Details::packCrsGraph: ";
// Compile-time switch for the diagnostic prints to std::cerr below.
693  constexpr bool debug = false;
694 
695  local_graph_type local_graph = sourceGraph.getLocalGraph ();
696  local_map_type local_col_map = sourceGraph.getColMap ()->getLocalMap ();
697 
698  // Setting this to zero tells the caller to expect a possibly
699  // different ("nonconstant") number of packets per local index
700  // (i.e., a possibly different number of entries per row).
701  constant_num_packets = 0;
702 
// NOTE(review): the message below says "num_export_lids.extent(0)",
// but the value printed is export_lids.extent(0).
703  const size_t num_export_lids (export_lids.extent (0));
704  TEUCHOS_TEST_FOR_EXCEPTION
705  (num_export_lids != size_t (num_packets_per_lid.extent (0)),
706  std::invalid_argument, prefix << "num_export_lids.extent(0) = "
707  << num_export_lids << " != num_packets_per_lid.extent(0) = "
708  << num_packets_per_lid.extent (0) << ".");
709  if (num_export_lids != 0) {
710  TEUCHOS_TEST_FOR_EXCEPTION
711  (num_packets_per_lid.data () == nullptr, std::invalid_argument,
712  prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
713  "num_packets_per_lid.data() = "
714  << num_packets_per_lid.data () << " == NULL.");
715  }
716 
// No rows to pack: hand back an empty buffer and stop.
717  if (num_export_lids == 0) {
718  exports = exports_view_type ("exports", 0);
719  return;
720  }
721 
722  // Array of offsets into the pack buffer.
723  View<size_t*, buffer_device_type> offsets ("offsets", num_export_lids + 1);
724 
725  // Compute number of packets per LID (row to send), as well as
726  // corresponding offsets (the prefix sum of the packet counts).
727  const size_t count =
728  computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
729  local_graph.row_map, export_lids, export_pids);
730 
// `exports` only ever grows here; a buffer that is already at least
// `count` long is reused as-is.
731  // Resize the output pack buffer if needed.
732  if (count > size_t (exports.extent (0))) {
733  exports = exports_view_type ("exports", count);
734  if (debug) {
735  std::ostringstream os;
736  os << "*** exports resized to " << count << std::endl;
737  std::cerr << os.str ();
738  }
739  }
740  if (debug) {
741  std::ostringstream os;
742  os << "*** count: " << count << ", exports.extent(0): "
743  << exports.extent (0) << std::endl;
744  std::cerr << os.str ();
745  }
746 
747  // If exports has nonzero length at this point, then the graph has
748  // at least one entry to pack. Thus, if packing process ranks, we
749  // had better have at least one process rank to pack.
750  TEUCHOS_TEST_FOR_EXCEPTION
751  (pack_pids && exports.extent (0) != 0 &&
752  export_pids.extent (0) == 0, std::invalid_argument, prefix <<
753  "pack_pids is true, and exports.extent(0) = " <<
754  exports.extent (0) << " != 0, meaning that we need to pack at least "
755  "one graph entry, but export_pids.extent(0) = 0.");
756 
// Mark the device side of `exports` as modified before do_pack
// writes into its device View.
757  exports.modify_device ();
758  auto exports_d = exports.view_device ();
759  do_pack<packet_type, local_graph_type, local_map_type, buffer_device_type>
760  (local_graph, local_col_map, exports_d, num_packets_per_lid,
761  export_lids, export_pids, offsets, pack_pids);
762  // If we got this far, we succeeded.
763 }
764 
765 } // namespace PackCrsGraphImpl
766 
// Pack the rows listed in exportLIDs of sourceGraph into the
// Teuchos::Array `exports`, using Teuchos array arguments.
//
// The Teuchos arrays are wrapped in or copied to Kokkos Views, the
// packing is delegated to a View-based routine with pack_pids =
// false, and the per-row counts and the packed data are then copied
// back into numPacketsPerLID and exports.
//
// NOTE(review): listing lines 789, 796 and 830 are absent from this
// listing, so the names of the two helper calls that build the device
// Views, and of the packing routine invoked below, are not visible
// here -- confirm against the original file.
//
// Parameters:
//   sourceGraph        [in]  Graph to pack.
//   exports            [out] Packed data; resized to match the length
//                            of the internal pack buffer.
//   numPacketsPerLID   [out] Number of packets for each row.
//   exportLIDs         [in]  Local indices of the rows to pack.
//   constantNumPackets [out] Forwarded to the packing routine.
//   distor             [in]  Forwarded to the packing routine.
767 template<typename LO, typename GO, typename NT>
768 void
769 packCrsGraph (const CrsGraph<LO, GO, NT>& sourceGraph,
770  Teuchos::Array<typename CrsGraph<LO,GO,NT>::packet_type>& exports,
771  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
772  const Teuchos::ArrayView<const LO>& exportLIDs,
773  size_t& constantNumPackets,
774  Distributor& distor)
775 {
776  using Kokkos::HostSpace;
777  using Kokkos::MemoryUnmanaged;
778  using Kokkos::View;
779  using crs_graph_type = CrsGraph<LO, GO, NT>;
780  using packet_type = typename crs_graph_type::packet_type;
781  using BDT = typename crs_graph_type::buffer_device_type;
782 
783  // Convert all Teuchos::Array to Kokkos::View
784 
785  // This is an output array, so we don't have to copy to device here.
786  // However, we'll have to remember to copy back to host when done.
787  BDT outputDevice;
788  View<size_t*, BDT> num_packets_per_lid_d =
790  numPacketsPerLID.getRawPtr (),
791  numPacketsPerLID.size (), false,
792  "num_packets_per_lid");
793  // This is an input array, so we have to copy to device here.
794  // However, we never need to copy it back to host.
795  View<const LO*, BDT> export_lids_d =
797  exportLIDs.getRawPtr (),
798  exportLIDs.size (), true,
799  "export_lids");
// export_pids_d stays default-constructed: this overload never packs
// process ranks (pack_pids is false).
800  View<const int*, BDT> export_pids_d;
801  Kokkos::DualView<packet_type*, BDT> exports_dv;
802  constexpr bool pack_pids = false;
803 
// Compile-time checks that the Views built above have the value and
// device types that the packing routine is called with.
804  static_assert
805  (std::is_same<
806  typename decltype (num_packets_per_lid_d)::non_const_value_type,
807  size_t>::value,
808  "num_packets_per_lid_d's non_const_value_type should be size_t.");
// NOTE(review): this assertion checks device_type against BDT, but
// its message reads "BDT should be size_t"; the message looks like a
// copy-paste slip.
809  static_assert
810  (std::is_same<
811  typename decltype (num_packets_per_lid_d)::device_type,
812  BDT>::value,
813  "num_packets_per_lid_d's BDT should be size_t.");
814  static_assert
815  (std::is_same<
816  typename decltype (export_lids_d)::device_type,
817  BDT>::value,
818  "export_lids_d's device_type should be BDT.");
819  static_assert
820  (std::is_same<
821  typename decltype (export_pids_d)::non_const_value_type,
822  int>::value,
823  "export_pids_d's non_const_value_type should be int.");
824  static_assert
825  (std::is_same<
826  typename decltype (export_pids_d)::device_type,
827  BDT>::value,
828  "export_pids_d's device_type should be BDT.");
829 
831  (sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
832  export_pids_d, constantNumPackets, pack_pids, distor);
833 
834  // The counts are an output of packCrsGraph, so we have to copy
835  // them back to host.
836  View<size_t*, HostSpace, MemoryUnmanaged>
837  num_packets_per_lid_h (numPacketsPerLID.getRawPtr (),
838  numPacketsPerLID.size ());
839  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
840 
841  // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
842  // exports_dv above, then we have two host copies for exports_h.
843 
844  // The exports are an output of packCrsGraph, so we have to
845  // copy them back to host.
846  if (static_cast<size_t> (exports.size ()) !=
847  static_cast<size_t> (exports_dv.extent (0))) {
848  exports.resize (exports_dv.extent (0));
849  }
// Wrap the caller's array in an unmanaged host View so that
// deep_copy writes straight into it.
850  View<packet_type*, HostSpace, MemoryUnmanaged>
851  exports_h (exports.getRawPtr (), exports.size ());
852  Kokkos::deep_copy (exports_h, exports_dv.d_view);
853 }
854 
// Pack the rows listed in export_lids of sourceGraph into the
// `exports` DualView, taking every array argument as a DualView and
// working on the device Views.
//
// NOTE(review): the declarator line with this function's name and its
// first parameter (listing line 859), and parts of the parameter
// types (listing lines 862, 866, 869, 873), are absent from this
// listing.  The error prefix below suggests the name is
// packCrsGraphNew -- confirm against the original file.
//
// Parameters visible here:
//   export_lids          [in]  Local indices of the rows to pack.
//   export_pids          [in]  Process ranks to pack (may be empty
//                              unless pack_pids is true).
//   exports              [out] Pack buffer; replaced by a new DualView
//                              if too short, or by a default-constructed
//                              one if there are no rows to pack.
//   num_packets_per_lid  [out] Per-row packet counts.  Declared
//                              without `&` in this listing, i.e. taken
//                              by value.
//   constant_num_packets [out] Always set to zero here.
//   pack_pids            [in]  Whether to pack process ranks.
//   (unnamed Distributor)      Unused.
//
// Throws std::invalid_argument if the input extents are inconsistent.
857 template<typename LO, typename GO, typename NT>
858 void
860  const Kokkos::DualView<
861  const LO*,
863  >& export_lids,
864  const Kokkos::DualView<
865  const int*,
867  >& export_pids,
868  Kokkos::DualView<
870  typename CrsGraph<LO,GO,NT>::buffer_device_type>& exports,
871  Kokkos::DualView<
872  size_t*,
874  > num_packets_per_lid,
875  size_t& constant_num_packets,
876  const bool pack_pids,
877  Distributor& /* dist */)
878 {
879  using Kokkos::View;
880  using crs_graph_type = CrsGraph<LO,GO,NT>;
881  using BDT = typename crs_graph_type::buffer_device_type;
882  using PT = typename crs_graph_type::packet_type;
883  using exports_dual_view_type = Kokkos::DualView<PT*, BDT>;
884  using LGT = typename crs_graph_type::local_graph_type;
885  using LMT = typename crs_graph_type::map_type::local_map_type;
886  const char prefix[] = "Tpetra::Details::packCrsGraphNew: ";
887 
888  const LGT local_graph = sourceGraph.getLocalGraph ();
889  const LMT local_col_map = sourceGraph.getColMap ()->getLocalMap ();
890 
891  // Setting this to zero tells the caller to expect a possibly
892  // different ("nonconstant") number of packets per local index
893  // (i.e., a possibly different number of entries per row).
894  constant_num_packets = 0;
895 
// NOTE(review): the message below says "num_export_lids.extent(0)",
// but the value printed is export_lids.extent(0).
896  const size_t num_export_lids =
897  static_cast<size_t> (export_lids.extent (0));
898  TEUCHOS_TEST_FOR_EXCEPTION
899  (num_export_lids !=
900  static_cast<size_t> (num_packets_per_lid.extent (0)),
901  std::invalid_argument, prefix << "num_export_lids.extent(0) = "
902  << num_export_lids << " != num_packets_per_lid.extent(0) = "
903  << num_packets_per_lid.extent (0) << ".");
904  TEUCHOS_TEST_FOR_EXCEPTION
905  (num_export_lids != 0 &&
906  num_packets_per_lid.view_device ().data () == nullptr,
907  std::invalid_argument, prefix << "num_export_lids = "<< num_export_lids
908  << " != 0, but num_packets_per_lid.view_device().data() = nullptr.");
909 
// No rows to pack: hand back a default-constructed buffer and stop.
910  if (num_export_lids == 0) {
911  exports = exports_dual_view_type ();
912  return;
913  }
914 
915  // Array of offsets into the pack buffer.
916  using offsets_type = Kokkos::View<size_t*, BDT>;
917  offsets_type offsets ("offsets", num_export_lids + 1);
918 
919  // Compute number of packets per LID (row to send), as well as
920  // corresponding offsets (the prefix sum of the packet counts).
// The counts are about to be overwritten on device, so the DualView's
// sync state is cleared and its device side is marked as modified.
921  num_packets_per_lid.clear_sync_state ();
922  num_packets_per_lid.modify_device ();
923  using PackCrsGraphImpl::computeNumPacketsAndOffsets;
924  const size_t count =
925  computeNumPacketsAndOffsets (offsets, num_packets_per_lid.view_device (),
926  local_graph.row_map,
927  export_lids.view_device (),
928  export_pids.view_device ());
929 
// `exports` only ever grows here; a buffer that is already at least
// `count` long is reused as-is.
930  // Resize the output pack buffer if needed.
931  if (count > static_cast<size_t> (exports.extent (0))) {
932  exports = exports_dual_view_type ("exports", count);
933  }
934 
935  // If exports has nonzero length at this point, then the graph has
936  // at least one entry to pack. Thus, if packing process ranks, we
937  // had better have at least one process rank to pack.
938  TEUCHOS_TEST_FOR_EXCEPTION
939  (pack_pids && exports.extent (0) != 0 &&
940  export_pids.extent (0) == 0, std::invalid_argument, prefix <<
941  "pack_pids is true, and exports.extent(0) = " <<
942  exports.extent (0) << " != 0, meaning that we need to pack at least "
943  "one graph entry, but export_pids.extent(0) = 0.");
944 
// Mark the device side of `exports` as modified before do_pack
// writes into its device View.
945  exports.modify_device ();
946  using PackCrsGraphImpl::do_pack;
947  do_pack<PT, LGT, LMT, BDT> (local_graph, local_col_map,
948  exports.view_device (),
949  num_packets_per_lid.view_device (),
950  export_lids.view_device (),
951  export_pids.view_device (),
952  offsets, pack_pids);
953 }
954 
// Host-array front end for packing a graph together with owning process
// ranks.  The Teuchos::ArrayView arguments are wrapped as device Views
// (inputs are copied to device, the output count array is not), the pack
// routine is invoked with pack_pids fixed to true, and the per-LID packet
// counts are then copied back into numPacketsPerLID on host.
//
// NOTE(review): this text is an extracted Doxygen listing; listing lines 957
// (function name), 960-961 (DualView template arguments) and 1000 (name of
// the callee in the pack call) are absent from the extraction -- restore
// them from the real header before compiling.
955 template<typename LO, typename GO, typename NT>
956 void
958 (const CrsGraph<LO, GO, NT>& sourceGraph,
959  Kokkos::DualView<
962  >& exports_dv,
963  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
964  const Teuchos::ArrayView<const LO>& exportLIDs,
965  const Teuchos::ArrayView<const int>& sourcePIDs,
966  size_t& constantNumPackets,
967  Distributor& distor)
968 {
969  using Kokkos::HostSpace;
970  using Kokkos::MemoryUnmanaged;
971  using Kokkos::View;
972  using crs_graph_type = CrsGraph<LO, GO, NT>;
973  using buffer_device_type = typename crs_graph_type::buffer_device_type;
974 
975  // Convert all Teuchos::Array to Kokkos::View
976 
977  // This is an output array, so we don't have to copy to device here.
978  // However, we'll have to remember to copy back to host when done.
// (The `false` argument is the helper's `copy` flag.)
979  View<size_t*, buffer_device_type> num_packets_per_lid_d =
980  create_mirror_view_from_raw_host_array (buffer_device_type (),
981  numPacketsPerLID.getRawPtr (),
982  numPacketsPerLID.size (), false,
983  "num_packets_per_lid");
984 
985  // This is an input array, so we have to copy to device here.
986  // However, we never need to copy it back to host.
987  View<const LO*, buffer_device_type> export_lids_d =
988  create_mirror_view_from_raw_host_array (buffer_device_type (),
989  exportLIDs.getRawPtr (),
990  exportLIDs.size (), true,
991  "export_lids");
992  // This is an input array, so we have to copy to device here.
993  // However, we never need to copy it back to host.
994  View<const int*, buffer_device_type> export_pids_d =
995  create_mirror_view_from_raw_host_array (buffer_device_type (),
996  sourcePIDs.getRawPtr (),
997  sourcePIDs.size (), true,
998  "export_pids");
// This entry point always packs the owning process ranks.
999  constexpr bool pack_pids = true;
// NOTE(review): the callee name (listing line 1000) is missing here;
// presumably a packCrsGraph implementation overload -- confirm against the
// real header.
1001  (sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
1002  export_pids_d, constantNumPackets, pack_pids, distor);
1003 
1004  // The counts are an output of packCrsGraph, so we
1005  // have to copy them back to host.
// An unmanaged host View aliases the caller's array so deep_copy writes
// straight into numPacketsPerLID.
1006  View<size_t*, HostSpace, MemoryUnmanaged> num_packets_per_lid_h
1007  (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1008  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
1009 }
1010 
1011 } // namespace Details
1012 } // namespace Tpetra
1013 
// Explicit template instantiation macro.  For one (LO, GO, NT) combination
// it expands to explicit instantiations of three functions in namespace
// Details: packCrsGraph (Teuchos::Array export buffer), packCrsGraphNew
// (Kokkos::DualView arguments) and packCrsGraphWithOwningPIDs.  The
// expansion already ends with a semicolon.
1014 #define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT( LO, GO, NT ) \
1015  template void \
1016  Details::packCrsGraph<LO, GO, NT> ( \
1017  const CrsGraph<LO, GO, NT>&, \
1018  Teuchos::Array<CrsGraph<LO,GO,NT>::packet_type>&, \
1019  const Teuchos::ArrayView<size_t>&, \
1020  const Teuchos::ArrayView<const LO>&, \
1021  size_t&, \
1022  Distributor&); \
1023  template void \
1024  Details::packCrsGraphNew<LO, GO, NT> ( \
1025  const CrsGraph<LO, GO, NT>&, \
1026  const Kokkos::DualView< \
1027  const LO*, \
1028  CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1029  const Kokkos::DualView< \
1030  const int*, \
1031  CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1032  Kokkos::DualView< \
1033  CrsGraph<LO,GO,NT>::packet_type*, \
1034  CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1035  Kokkos::DualView< \
1036  size_t*, \
1037  CrsGraph<LO,GO,NT>::buffer_device_type>, \
1038  size_t&, \
1039  const bool, \
1040  Distributor&); \
1041  template void \
1042  Details::packCrsGraphWithOwningPIDs<LO, GO, NT> ( \
1043  const CrsGraph<LO, GO, NT>&, \
1044  Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
1045  const Teuchos::ArrayView<size_t>&, \
1046  const Teuchos::ArrayView<const LO>&, \
1047  const Teuchos::ArrayView<const int>&, \
1048  size_t&, \
1049  Distributor&);
1050 
1051 #endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of the Tpetra::CrsGraph class.
"Local" part of Map suitable for Kokkos kernels.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Compute the number of packets and offsets for the pack procedure.
Sets up and executes a communication plan for a Tpetra DistObject.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
local_graph_type getLocalGraph() const
Get the local graph.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids, Distributor &distor)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
Declaration and definition of Tpetra::Details::getEntryOnHost.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.