Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_crsUtils.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DETAILS_CRSUTILS_HPP
41 #define TPETRA_DETAILS_CRSUTILS_HPP
42 #include <numeric>
43 #include <type_traits>
44 
45 #include "TpetraCore_config.h"
46 #include "Kokkos_Core.hpp"
48 #include "Tpetra_Details_CrsPadding.hpp"
49 #include <iostream>
50 #include <memory>
51 
57 
58 namespace Tpetra {
59 namespace Details {
60 
61 namespace impl {
62 
63 template<class ViewType>
64 ViewType
65 make_uninitialized_view(
66  const std::string& name,
67  const size_t size,
68  const bool verbose,
69  const std::string* const prefix)
70 {
71  if (verbose) {
72  std::ostringstream os;
73  os << *prefix << "Allocate Kokkos::View " << name
74  << ": " << size << std::endl;
75  std::cerr << os.str();
76  }
77  using Kokkos::view_alloc;
78  using Kokkos::WithoutInitializing;
79  return ViewType(view_alloc(name, WithoutInitializing), size);
80 }
81 
82 template<class ViewType>
83 ViewType
84 make_initialized_view(
85  const std::string& name,
86  const size_t size,
87  const bool verbose,
88  const std::string* const prefix)
89 {
90  if (verbose) {
91  std::ostringstream os;
92  os << *prefix << "Allocate & initialize Kokkos::View "
93  << name << ": " << size << std::endl;
94  std::cerr << os.str();
95  }
96  return ViewType(name, size);
97 }
98 
99 template<class OutViewType, class InViewType>
100 void
101 assign_to_view(OutViewType& out,
102  const InViewType& in,
103  const char viewName[],
104  const bool verbose,
105  const std::string* const prefix)
106 {
107  if (verbose) {
108  std::ostringstream os;
109  os << *prefix << "Assign to Kokkos::View " << viewName
110  << ": Old size: " << out.extent(0)
111  << ", New size: " << in.extent(0) << std::endl;
112  std::cerr << os.str();
113  }
114  out = in;
115 }
116 
117 template<class MemorySpace, class ViewType>
118 auto create_mirror_view(
119  const MemorySpace& memSpace,
120  const ViewType& view,
121  const bool verbose,
122  const std::string* const prefix) ->
123  decltype(Kokkos::create_mirror_view(memSpace, view))
124 {
125  if (verbose) {
126  std::ostringstream os;
127  os << *prefix << "Create mirror view: "
128  << "view.extent(0): " << view.extent(0) << std::endl;
129  std::cerr << os.str();
130  }
131  return Kokkos::create_mirror_view(memSpace, view);
132 }
133 
134 enum class PadCrsAction {
135  INDICES_ONLY,
136  INDICES_AND_VALUES
137 };
138 
147 template<class RowPtr, class Indices, class Values, class Padding>
148 void
149 pad_crs_arrays(
150  const PadCrsAction action,
151  const RowPtr& row_ptr_beg,
152  const RowPtr& row_ptr_end,
153  Indices& indices,
154  Values& values,
155  const Padding& padding,
156  const int my_rank,
157  const bool verbose)
158 {
159  using Kokkos::view_alloc;
160  using Kokkos::WithoutInitializing;
161  using std::endl;
162  std::unique_ptr<std::string> prefix;
163 
164  const size_t maxNumToPrint = verbose ?
166  if (verbose) {
167  std::ostringstream os;
168  os << "Proc " << my_rank << ": Tpetra::...::pad_crs_arrays: ";
169  prefix = std::unique_ptr<std::string>(new std::string(os.str()));
170  os << "Start" << endl;
171  std::cerr << os.str();
172  }
173  Kokkos::HostSpace hostSpace;
174 
175  if (verbose) {
176  std::ostringstream os;
177  os << *prefix << "On input: ";
178  auto row_ptr_beg_h =
179  Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
180  Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
181  verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg before scan",
182  maxNumToPrint);
183  os << ", ";
184  auto row_ptr_end_h =
185  Kokkos::create_mirror_view(hostSpace, row_ptr_end);
186  Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
187  verbosePrintArray(os, row_ptr_end_h, "row_ptr_end before scan",
188  maxNumToPrint);
189  os << ", indices.extent(0): " << indices.extent(0)
190  << ", values.extent(0): " << values.extent(0)
191  << ", padding: ";
192  padding.print(os);
193  os << endl;
194  std::cerr << os.str();
195  }
196 
197  if (row_ptr_beg.size() == 0) {
198  if (verbose) {
199  std::ostringstream os;
200  os << *prefix << "Done; local matrix has no rows" << endl;
201  std::cerr << os.str();
202  }
203  return; // nothing to do
204  }
205 
206  const size_t lclNumRows(row_ptr_beg.size() - 1);
207  RowPtr newAllocPerRow =
208  make_uninitialized_view<RowPtr>("newAllocPerRow", lclNumRows,
209  verbose, prefix.get());
210  if (verbose) {
211  std::ostringstream os;
212  os << *prefix << "Fill newAllocPerRow & compute increase" << endl;
213  std::cerr << os.str();
214  }
215  size_t increase = 0;
216  {
217  auto row_ptr_end_h = create_mirror_view(
218  hostSpace, row_ptr_end, verbose, prefix.get());
219  Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
220  auto row_ptr_beg_h = create_mirror_view(
221  hostSpace, row_ptr_beg, verbose, prefix.get());
222  Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
223 
224  auto newAllocPerRow_h = create_mirror_view(
225  hostSpace, newAllocPerRow, verbose, prefix.get());
226  using host_range_type = Kokkos::RangePolicy<
227  Kokkos::DefaultHostExecutionSpace, size_t>;
228  Kokkos::parallel_reduce
229  ("Tpetra::CrsGraph: Compute new allocation size per row",
230  host_range_type(0, lclNumRows),
231  [&] (const size_t lclRowInd, size_t& lclIncrease) {
232  const size_t start = row_ptr_beg_h[lclRowInd];
233  const size_t end = row_ptr_beg_h[lclRowInd+1];
234  TEUCHOS_ASSERT( end >= start );
235  const size_t oldAllocSize = end - start;
236  const size_t oldNumEnt = row_ptr_end_h[lclRowInd] - start;
237  TEUCHOS_ASSERT( oldNumEnt <= oldAllocSize );
238 
239  // This is not a pack routine. Do not shrink! Shrinking now
240  // to fit the number of entries would ignore users' hint for
241  // the max number of entries in each row. Also, CrsPadding
242  // only counts entries and ignores any available free space.
243 
244  auto result = padding.get_result(lclRowInd);
245  const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt;
246  if (newNumEnt > oldAllocSize) {
247  lclIncrease += (newNumEnt - oldAllocSize);
248  newAllocPerRow_h[lclRowInd] = newNumEnt;
249  }
250  else {
251  newAllocPerRow_h[lclRowInd] = oldAllocSize;
252  }
253  }, increase);
254 
255  if (verbose) {
256  std::ostringstream os;
257  os << *prefix << "increase: " << increase << ", ";
258  verbosePrintArray(os, newAllocPerRow_h, "newAllocPerRow",
259  maxNumToPrint);
260  os << endl;
261  std::cerr << os.str();
262  }
263 
264  if (increase == 0) {
265  return;
266  }
267  Kokkos::deep_copy(newAllocPerRow, newAllocPerRow_h);
268  }
269 
270  using inds_value_type = typename Indices::non_const_value_type;
271  using vals_value_type = typename Values::non_const_value_type;
272 
273  const size_t newIndsSize = size_t(indices.size()) + increase;
274  auto indices_new = make_uninitialized_view<Indices>(
275  "Tpetra::CrsGraph column indices", newIndsSize, verbose,
276  prefix.get());
277 
278  Values values_new;
279  if (action == PadCrsAction::INDICES_AND_VALUES) {
280  const size_t newValsSize = newIndsSize;
281  // NOTE (mfh 10 Feb 2020) If we don't initialize values_new here,
282  // then the CrsMatrix tests fail.
283  values_new = make_initialized_view<Values>(
284  "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get());
285  }
286 
287  if (verbose) {
288  std::ostringstream os;
289  os << *prefix << "Repack" << endl;
290  std::cerr << os.str();
291  }
292  using execution_space = typename Indices::execution_space;
293  using range_type = Kokkos::RangePolicy<execution_space, size_t>;
294  Kokkos::parallel_scan(
295  "Tpetra::CrsGraph or CrsMatrix repack",
296  range_type(0, lclNumRows+1),
297  KOKKOS_LAMBDA (const size_t lclRow, size_t& newRowBeg,
298  const bool finalPass)
299  {
300  // row_ptr_beg has lclNumRows + 1 entries.
301  // row_ptr_end has lclNumRows entries.
302  // newAllocPerRow has lclNumRows entries.
303  const size_t row_beg = row_ptr_beg[lclRow];
304  const size_t row_end =
305  lclRow < lclNumRows ? row_ptr_end[lclRow] : row_beg;
306  const size_t numEnt = row_end - row_beg;
307  const size_t newRowAllocSize =
308  lclRow < lclNumRows ? newAllocPerRow[lclRow] : size_t(0);
309  if (finalPass) {
310  if (lclRow < lclNumRows) {
311  const Kokkos::pair<size_t, size_t> oldRange(
312  row_beg, row_beg + numEnt);
313  const Kokkos::pair<size_t, size_t> newRange(
314  newRowBeg, newRowBeg + numEnt);
315  auto oldColInds = subview(indices, oldRange);
316  auto newColInds = subview(indices_new, newRange);
317  // memcpy works fine on device; the next step is to
318  // introduce two-level parallelism and use team copy.
319  memcpy(newColInds.data(), oldColInds.data(),
320  numEnt * sizeof(inds_value_type));
321  if (action == PadCrsAction::INDICES_AND_VALUES) {
322  auto oldVals = subview(values, oldRange);
323  auto newVals = subview(values_new, newRange);
324  memcpy(newVals.data(), oldVals.data(),
325  numEnt * sizeof(vals_value_type));
326  }
327  }
328  // It's the final pass, so we can modify these arrays.
329  row_ptr_beg[lclRow] = newRowBeg;
330  if (lclRow < lclNumRows) {
331  row_ptr_end[lclRow] = newRowBeg + numEnt;
332  }
333  }
334  newRowBeg += newRowAllocSize;
335  });
336 
337  if (verbose) {
338  std::ostringstream os;
339 
340  os << *prefix;
341  auto row_ptr_beg_h =
342  Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
343  Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
344  verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg after scan",
345  maxNumToPrint);
346  os << endl;
347 
348  os << *prefix;
349  auto row_ptr_end_h =
350  Kokkos::create_mirror_view(hostSpace, row_ptr_end);
351  Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
352  verbosePrintArray(os, row_ptr_end_h, "row_ptr_end after scan",
353  maxNumToPrint);
354  os << endl;
355 
356  std::cerr << os.str();
357  }
358 
359  assign_to_view(indices, indices_new,
360  "Tpetra::CrsGraph column indices",
361  verbose, prefix.get());
362  assign_to_view(values, values_new,
363  "Tpetra::CrsMatrix values",
364  verbose, prefix.get());
365 
366  if (verbose) {
367  auto indices_h = Kokkos::create_mirror_view(hostSpace, indices);
368  Kokkos::deep_copy(indices_h, indices);
369  auto values_h = Kokkos::create_mirror_view(hostSpace, values);
370  Kokkos::deep_copy(values_h, values);
371  std::ostringstream os;
372  os << "On output: ";
373  verbosePrintArray(os, indices_h, "indices", maxNumToPrint);
374  os << ", ";
375  verbosePrintArray(os, values_h, "values", maxNumToPrint);
376  os << ", padding: ";
377  padding.print(os);
378  os << endl;
379  }
380 
381  if (verbose) {
382  std::ostringstream os;
383  os << *prefix << "Done" << endl;
384  std::cerr << os.str();
385  }
386 }
387 
389 template <class Pointers, class InOutIndices, class InIndices, class IndexMap>
390 size_t
391 insert_crs_indices(
392  typename Pointers::value_type const row,
393  Pointers const& row_ptrs,
394  InOutIndices& cur_indices,
395  size_t& num_assigned,
396  InIndices const& new_indices,
397  IndexMap&& map,
398  std::function<void(size_t const, size_t const, size_t const)> cb)
399 {
400  if (new_indices.size() == 0) {
401  return 0;
402  }
403 
404  if (cur_indices.size() == 0) {
405  // No room to insert new indices
406  return Teuchos::OrdinalTraits<size_t>::invalid();
407  }
408 
409  using offset_type = typename std::decay<decltype (row_ptrs[0])>::type;
410  using ordinal_type = typename std::decay<decltype (cur_indices[0])>::type;
411 
412  const offset_type start = row_ptrs[row];
413  offset_type end = start + static_cast<offset_type> (num_assigned);
414  const size_t num_avail = (row_ptrs[row + 1] < end) ? size_t (0) :
415  row_ptrs[row + 1] - end;
416  const size_t num_new_indices = static_cast<size_t> (new_indices.size ());
417  size_t num_inserted = 0;
418 
419  for (size_t k = 0; k < num_new_indices; ++k) {
420  const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
421  offset_type row_offset = start;
422  for (; row_offset < end; ++row_offset) {
423  if (idx == cur_indices[row_offset]) {
424  break;
425  }
426  }
427 
428  if (row_offset == end) {
429  if (num_inserted >= num_avail) { // not enough room
430  return Teuchos::OrdinalTraits<size_t>::invalid();
431  }
432  // This index is not yet in indices
433  cur_indices[end++] = idx;
434  num_inserted++;
435  }
436  if (cb) {
437  cb(k, start, row_offset - start);
438  }
439  }
440  num_assigned += num_inserted;
441  return num_inserted;
442 }
443 
445 template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
446 size_t
447 find_crs_indices(
448  typename Pointers::value_type const row,
449  Pointers const& row_ptrs,
450  const size_t curNumEntries,
451  Indices1 const& cur_indices,
452  Indices2 const& new_indices,
453  IndexMap&& map,
454  Callback&& cb)
455 {
456  if (new_indices.size() == 0)
457  return 0;
458 
459  using ordinal = typename Indices1::value_type;
460  auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
461 
462  const size_t start = static_cast<size_t> (row_ptrs[row]);
463  const size_t end = start + curNumEntries;
464  size_t num_found = 0;
465  for (size_t k = 0; k < new_indices.size(); k++)
466  {
467  auto row_offset = start;
468  auto idx = std::forward<IndexMap>(map)(new_indices[k]);
469  if (idx == invalid_ordinal)
470  continue;
471  for (; row_offset < end; row_offset++)
472  {
473  if (idx == cur_indices[row_offset])
474  {
475  std::forward<Callback>(cb)(k, start, row_offset - start);
476  num_found++;
477  }
478  }
479  }
480  return num_found;
481 }
482 
483 } // namespace impl
484 
485 
503 template<class RowPtr, class Indices, class Padding>
504 void
506  const RowPtr& rowPtrBeg,
507  const RowPtr& rowPtrEnd,
508  Indices& indices,
509  const Padding& padding,
510  const int my_rank,
511  const bool verbose)
512 {
513  using impl::pad_crs_arrays;
514  // send empty values array
515  Indices values;
516  pad_crs_arrays<RowPtr, Indices, Indices, Padding>(
517  impl::PadCrsAction::INDICES_ONLY, rowPtrBeg, rowPtrEnd,
518  indices, values, padding, my_rank, verbose);
519 }
520 
521 template<class RowPtr, class Indices, class Values, class Padding>
522 void
524  const RowPtr& rowPtrBeg,
525  const RowPtr& rowPtrEnd,
526  Indices& indices,
527  Values& values,
528  const Padding& padding,
529  const int my_rank,
530  const bool verbose)
531 {
532  using impl::pad_crs_arrays;
533  pad_crs_arrays<RowPtr, Indices, Values, Padding>(
534  impl::PadCrsAction::INDICES_AND_VALUES, rowPtrBeg, rowPtrEnd,
535  indices, values, padding, my_rank, verbose);
536 }
537 
583 template <class Pointers, class InOutIndices, class InIndices>
584 size_t
586  typename Pointers::value_type const row,
587  Pointers const& rowPtrs,
588  InOutIndices& curIndices,
589  size_t& numAssigned,
590  InIndices const& newIndices,
591  std::function<void(const size_t, const size_t, const size_t)> cb =
592  std::function<void(const size_t, const size_t, const size_t)>())
593 {
594  static_assert(std::is_same<typename std::remove_const<typename InOutIndices::value_type>::type,
595  typename std::remove_const<typename InIndices::value_type>::type>::value,
596  "Expected views to have same value type");
597 
598  // Provide a unit map for the more general insert_indices
599  using ordinal = typename InOutIndices::value_type;
600  auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
601  numAssigned, newIndices, [](ordinal const idx) { return idx; }, cb);
602  return numInserted;
603 }
604 
605 template <class Pointers, class InOutIndices, class InIndices>
606 size_t
608  typename Pointers::value_type const row,
609  Pointers const& rowPtrs,
610  InOutIndices& curIndices,
611  size_t& numAssigned,
612  InIndices const& newIndices,
613  std::function<typename InOutIndices::value_type(const typename InIndices::value_type)> map,
614  std::function<void(const size_t, const size_t, const size_t)> cb =
615  std::function<void(const size_t, const size_t, const size_t)>())
616 {
617  auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
618  numAssigned, newIndices, map, cb);
619  return numInserted;
620 }
621 
622 
652 template <class Pointers, class Indices1, class Indices2, class Callback>
653 size_t
655  typename Pointers::value_type const row,
656  Pointers const& rowPtrs,
657  const size_t curNumEntries,
658  Indices1 const& curIndices,
659  Indices2 const& newIndices,
660  Callback&& cb)
661 {
662  static_assert(std::is_same<typename std::remove_const<typename Indices1::value_type>::type,
663  typename std::remove_const<typename Indices2::value_type>::type>::value,
664  "Expected views to have same value type");
665  // Provide a unit map for the more general find_crs_indices
666  using ordinal = typename Indices2::value_type;
667  auto numFound = impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices,
668  [=](ordinal ind){ return ind; }, cb);
669  return numFound;
670 }
671 
672 template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
673 size_t
675  typename Pointers::value_type const row,
676  Pointers const& rowPtrs,
677  const size_t curNumEntries,
678  Indices1 const& curIndices,
679  Indices2 const& newIndices,
680  IndexMap&& map,
681  Callback&& cb)
682 {
683  return impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
684 }
685 
686 } // namespace Details
687 } // namespace Tpetra
688 
689 #endif // TPETRA_DETAILS_CRSUTILS_HPP
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.