Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Details_crsUtils.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_CRSUTILS_HPP
43 #define TPETRA_DETAILS_CRSUTILS_HPP
44 
45 #include <numeric>
46 #include <type_traits>
47 
48 #include "TpetraCore_config.h"
49 #include "Kokkos_Core.hpp"
50 #include "Kokkos_UnorderedMap.hpp"
51 
57 
58 namespace Tpetra {
59 namespace Details {
60 
61 namespace impl {
62 
63 template<class ViewType>
64 ViewType
65 uninitialized_view(const std::string& name, const size_t& size)
66 {
67  return ViewType (Kokkos::view_alloc(name, Kokkos::WithoutInitializing), size);
68 }
69 
/// \brief Implementation of Tpetra::Details::padCrsArrays.
///
/// Grow the packed CRS \c indices array and shift each row's data so that
/// every row listed in \c padding gains the extra free capacity requested
/// for it.  Rows not in \c padding keep their current allocation.
///
/// \param row_ptr_beg [in/out] Offset of the start of each row's
///   allocation; length is number of rows + 1.
/// \param row_ptr_end [in/out] Offset of one past the last *used* entry
///   of each row (used <= allocated per row).
/// \param indices [in/out] Packed column indices; replaced by a larger,
///   re-shuffled allocation when padding is needed.
/// \param padding Kokkos::UnorderedMap from local row index to the number
///   of additional free entries requested for that row.
template<class RowPtr, class Indices, class Padding>
void
pad_crs_arrays(
  RowPtr& row_ptr_beg,
  RowPtr& row_ptr_end,
  Indices& indices,
  const Padding& padding)
{

  if (padding.size() == 0 || row_ptr_beg.size() == 0) {
    // Nothing to do
    return;
  }

  // Determine if the indices array is large enough
  auto num_row = row_ptr_beg.size() - 1;
  auto policy = Kokkos::RangePolicy<typename Padding::execution_space>(0, num_row);
  // Per-row post-padding allocation size, filled in by the reduction below.
  RowPtr entries_this_row("entries_this_row", num_row);
  Kokkos::deep_copy(entries_this_row, 0);
  size_t additional_size_needed = 0;
  Kokkos::parallel_reduce("Determine additional size needed", policy,
    KOKKOS_LAMBDA(const int i, size_t& ladditional_size_needed) {

      // Current capacity, usage, and free space of row i.
      auto allocated_this_row = row_ptr_beg(i+1) - row_ptr_beg(i);
      auto used_this_row = row_ptr_end(i) - row_ptr_beg(i);
      auto free_this_row = allocated_this_row - used_this_row;
      entries_this_row(i) = allocated_this_row;

      auto k = padding.find(static_cast<typename Padding::key_type>(i));
      if (padding.valid_at(k)) {
        // Additional padding was requested for this LID
        auto num_ent = padding.value_at(k);
        // Only grow by what the existing free space cannot already cover.
        auto n = (num_ent > free_this_row) ? num_ent - free_this_row : 0;
        entries_this_row(i) += n;
        ladditional_size_needed += n;
      }
    }, additional_size_needed);

  if (additional_size_needed == 0) return;

  // The indices array must be resized and the row_ptr arrays shuffled
  auto indices_new = uninitialized_view<Indices>("ind new", indices.size()+additional_size_needed);

  // mfh: Not so fussy about this not being a kernel initially,
  // since we're adding a new feature upon which existing code does not rely,
  // namely Export/Import to a StaticProfile CrsGraph. However, watch out
  // for fence()ing relating to UVM.
  auto this_row_beg = row_ptr_beg(0);
  auto this_row_end = row_ptr_end(0);
  using range = Kokkos::pair<typename RowPtr::value_type, typename RowPtr::value_type>;
  // Walk rows in order, copying each row's used entries to its (possibly
  // shifted) new location and updating the row pointers as we go.
  for (typename RowPtr::size_type i=0; i<num_row-1; i++) {

    // First, copy over indices for this row
    auto used_this_row = this_row_end - this_row_beg;
    auto indices_old_subview =
      subview(indices, range(this_row_beg, this_row_beg+used_this_row));
    auto indices_new_subview =
      subview(indices_new, range(row_ptr_beg(i), row_ptr_beg(i)+used_this_row));

    // just call memcpy; it works fine on device if this becomes a kernel
    using value_type = typename Indices::non_const_value_type;
    memcpy(indices_new_subview.data(), indices_old_subview.data(),
      used_this_row * sizeof(value_type));

    // TODO could this actually have extra entries at the end of each row?
    // If yes, then fill those entries with an "invalid" value.

    // Before modifying the row_ptr arrays, save current beg, end for next iteration
    // (row i+1's old offsets are needed before they are overwritten below).
    this_row_beg = row_ptr_beg(i+1);
    this_row_end = row_ptr_end(i+1);

    auto used_next_row = row_ptr_end(i+1) - row_ptr_beg(i+1);
    row_ptr_beg(i+1) = row_ptr_beg(i) + entries_this_row(i);
    row_ptr_end(i+1) = row_ptr_beg(i+1) + used_next_row;
  }
  {
    // The last row's allocation extends to the end of the new indices array.
    row_ptr_beg(num_row) = indices_new.size();
    // Copy indices for last row
    auto n = num_row - 1;
    auto used_this_row = row_ptr_end(n) - row_ptr_beg(n);
    auto indices_old_subview =
      subview(indices, range(this_row_beg, this_row_beg+used_this_row));
    auto indices_new_subview =
      subview(indices_new, range(row_ptr_beg(n), row_ptr_beg(n)+used_this_row));
    using value_type = typename Indices::non_const_value_type;
    memcpy(indices_new_subview.data(), indices_old_subview.data(),
      used_this_row * sizeof(value_type));
  }
  indices = indices_new;
}
161 
163 template <class Pointers, class InOutIndices, class InIndices, class IndexMap>
164 size_t
165 insert_crs_indices(
166  typename Pointers::value_type const row,
167  Pointers const& row_ptrs,
168  InOutIndices& cur_indices,
169  size_t& num_assigned,
170  InIndices const& new_indices,
171  IndexMap&& map,
172  std::function<void(size_t const, size_t const, size_t const)> cb)
173 {
174  using offset_type = typename std::decay<decltype (row_ptrs[0])>::type;
175  using ordinal_type = typename std::decay<decltype (cur_indices[0])>::type;
176 
177  if (new_indices.size() == 0) {
178  return 0;
179  }
180 
181  const offset_type start = row_ptrs[row];
182  offset_type end = start + static_cast<offset_type> (num_assigned);
183  const size_t num_avail = (row_ptrs[row + 1] < end) ? size_t (0) :
184  row_ptrs[row + 1] - end;
185  const size_t num_new_indices = static_cast<size_t> (new_indices.size ());
186  size_t num_inserted = 0;
187 
188  for (size_t k = 0; k < num_new_indices; ++k) {
189  const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
190  offset_type row_offset = start;
191  for (; row_offset < end; ++row_offset) {
192  if (idx == cur_indices[row_offset]) {
193  break;
194  }
195  }
196 
197  if (row_offset == end) {
198  if (num_inserted >= num_avail) { // not enough room
199  return Teuchos::OrdinalTraits<size_t>::invalid();
200  }
201  // This index is not yet in indices
202  cur_indices[end++] = idx;
203  num_inserted++;
204  }
205  if (cb) {
206  cb(k, start, row_offset - start);
207  }
208  }
209  num_assigned += num_inserted;
210  return num_inserted;
211 }
212 
214 template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
215 size_t
216 find_crs_indices(
217  typename Pointers::value_type const row,
218  Pointers const& row_ptrs,
219  Indices1 const& cur_indices,
220  Indices2 const& new_indices,
221  IndexMap&& map,
222  Callback&& cb)
223 {
224  if (new_indices.size() == 0)
225  return 0;
226 
227  using ordinal = typename Indices1::value_type;
228  auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
229 
230  auto const start = row_ptrs[row];
231  auto const end = row_ptrs[row + 1];
232  size_t num_found = 0;
233  for (size_t k = 0; k < new_indices.size(); k++)
234  {
235  auto row_offset = start;
236  auto idx = std::forward<IndexMap>(map)(new_indices[k]);
237  if (idx == invalid_ordinal)
238  continue;
239  for (; row_offset < end; row_offset++)
240  {
241  if (idx == cur_indices[row_offset])
242  {
243  std::forward<Callback>(cb)(k, start, row_offset - start);
244  num_found++;
245  }
246  }
247  }
248  return num_found;
249 }
250 
251 } // namespace impl
252 
253 
275 template<class RowPtr, class Indices, class Padding>
276 void
278  RowPtr& rowPtrBeg,
279  RowPtr& rowPtrEnd,
280  Indices& indices,
281  const Padding& padding)
282 {
283  using impl::pad_crs_arrays;
284  pad_crs_arrays<RowPtr, Indices, Padding>(rowPtrBeg, rowPtrEnd, indices, padding);
285 }
286 
332 template <class Pointers, class InOutIndices, class InIndices>
333 size_t
335  typename Pointers::value_type const row,
336  Pointers const& rowPtrs,
337  InOutIndices& curIndices,
338  size_t& numAssigned,
339  InIndices const& newIndices,
340  std::function<void(const size_t, const size_t, const size_t)> cb =
341  std::function<void(const size_t, const size_t, const size_t)>())
342 {
343  static_assert(std::is_same<typename std::remove_const<typename InOutIndices::value_type>::type,
344  typename std::remove_const<typename InIndices::value_type>::type>::value,
345  "Expected views to have same value type");
346 
347  // Provide a unit map for the more general insert_indices
348  using ordinal = typename InOutIndices::value_type;
349  auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
350  numAssigned, newIndices, [](ordinal const idx) { return idx; }, cb);
351  return numInserted;
352 }
353 
354 template <class Pointers, class InOutIndices, class InIndices>
355 size_t
357  typename Pointers::value_type const row,
358  Pointers const& rowPtrs,
359  InOutIndices& curIndices,
360  size_t& numAssigned,
361  InIndices const& newIndices,
362  std::function<typename InOutIndices::value_type(const typename InIndices::value_type)> map,
363  std::function<void(const size_t, const size_t, const size_t)> cb =
364  std::function<void(const size_t, const size_t, const size_t)>())
365 {
366  auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
367  numAssigned, newIndices, map, cb);
368  return numInserted;
369 }
370 
371 
401 template <class Pointers, class Indices1, class Indices2, class Callback>
402 size_t
404  typename Pointers::value_type const row,
405  Pointers const& rowPtrs,
406  Indices1 const& curIndices,
407  Indices2 const& newIndices,
408  Callback&& cb)
409 {
410  static_assert(std::is_same<typename std::remove_const<typename Indices1::value_type>::type,
411  typename std::remove_const<typename Indices2::value_type>::type>::value,
412  "Expected views to have same value type");
413  // Provide a unit map for the more general find_crs_indices
414  using ordinal = typename Indices2::value_type;
415  auto numFound = impl::find_crs_indices(row, rowPtrs, curIndices, newIndices,
416  [=](ordinal ind){ return ind; }, cb);
417  return numFound;
418 }
419 
420 template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
421 size_t
423  typename Pointers::value_type const row,
424  Pointers const& rowPtrs,
425  Indices1 const& curIndices,
426  Indices2 const& newIndices,
427  IndexMap&& map,
428  Callback&& cb)
429 {
430  return impl::find_crs_indices(row, rowPtrs, curIndices, newIndices, map, cb);
431 }
432 
433 } // namespace Details
434 } // namespace Tpetra
435 
436 #endif // TPETRA_DETAILS_CRSUTILS_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
void padCrsArrays(RowPtr &rowPtrBeg, RowPtr &rowPtrEnd, Indices &indices, const Padding &padding)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets into the current list of indices.