Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Details_crsUtils.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_CRSUTILS_HPP
43 #define TPETRA_DETAILS_CRSUTILS_HPP
44 
45 #include <numeric>
46 #include <type_traits>
47 
48 #include "TpetraCore_config.h"
49 #include "Kokkos_Core.hpp"
50 #include "Kokkos_UnorderedMap.hpp"
51 
57 
58 namespace Tpetra {
59 namespace Details {
60 
61 namespace impl {
62 
63 template<class ViewType>
64 ViewType
65 uninitialized_view(const std::string& name, const size_t& size)
66 {
67  return ViewType (Kokkos::view_alloc(name, Kokkos::WithoutInitializing), size);
68 }
69 
71 template<class RowPtr, class Indices, class Values, class Padding>
72 void
73 pad_crs_arrays(
74  const RowPtr& row_ptr_beg,
75  const RowPtr& row_ptr_end,
76  Indices& indices,
77  Values& values,
78  const Padding& padding)
79 {
80 
81  if (padding.size() == 0 || row_ptr_beg.size() == 0) {
82  // Nothing to do
83  return;
84  }
85 
86  auto pad_values = values.extent(0) == indices.extent(0);
87 
88  // Determine if the indices array is large enough
89  auto num_row = row_ptr_beg.size() - 1;
90  auto policy = Kokkos::RangePolicy<typename Padding::execution_space>(0, num_row);
91  RowPtr entries_this_row("entries_this_row", num_row);
92  Kokkos::deep_copy(entries_this_row, 0);
93  size_t additional_size_needed = 0;
94  Kokkos::parallel_reduce("Determine additional size needed", policy,
95  KOKKOS_LAMBDA(const int i, size_t& ladditional_size_needed) {
96 
97  auto allocated_this_row = row_ptr_beg(i+1) - row_ptr_beg(i);
98  auto used_this_row = row_ptr_end(i) - row_ptr_beg(i);
99  auto free_this_row = allocated_this_row - used_this_row;
100  entries_this_row(i) = allocated_this_row;
101 
102  auto k = padding.find(static_cast<typename Padding::key_type>(i));
103  if (padding.valid_at(k)) {
104  // Additional padding was requested for this LID
105  auto num_ent = padding.value_at(k);
106  auto n = (num_ent > free_this_row) ? num_ent - free_this_row : 0;
107  entries_this_row(i) += n;
108  ladditional_size_needed += n;
109  }
110  }, additional_size_needed);
111 
112  if (additional_size_needed == 0)
113  return;
114 
115  using ptrs_value_type = typename RowPtr::non_const_value_type;
116  using inds_value_type = typename Indices::non_const_value_type;
117  using vals_value_type = typename Values::non_const_value_type;
118 
119  // The indices array must be resized and the row_ptr arrays shuffled
120  auto indices_new = uninitialized_view<Indices>("ind new", indices.size()+additional_size_needed);
121  auto values_new = uninitialized_view<Values>("val new", pad_values ? values.size()+additional_size_needed : 0);
122  Kokkos::deep_copy(values_new, vals_value_type(0.0));
123 
124  // mfh: Not so fussy about this not being a kernel initially,
125  // since we're adding a new feature upon which existing code does not rely,
126  // namely Export/Import to a StaticProfile CrsGraph. However, watch out
127  // for fence()ing relating to UVM.
128  auto this_row_beg = row_ptr_beg(0);
129  auto this_row_end = row_ptr_end(0);
130  using range = Kokkos::pair<ptrs_value_type, ptrs_value_type>;
131  for (typename RowPtr::size_type i=0; i<num_row-1; i++) {
132 
133  auto used_this_row = this_row_end - this_row_beg;
134 
135  // Copy over indices
136  {
137  auto indices_old_subview = subview(indices, range(this_row_beg, this_row_beg+used_this_row));
138  auto indices_new_subview = subview(indices_new, range(row_ptr_beg(i), row_ptr_beg(i)+used_this_row));
139  // just call memcpy; it works fine on device if this becomes a kernel
140  memcpy(indices_new_subview.data(), indices_old_subview.data(), used_this_row * sizeof(inds_value_type));
141  }
142 
143  // And then the values
144  if (pad_values) {
145  auto values_old_subview = subview(values, range(this_row_beg, this_row_beg+used_this_row));
146  auto values_new_subview = subview(values_new, range(row_ptr_beg(i), row_ptr_beg(i)+used_this_row));
147  memcpy(values_new_subview.data(), values_old_subview.data(), used_this_row * sizeof(vals_value_type));
148  }
149 
150  // Before modifying the row_ptr arrays, save current beg, end for next iteration
151  this_row_beg = row_ptr_beg(i+1);
152  this_row_end = row_ptr_end(i+1);
153 
154  auto used_next_row = row_ptr_end(i+1) - row_ptr_beg(i+1);
155  row_ptr_beg(i+1) = row_ptr_beg(i) + entries_this_row(i);
156  row_ptr_end(i+1) = row_ptr_beg(i+1) + used_next_row;
157  }
158 
159  {
160  // Copy indices/values for last row
161  row_ptr_beg(num_row) = indices_new.size();
162  auto n = num_row - 1;
163  auto used_this_row = row_ptr_end(n) - row_ptr_beg(n);
164 
165  {
166  auto indices_old_subview = subview(indices, range(this_row_beg, this_row_beg+used_this_row));
167  auto indices_new_subview = subview(indices_new, range(row_ptr_beg(n), row_ptr_beg(n)+used_this_row));
168  memcpy(indices_new_subview.data(), indices_old_subview.data(), used_this_row * sizeof(inds_value_type));
169  }
170 
171  if (pad_values) {
172  auto values_old_subview = subview(values, range(this_row_beg, this_row_beg+used_this_row));
173  auto values_new_subview = subview(values_new, range(row_ptr_beg(n), row_ptr_beg(n)+used_this_row));
174  memcpy(values_new_subview.data(), values_old_subview.data(), used_this_row * sizeof(vals_value_type));
175  }
176  }
177 
178  indices = indices_new;
179  values = values_new;
180 }
181 
183 template <class Pointers, class InOutIndices, class InIndices, class IndexMap>
184 size_t
185 insert_crs_indices(
186  typename Pointers::value_type const row,
187  Pointers const& row_ptrs,
188  InOutIndices& cur_indices,
189  size_t& num_assigned,
190  InIndices const& new_indices,
191  IndexMap&& map,
192  std::function<void(size_t const, size_t const, size_t const)> cb)
193 {
194  if (new_indices.size() == 0) {
195  return 0;
196  }
197 
198  if (cur_indices.size() == 0) {
199  // No room to insert new indices
200  return Teuchos::OrdinalTraits<size_t>::invalid();
201  }
202 
203  using offset_type = typename std::decay<decltype (row_ptrs[0])>::type;
204  using ordinal_type = typename std::decay<decltype (cur_indices[0])>::type;
205 
206  const offset_type start = row_ptrs[row];
207  offset_type end = start + static_cast<offset_type> (num_assigned);
208  const size_t num_avail = (row_ptrs[row + 1] < end) ? size_t (0) :
209  row_ptrs[row + 1] - end;
210  const size_t num_new_indices = static_cast<size_t> (new_indices.size ());
211  size_t num_inserted = 0;
212 
213  for (size_t k = 0; k < num_new_indices; ++k) {
214  const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
215  offset_type row_offset = start;
216  for (; row_offset < end; ++row_offset) {
217  if (idx == cur_indices[row_offset]) {
218  break;
219  }
220  }
221 
222  if (row_offset == end) {
223  if (num_inserted >= num_avail) { // not enough room
224  return Teuchos::OrdinalTraits<size_t>::invalid();
225  }
226  // This index is not yet in indices
227  cur_indices[end++] = idx;
228  num_inserted++;
229  }
230  if (cb) {
231  cb(k, start, row_offset - start);
232  }
233  }
234  num_assigned += num_inserted;
235  return num_inserted;
236 }
237 
239 template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
240 size_t
241 find_crs_indices(
242  typename Pointers::value_type const row,
243  Pointers const& row_ptrs,
244  const size_t curNumEntries,
245  Indices1 const& cur_indices,
246  Indices2 const& new_indices,
247  IndexMap&& map,
248  Callback&& cb)
249 {
250  if (new_indices.size() == 0)
251  return 0;
252 
253  using ordinal = typename Indices1::value_type;
254  auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
255 
256  const size_t start = static_cast<size_t> (row_ptrs[row]);
257  const size_t end = start + curNumEntries;
258  size_t num_found = 0;
259  for (size_t k = 0; k < new_indices.size(); k++)
260  {
261  auto row_offset = start;
262  auto idx = std::forward<IndexMap>(map)(new_indices[k]);
263  if (idx == invalid_ordinal)
264  continue;
265  for (; row_offset < end; row_offset++)
266  {
267  if (idx == cur_indices[row_offset])
268  {
269  std::forward<Callback>(cb)(k, start, row_offset - start);
270  num_found++;
271  }
272  }
273  }
274  return num_found;
275 }
276 
277 } // namespace impl
278 
279 
301 template<class RowPtr, class Indices, class Padding>
302 void
304  const RowPtr& rowPtrBeg,
305  const RowPtr& rowPtrEnd,
306  Indices& indices,
307  const Padding& padding)
308 {
309  using impl::pad_crs_arrays;
310  // send empty values array
311  Indices values;
312  pad_crs_arrays<RowPtr, Indices, Indices, Padding>(rowPtrBeg, rowPtrEnd, indices, values, padding);
313 }
314 
315 template<class RowPtr, class Indices, class Values, class Padding>
316 void
318  const RowPtr& rowPtrBeg,
319  const RowPtr& rowPtrEnd,
320  Indices& indices,
321  Values& values,
322  const Padding& padding)
323 {
324  using impl::pad_crs_arrays;
325  pad_crs_arrays<RowPtr, Indices, Values, Padding>(rowPtrBeg, rowPtrEnd, indices, values, padding);
326 }
327 
373 template <class Pointers, class InOutIndices, class InIndices>
374 size_t
376  typename Pointers::value_type const row,
377  Pointers const& rowPtrs,
378  InOutIndices& curIndices,
379  size_t& numAssigned,
380  InIndices const& newIndices,
381  std::function<void(const size_t, const size_t, const size_t)> cb =
382  std::function<void(const size_t, const size_t, const size_t)>())
383 {
384  static_assert(std::is_same<typename std::remove_const<typename InOutIndices::value_type>::type,
385  typename std::remove_const<typename InIndices::value_type>::type>::value,
386  "Expected views to have same value type");
387 
388  // Provide a unit map for the more general insert_indices
389  using ordinal = typename InOutIndices::value_type;
390  auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
391  numAssigned, newIndices, [](ordinal const idx) { return idx; }, cb);
392  return numInserted;
393 }
394 
395 template <class Pointers, class InOutIndices, class InIndices>
396 size_t
398  typename Pointers::value_type const row,
399  Pointers const& rowPtrs,
400  InOutIndices& curIndices,
401  size_t& numAssigned,
402  InIndices const& newIndices,
403  std::function<typename InOutIndices::value_type(const typename InIndices::value_type)> map,
404  std::function<void(const size_t, const size_t, const size_t)> cb =
405  std::function<void(const size_t, const size_t, const size_t)>())
406 {
407  auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
408  numAssigned, newIndices, map, cb);
409  return numInserted;
410 }
411 
412 
442 template <class Pointers, class Indices1, class Indices2, class Callback>
443 size_t
445  typename Pointers::value_type const row,
446  Pointers const& rowPtrs,
447  const size_t curNumEntries,
448  Indices1 const& curIndices,
449  Indices2 const& newIndices,
450  Callback&& cb)
451 {
452  static_assert(std::is_same<typename std::remove_const<typename Indices1::value_type>::type,
453  typename std::remove_const<typename Indices2::value_type>::type>::value,
454  "Expected views to have same value type");
455  // Provide a unit map for the more general find_crs_indices
456  using ordinal = typename Indices2::value_type;
457  auto numFound = impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices,
458  [=](ordinal ind){ return ind; }, cb);
459  return numFound;
460 }
461 
462 template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
463 size_t
465  typename Pointers::value_type const row,
466  Pointers const& rowPtrs,
467  const size_t curNumEntries,
468  Indices1 const& curIndices,
469  Indices2 const& newIndices,
470  IndexMap&& map,
471  Callback&& cb)
472 {
473  return impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
474 }
475 
476 } // namespace Details
477 } // namespace Tpetra
478 
479 #endif // TPETRA_DETAILS_CRSUTILS_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...