10 #ifndef TPETRA_DETAILS_CRSUTILS_HPP
11 #define TPETRA_DETAILS_CRSUTILS_HPP
13 #include <type_traits>
15 #include "TpetraCore_config.h"
16 #include "Kokkos_Core.hpp"
18 #include "Tpetra_Details_CrsPadding.hpp"
19 #include "Tpetra_Details_WrappedDualView.hpp"
22 #include <unordered_map>
35 template<
class ViewType>
37 make_uninitialized_view(
38 const std::string& name,
41 const std::string*
const prefix)
44 std::ostringstream os;
45 os << *prefix <<
"Allocate Kokkos::View " << name
46 <<
": " << size << std::endl;
47 std::cerr << os.str();
49 using Kokkos::view_alloc;
50 using Kokkos::WithoutInitializing;
51 return ViewType(view_alloc(name, WithoutInitializing), size);
54 template<
class ViewType>
56 make_initialized_view(
57 const std::string& name,
60 const std::string*
const prefix)
63 std::ostringstream os;
64 os << *prefix <<
"Allocate & initialize Kokkos::View "
65 << name <<
": " << size << std::endl;
66 std::cerr << os.str();
68 return ViewType(name, size);
71 template<
class OutViewType,
class InViewType>
73 assign_to_view(OutViewType& out,
75 const char viewName[],
77 const std::string*
const prefix)
80 std::ostringstream os;
81 os << *prefix <<
"Assign to Kokkos::View " << viewName
82 <<
": Old size: " << out.extent(0)
83 <<
", New size: " << in.extent(0) << std::endl;
84 std::cerr << os.str();
89 template<
class MemorySpace,
class ViewType>
90 auto create_mirror_view(
91 const MemorySpace& memSpace,
94 const std::string*
const prefix) ->
95 decltype(Kokkos::create_mirror_view(memSpace, view))
98 std::ostringstream os;
99 os << *prefix <<
"Create mirror view: "
100 <<
"view.extent(0): " << view.extent(0) << std::endl;
101 std::cerr << os.str();
103 return Kokkos::create_mirror_view(memSpace, view);
106 enum class PadCrsAction {
119 template<
class RowPtr,
class Indices,
class Values,
class Padding>
122 const PadCrsAction action,
123 const RowPtr& row_ptr_beg,
124 const RowPtr& row_ptr_end,
125 Indices& indices_wdv,
127 const Padding& padding,
131 using execution_space =
typename Indices::t_dev::execution_space;
132 using Kokkos::view_alloc;
133 using Kokkos::WithoutInitializing;
135 std::unique_ptr<std::string> prefix;
137 const size_t maxNumToPrint = verbose ?
140 std::ostringstream os;
141 os <<
"Proc " << my_rank <<
": Tpetra::...::pad_crs_arrays: ";
142 prefix = std::unique_ptr<std::string>(
new std::string(os.str()));
143 os <<
"Start" << endl;
144 std::cerr << os.str();
146 Kokkos::HostSpace hostSpace;
149 std::ostringstream os;
150 os << *prefix <<
"On input: ";
152 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
159 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
164 os <<
", indices.extent(0): " << indices_wdv.extent(0)
165 <<
", values.extent(0): " << values_wdv.extent(0)
169 std::cerr << os.str();
172 if (row_ptr_beg.size() == 0) {
174 std::ostringstream os;
175 os << *prefix <<
"Done; local matrix has no rows" << endl;
176 std::cerr << os.str();
181 const size_t lclNumRows(row_ptr_beg.size() - 1);
182 RowPtr newAllocPerRow =
183 make_uninitialized_view<RowPtr>(
"newAllocPerRow", lclNumRows,
184 verbose, prefix.get());
186 std::ostringstream os;
187 os << *prefix <<
"Fill newAllocPerRow & compute increase" << endl;
188 std::cerr << os.str();
193 execution_space exec_space_instance = execution_space();
194 auto row_ptr_end_h = create_mirror_view(
195 hostSpace, row_ptr_end, verbose, prefix.get());
198 auto row_ptr_beg_h = create_mirror_view(
199 hostSpace, row_ptr_beg, verbose, prefix.get());
209 exec_space_instance.fence();
211 auto newAllocPerRow_h = create_mirror_view(
212 hostSpace, newAllocPerRow, verbose, prefix.get());
213 using host_range_type = Kokkos::RangePolicy<
214 Kokkos::DefaultHostExecutionSpace,
size_t>;
215 Kokkos::parallel_reduce
216 (
"Tpetra::CrsGraph: Compute new allocation size per row",
217 host_range_type(0, lclNumRows),
218 [&] (
const size_t lclRowInd,
size_t& lclIncrease) {
219 const size_t start = row_ptr_beg_h[lclRowInd];
220 const size_t end = row_ptr_beg_h[lclRowInd+1];
221 TEUCHOS_ASSERT( end >= start );
222 const size_t oldAllocSize = end -
start;
223 const size_t oldNumEnt = row_ptr_end_h[lclRowInd] -
start;
224 TEUCHOS_ASSERT( oldNumEnt <= oldAllocSize );
231 auto result = padding.get_result(lclRowInd);
232 const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt;
233 if (newNumEnt > oldAllocSize) {
234 lclIncrease += (newNumEnt - oldAllocSize);
235 newAllocPerRow_h[lclRowInd] = newNumEnt;
238 newAllocPerRow_h[lclRowInd] = oldAllocSize;
243 std::ostringstream os;
244 os << *prefix <<
"increase: " << increase <<
", ";
248 std::cerr << os.str();
258 using inds_value_type =
259 typename Indices::t_dev::non_const_value_type;
260 using vals_value_type =
typename Values::t_dev::non_const_value_type;
263 auto indices_old = indices_wdv.getDeviceView(Access::ReadOnly);
264 const size_t newIndsSize = size_t(indices_old.size()) + increase;
265 auto indices_new = make_uninitialized_view<typename Indices::t_dev>(
266 "Tpetra::CrsGraph column indices", newIndsSize, verbose,
269 typename Values::t_dev values_new;
270 auto values_old = values_wdv.getDeviceView(Access::ReadOnly);
271 if (action == PadCrsAction::INDICES_AND_VALUES) {
272 const size_t newValsSize = newIndsSize;
275 values_new = make_initialized_view<typename Values::t_dev>(
276 "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get());
280 std::ostringstream os;
281 os << *prefix <<
"Repack" << endl;
282 std::cerr << os.str();
285 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
286 Kokkos::parallel_scan(
287 "Tpetra::CrsGraph or CrsMatrix repack",
288 range_type(
size_t(0),
size_t(lclNumRows+1)),
289 KOKKOS_LAMBDA (
const size_t lclRow,
size_t& newRowBeg,
290 const bool finalPass)
295 const size_t row_beg = row_ptr_beg[lclRow];
296 const size_t row_end =
297 lclRow < lclNumRows ? row_ptr_end[lclRow] : row_beg;
298 const size_t numEnt = row_end - row_beg;
299 const size_t newRowAllocSize =
300 lclRow < lclNumRows ? newAllocPerRow[lclRow] : size_t(0);
302 if (lclRow < lclNumRows) {
303 const Kokkos::pair<size_t, size_t> oldRange(
304 row_beg, row_beg + numEnt);
305 const Kokkos::pair<size_t, size_t> newRange(
306 newRowBeg, newRowBeg + numEnt);
307 auto oldColInds = Kokkos::subview(indices_old, oldRange);
308 auto newColInds = Kokkos::subview(indices_new, newRange);
311 memcpy(newColInds.data(), oldColInds.data(),
312 numEnt *
sizeof(inds_value_type));
313 if (action == PadCrsAction::INDICES_AND_VALUES) {
315 Kokkos::subview(values_old, oldRange);
316 auto newVals = Kokkos::subview(values_new, newRange);
317 memcpy((
void*) newVals.data(), oldVals.data(),
318 numEnt *
sizeof(vals_value_type));
322 row_ptr_beg[lclRow] = newRowBeg;
323 if (lclRow < lclNumRows) {
324 row_ptr_end[lclRow] = newRowBeg + numEnt;
327 newRowBeg += newRowAllocSize;
332 std::ostringstream os;
336 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
345 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
352 std::cout << os.str();
355 indices_wdv = Indices(indices_new);
356 values_wdv = Values(values_new);
360 auto indices_h = indices_wdv.getHostView(Access::ReadOnly);
361 auto values_h = values_wdv.getHostView(Access::ReadOnly);
362 std::ostringstream os;
373 std::ostringstream os;
374 os << *prefix <<
"Done" << endl;
375 std::cerr << os.str();
380 template <
class Po
inters,
class InOutIndices,
class InIndices,
class IndexMap>
383 typename Pointers::value_type
const row,
384 Pointers
const& row_ptrs,
385 InOutIndices& cur_indices,
386 size_t& num_assigned,
387 InIndices
const& new_indices,
389 std::function<
void(
size_t const,
size_t const,
size_t const)> cb)
391 if (new_indices.size() == 0) {
395 if (cur_indices.size() == 0) {
397 return Teuchos::OrdinalTraits<size_t>::invalid();
400 using offset_type =
typename std::decay<decltype (row_ptrs[0])>::type;
401 using ordinal_type =
typename std::decay<decltype (cur_indices[0])>::type;
403 const offset_type start = row_ptrs[row];
404 offset_type end = start +
static_cast<offset_type
> (num_assigned);
405 const size_t num_avail = (row_ptrs[row + 1] < end) ?
size_t (0) :
406 row_ptrs[row + 1] - end;
407 const size_t num_new_indices =
static_cast<size_t> (new_indices.size ());
408 size_t num_inserted = 0;
410 size_t numIndicesLookup = num_assigned + num_new_indices;
413 const size_t useLookUpTableThreshold = 400;
415 if (numIndicesLookup <= useLookUpTableThreshold || num_new_indices == 1) {
418 for (
size_t k = 0; k < num_new_indices; ++k) {
419 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
420 offset_type row_offset =
start;
421 for (; row_offset < end; ++row_offset) {
422 if (idx == cur_indices[row_offset]) {
427 if (row_offset == end) {
428 if (num_inserted >= num_avail) {
429 return Teuchos::OrdinalTraits<size_t>::invalid();
432 cur_indices[end++] = idx;
436 cb(k, start, row_offset - start);
442 std::unordered_map<ordinal_type, offset_type> idxLookup(numIndicesLookup);
445 for (
size_t k = 0; k < num_assigned; k++) {
446 idxLookup[cur_indices[start+k]] = start+k;
450 for (
size_t k = 0; k < num_new_indices; k++) {
451 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
452 offset_type row_offset;
454 auto it = idxLookup.find(idx);
455 if (it == idxLookup.end()) {
456 if (num_inserted >= num_avail) {
457 return Teuchos::OrdinalTraits<size_t>::invalid();
461 cur_indices[end++] = idx;
462 idxLookup[idx] = row_offset;
467 row_offset = it->second;
470 cb(k, start, row_offset - start);
474 num_assigned += num_inserted;
479 template <
class Po
inters,
class Indices1,
class Indices2,
class IndexMap,
class Callback>
482 typename Pointers::value_type
const row,
483 Pointers
const& row_ptrs,
484 const size_t curNumEntries,
485 Indices1
const& cur_indices,
486 Indices2
const& new_indices,
490 if (new_indices.size() == 0)
494 typename std::remove_const<typename Indices1::value_type>::type;
495 auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
497 const size_t start =
static_cast<size_t> (row_ptrs[row]);
498 const size_t end = start + curNumEntries;
499 size_t num_found = 0;
500 for (
size_t k = 0; k < new_indices.size(); k++)
502 auto row_offset =
start;
503 auto idx = std::forward<IndexMap>(map)(new_indices[k]);
504 if (idx == invalid_ordinal)
506 for (; row_offset < end; row_offset++)
508 if (idx == cur_indices[row_offset])
510 std::forward<Callback>(cb)(k, start, row_offset - start);
538 template<
class RowPtr,
class Indices,
class Padding>
541 const RowPtr& rowPtrBeg,
542 const RowPtr& rowPtrEnd,
543 Indices& indices_wdv,
544 const Padding& padding,
548 using impl::pad_crs_arrays;
551 pad_crs_arrays<RowPtr, Indices, Indices, Padding>(
552 impl::PadCrsAction::INDICES_ONLY, rowPtrBeg, rowPtrEnd,
553 indices_wdv, values_null, padding, my_rank, verbose);
556 template<
class RowPtr,
class Indices,
class Values,
class Padding>
559 const RowPtr& rowPtrBeg,
560 const RowPtr& rowPtrEnd,
561 Indices& indices_wdv,
563 const Padding& padding,
567 using impl::pad_crs_arrays;
568 pad_crs_arrays<RowPtr, Indices, Values, Padding>(
569 impl::PadCrsAction::INDICES_AND_VALUES, rowPtrBeg, rowPtrEnd,
570 indices_wdv, values_wdv, padding, my_rank, verbose);
618 template <
class Po
inters,
class InOutIndices,
class InIndices>
621 typename Pointers::value_type
const row,
622 Pointers
const& rowPtrs,
623 InOutIndices& curIndices,
625 InIndices
const& newIndices,
626 std::function<
void(
const size_t,
const size_t,
const size_t)> cb =
627 std::function<
void(
const size_t,
const size_t,
const size_t)>())
629 static_assert(std::is_same<
typename std::remove_const<typename InOutIndices::value_type>::type,
630 typename std::remove_const<typename InIndices::value_type>::type>::value,
631 "Expected views to have same value type");
634 using ordinal =
typename InOutIndices::value_type;
635 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
636 numAssigned, newIndices, [](ordinal
const idx) {
return idx; }, cb);
640 template <
class Po
inters,
class InOutIndices,
class InIndices>
643 typename Pointers::value_type
const row,
644 Pointers
const& rowPtrs,
645 InOutIndices& curIndices,
647 InIndices
const& newIndices,
648 std::function<
typename InOutIndices::value_type(
const typename InIndices::value_type)> map,
649 std::function<
void(
const size_t,
const size_t,
const size_t)> cb =
650 std::function<
void(
const size_t,
const size_t,
const size_t)>())
652 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
653 numAssigned, newIndices, map, cb);
687 template <
class Po
inters,
class Indices1,
class Indices2,
class Callback>
690 typename Pointers::value_type
const row,
691 Pointers
const& rowPtrs,
692 const size_t curNumEntries,
693 Indices1
const& curIndices,
694 Indices2
const& newIndices,
697 static_assert(std::is_same<
typename std::remove_const<typename Indices1::value_type>::type,
698 typename std::remove_const<typename Indices2::value_type>::type>::value,
699 "Expected views to have same value type");
701 using ordinal =
typename Indices2::value_type;
702 auto numFound = impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices,
703 [=](ordinal ind){
return ind; }, cb);
707 template <
class Po
inters,
class Indices1,
class Indices2,
class IndexMap,
class Callback>
710 typename Pointers::value_type
const row,
711 Pointers
const& rowPtrs,
712 const size_t curNumEntries,
713 Indices1
const& curIndices,
714 Indices2
const& newIndices,
718 return impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
724 #endif // TPETRA_DETAILS_CRSUTILS_HPP
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
void start()
Start the deep_copy counter.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.