40 #ifndef TPETRA_DETAILS_CRSUTILS_HPP
41 #define TPETRA_DETAILS_CRSUTILS_HPP
43 #include <type_traits>
45 #include "TpetraCore_config.h"
46 #include "Kokkos_Core.hpp"
48 #include "Tpetra_Details_CrsPadding.hpp"
49 #include "Tpetra_Details_WrappedDualView.hpp"
52 #include <unordered_map>
65 template<
class ViewType>
67 make_uninitialized_view(
68 const std::string& name,
71 const std::string*
const prefix)
74 std::ostringstream os;
75 os << *prefix <<
"Allocate Kokkos::View " << name
76 <<
": " << size << std::endl;
77 std::cerr << os.str();
79 using Kokkos::view_alloc;
80 using Kokkos::WithoutInitializing;
81 return ViewType(view_alloc(name, WithoutInitializing), size);
84 template<
class ViewType>
86 make_initialized_view(
87 const std::string& name,
90 const std::string*
const prefix)
93 std::ostringstream os;
94 os << *prefix <<
"Allocate & initialize Kokkos::View "
95 << name <<
": " << size << std::endl;
96 std::cerr << os.str();
98 return ViewType(name, size);
101 template<
class OutViewType,
class InViewType>
103 assign_to_view(OutViewType& out,
104 const InViewType& in,
105 const char viewName[],
107 const std::string*
const prefix)
110 std::ostringstream os;
111 os << *prefix <<
"Assign to Kokkos::View " << viewName
112 <<
": Old size: " << out.extent(0)
113 <<
", New size: " << in.extent(0) << std::endl;
114 std::cerr << os.str();
119 template<
class MemorySpace,
class ViewType>
120 auto create_mirror_view(
121 const MemorySpace& memSpace,
122 const ViewType& view,
124 const std::string*
const prefix) ->
125 decltype(Kokkos::create_mirror_view(memSpace, view))
128 std::ostringstream os;
129 os << *prefix <<
"Create mirror view: "
130 <<
"view.extent(0): " << view.extent(0) << std::endl;
131 std::cerr << os.str();
133 return Kokkos::create_mirror_view(memSpace, view);
136 enum class PadCrsAction {
149 template<
class RowPtr,
class Indices,
class Values,
class Padding>
152 const PadCrsAction action,
153 const RowPtr& row_ptr_beg,
154 const RowPtr& row_ptr_end,
155 Indices& indices_wdv,
157 const Padding& padding,
161 using execution_space =
typename Indices::t_dev::execution_space;
162 using Kokkos::view_alloc;
163 using Kokkos::WithoutInitializing;
165 std::unique_ptr<std::string> prefix;
167 const size_t maxNumToPrint = verbose ?
170 std::ostringstream os;
171 os <<
"Proc " << my_rank <<
": Tpetra::...::pad_crs_arrays: ";
172 prefix = std::unique_ptr<std::string>(
new std::string(os.str()));
173 os <<
"Start" << endl;
174 std::cerr << os.str();
176 Kokkos::HostSpace hostSpace;
179 std::ostringstream os;
180 os << *prefix <<
"On input: ";
182 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
189 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
194 os <<
", indices.extent(0): " << indices_wdv.extent(0)
195 <<
", values.extent(0): " << values_wdv.extent(0)
199 std::cerr << os.str();
202 if (row_ptr_beg.size() == 0) {
204 std::ostringstream os;
205 os << *prefix <<
"Done; local matrix has no rows" << endl;
206 std::cerr << os.str();
211 const size_t lclNumRows(row_ptr_beg.size() - 1);
212 RowPtr newAllocPerRow =
213 make_uninitialized_view<RowPtr>(
"newAllocPerRow", lclNumRows,
214 verbose, prefix.get());
216 std::ostringstream os;
217 os << *prefix <<
"Fill newAllocPerRow & compute increase" << endl;
218 std::cerr << os.str();
223 execution_space exec_space_instance = execution_space();
224 auto row_ptr_end_h = create_mirror_view(
225 hostSpace, row_ptr_end, verbose, prefix.get());
228 auto row_ptr_beg_h = create_mirror_view(
229 hostSpace, row_ptr_beg, verbose, prefix.get());
239 exec_space_instance.fence();
241 auto newAllocPerRow_h = create_mirror_view(
242 hostSpace, newAllocPerRow, verbose, prefix.get());
243 using host_range_type = Kokkos::RangePolicy<
244 Kokkos::DefaultHostExecutionSpace,
size_t>;
245 Kokkos::parallel_reduce
246 (
"Tpetra::CrsGraph: Compute new allocation size per row",
247 host_range_type(0, lclNumRows),
248 [&] (
const size_t lclRowInd,
size_t& lclIncrease) {
249 const size_t start = row_ptr_beg_h[lclRowInd];
250 const size_t end = row_ptr_beg_h[lclRowInd+1];
251 TEUCHOS_ASSERT( end >= start );
252 const size_t oldAllocSize = end -
start;
253 const size_t oldNumEnt = row_ptr_end_h[lclRowInd] -
start;
254 TEUCHOS_ASSERT( oldNumEnt <= oldAllocSize );
261 auto result = padding.get_result(lclRowInd);
262 const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt;
263 if (newNumEnt > oldAllocSize) {
264 lclIncrease += (newNumEnt - oldAllocSize);
265 newAllocPerRow_h[lclRowInd] = newNumEnt;
268 newAllocPerRow_h[lclRowInd] = oldAllocSize;
273 std::ostringstream os;
274 os << *prefix <<
"increase: " << increase <<
", ";
278 std::cerr << os.str();
288 using inds_value_type =
289 typename Indices::t_dev::non_const_value_type;
290 using vals_value_type =
typename Values::t_dev::non_const_value_type;
293 auto indices_old = indices_wdv.getDeviceView(Access::ReadOnly);
294 const size_t newIndsSize = size_t(indices_old.size()) + increase;
295 auto indices_new = make_uninitialized_view<typename Indices::t_dev>(
296 "Tpetra::CrsGraph column indices", newIndsSize, verbose,
299 typename Values::t_dev values_new;
300 auto values_old = values_wdv.getDeviceView(Access::ReadOnly);
301 if (action == PadCrsAction::INDICES_AND_VALUES) {
302 const size_t newValsSize = newIndsSize;
305 values_new = make_initialized_view<typename Values::t_dev>(
306 "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get());
310 std::ostringstream os;
311 os << *prefix <<
"Repack" << endl;
312 std::cerr << os.str();
315 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
316 Kokkos::parallel_scan(
317 "Tpetra::CrsGraph or CrsMatrix repack",
318 range_type(
size_t(0),
size_t(lclNumRows+1)),
319 KOKKOS_LAMBDA (
const size_t lclRow,
size_t& newRowBeg,
320 const bool finalPass)
325 const size_t row_beg = row_ptr_beg[lclRow];
326 const size_t row_end =
327 lclRow < lclNumRows ? row_ptr_end[lclRow] : row_beg;
328 const size_t numEnt = row_end - row_beg;
329 const size_t newRowAllocSize =
330 lclRow < lclNumRows ? newAllocPerRow[lclRow] : size_t(0);
332 if (lclRow < lclNumRows) {
333 const Kokkos::pair<size_t, size_t> oldRange(
334 row_beg, row_beg + numEnt);
335 const Kokkos::pair<size_t, size_t> newRange(
336 newRowBeg, newRowBeg + numEnt);
337 auto oldColInds = Kokkos::subview(indices_old, oldRange);
338 auto newColInds = Kokkos::subview(indices_new, newRange);
341 memcpy(newColInds.data(), oldColInds.data(),
342 numEnt *
sizeof(inds_value_type));
343 if (action == PadCrsAction::INDICES_AND_VALUES) {
345 Kokkos::subview(values_old, oldRange);
346 auto newVals = Kokkos::subview(values_new, newRange);
347 memcpy(newVals.data(), oldVals.data(),
348 numEnt *
sizeof(vals_value_type));
352 row_ptr_beg[lclRow] = newRowBeg;
353 if (lclRow < lclNumRows) {
354 row_ptr_end[lclRow] = newRowBeg + numEnt;
357 newRowBeg += newRowAllocSize;
362 std::ostringstream os;
366 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
375 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
382 std::cout << os.str();
385 indices_wdv = Indices(indices_new);
386 values_wdv = Values(values_new);
390 auto indices_h = indices_wdv.getHostView(Access::ReadOnly);
391 auto values_h = values_wdv.getHostView(Access::ReadOnly);
392 std::ostringstream os;
403 std::ostringstream os;
404 os << *prefix <<
"Done" << endl;
405 std::cerr << os.str();
410 template <
class Po
inters,
class InOutIndices,
class InIndices,
class IndexMap>
413 typename Pointers::value_type
const row,
414 Pointers
const& row_ptrs,
415 InOutIndices& cur_indices,
416 size_t& num_assigned,
417 InIndices
const& new_indices,
419 std::function<
void(
size_t const,
size_t const,
size_t const)> cb)
421 if (new_indices.size() == 0) {
425 if (cur_indices.size() == 0) {
427 return Teuchos::OrdinalTraits<size_t>::invalid();
430 using offset_type =
typename std::decay<decltype (row_ptrs[0])>::type;
431 using ordinal_type =
typename std::decay<decltype (cur_indices[0])>::type;
433 const offset_type start = row_ptrs[row];
434 offset_type end = start +
static_cast<offset_type
> (num_assigned);
435 const size_t num_avail = (row_ptrs[row + 1] < end) ?
size_t (0) :
436 row_ptrs[row + 1] - end;
437 const size_t num_new_indices =
static_cast<size_t> (new_indices.size ());
438 size_t num_inserted = 0;
440 size_t numIndicesLookup = num_assigned + num_new_indices;
443 const size_t useLookUpTableThreshold = 400;
445 if (numIndicesLookup <= useLookUpTableThreshold || num_new_indices == 1) {
448 for (
size_t k = 0; k < num_new_indices; ++k) {
449 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
450 offset_type row_offset =
start;
451 for (; row_offset < end; ++row_offset) {
452 if (idx == cur_indices[row_offset]) {
457 if (row_offset == end) {
458 if (num_inserted >= num_avail) {
459 return Teuchos::OrdinalTraits<size_t>::invalid();
462 cur_indices[end++] = idx;
466 cb(k, start, row_offset - start);
472 std::unordered_map<ordinal_type, offset_type> idxLookup(numIndicesLookup);
475 for (
size_t k = 0; k < num_assigned; k++) {
476 idxLookup[cur_indices[start+k]] = start+k;
480 for (
size_t k = 0; k < num_new_indices; k++) {
481 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
482 offset_type row_offset;
484 auto it = idxLookup.find(idx);
485 if (it == idxLookup.end()) {
486 if (num_inserted >= num_avail) {
487 return Teuchos::OrdinalTraits<size_t>::invalid();
491 cur_indices[end++] = idx;
492 idxLookup[idx] = row_offset;
497 row_offset = it->second;
500 cb(k, start, row_offset - start);
504 num_assigned += num_inserted;
509 template <
class Po
inters,
class Indices1,
class Indices2,
class IndexMap,
class Callback>
512 typename Pointers::value_type
const row,
513 Pointers
const& row_ptrs,
514 const size_t curNumEntries,
515 Indices1
const& cur_indices,
516 Indices2
const& new_indices,
520 if (new_indices.size() == 0)
524 typename std::remove_const<typename Indices1::value_type>::type;
525 auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
527 const size_t start =
static_cast<size_t> (row_ptrs[row]);
528 const size_t end = start + curNumEntries;
529 size_t num_found = 0;
530 for (
size_t k = 0; k < new_indices.size(); k++)
532 auto row_offset =
start;
533 auto idx = std::forward<IndexMap>(map)(new_indices[k]);
534 if (idx == invalid_ordinal)
536 for (; row_offset < end; row_offset++)
538 if (idx == cur_indices[row_offset])
540 std::forward<Callback>(cb)(k, start, row_offset - start);
568 template<
class RowPtr,
class Indices,
class Padding>
571 const RowPtr& rowPtrBeg,
572 const RowPtr& rowPtrEnd,
573 Indices& indices_wdv,
574 const Padding& padding,
578 using impl::pad_crs_arrays;
581 pad_crs_arrays<RowPtr, Indices, Indices, Padding>(
582 impl::PadCrsAction::INDICES_ONLY, rowPtrBeg, rowPtrEnd,
583 indices_wdv, values_null, padding, my_rank, verbose);
586 template<
class RowPtr,
class Indices,
class Values,
class Padding>
589 const RowPtr& rowPtrBeg,
590 const RowPtr& rowPtrEnd,
591 Indices& indices_wdv,
593 const Padding& padding,
597 using impl::pad_crs_arrays;
598 pad_crs_arrays<RowPtr, Indices, Values, Padding>(
599 impl::PadCrsAction::INDICES_AND_VALUES, rowPtrBeg, rowPtrEnd,
600 indices_wdv, values_wdv, padding, my_rank, verbose);
648 template <
class Po
inters,
class InOutIndices,
class InIndices>
651 typename Pointers::value_type
const row,
652 Pointers
const& rowPtrs,
653 InOutIndices& curIndices,
655 InIndices
const& newIndices,
656 std::function<
void(
const size_t,
const size_t,
const size_t)> cb =
657 std::function<
void(
const size_t,
const size_t,
const size_t)>())
659 static_assert(std::is_same<
typename std::remove_const<typename InOutIndices::value_type>::type,
660 typename std::remove_const<typename InIndices::value_type>::type>::value,
661 "Expected views to have same value type");
664 using ordinal =
typename InOutIndices::value_type;
665 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
666 numAssigned, newIndices, [](ordinal
const idx) {
return idx; }, cb);
670 template <
class Po
inters,
class InOutIndices,
class InIndices>
673 typename Pointers::value_type
const row,
674 Pointers
const& rowPtrs,
675 InOutIndices& curIndices,
677 InIndices
const& newIndices,
678 std::function<
typename InOutIndices::value_type(
const typename InIndices::value_type)> map,
679 std::function<
void(
const size_t,
const size_t,
const size_t)> cb =
680 std::function<
void(
const size_t,
const size_t,
const size_t)>())
682 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
683 numAssigned, newIndices, map, cb);
717 template <
class Po
inters,
class Indices1,
class Indices2,
class Callback>
720 typename Pointers::value_type
const row,
721 Pointers
const& rowPtrs,
722 const size_t curNumEntries,
723 Indices1
const& curIndices,
724 Indices2
const& newIndices,
727 static_assert(std::is_same<
typename std::remove_const<typename Indices1::value_type>::type,
728 typename std::remove_const<typename Indices2::value_type>::type>::value,
729 "Expected views to have same value type");
731 using ordinal =
typename Indices2::value_type;
732 auto numFound = impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices,
733 [=](ordinal ind){
return ind; }, cb);
737 template <
class Po
inters,
class Indices1,
class Indices2,
class IndexMap,
class Callback>
740 typename Pointers::value_type
const row,
741 Pointers
const& rowPtrs,
742 const size_t curNumEntries,
743 Indices1
const& curIndices,
744 Indices2
const& newIndices,
748 return impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
754 #endif // TPETRA_DETAILS_CRSUTILS_HPP
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
void start()
Start the deep_copy counter.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.