10 #ifndef TPETRA_DETAILS_CRSUTILS_HPP
11 #define TPETRA_DETAILS_CRSUTILS_HPP
13 #include <type_traits>
15 #include "TpetraCore_config.h"
16 #include "Kokkos_Core.hpp"
18 #include "Tpetra_Details_CrsPadding.hpp"
19 #include "Tpetra_Details_WrappedDualView.hpp"
22 #include <unordered_map>
35 template <
class ViewType>
37 make_uninitialized_view(
38 const std::string& name,
41 const std::string*
const prefix) {
43 std::ostringstream os;
44 os << *prefix <<
"Allocate Kokkos::View " << name
45 <<
": " << size << std::endl;
46 std::cerr << os.str();
48 using Kokkos::view_alloc;
49 using Kokkos::WithoutInitializing;
50 return ViewType(view_alloc(name, WithoutInitializing), size);
53 template <
class ViewType>
55 make_initialized_view(
56 const std::string& name,
59 const std::string*
const prefix) {
61 std::ostringstream os;
62 os << *prefix <<
"Allocate & initialize Kokkos::View "
63 << name <<
": " << size << std::endl;
64 std::cerr << os.str();
66 return ViewType(name, size);
69 template <
class OutViewType,
class InViewType>
70 void assign_to_view(OutViewType& out,
72 const char viewName[],
74 const std::string*
const prefix) {
76 std::ostringstream os;
77 os << *prefix <<
"Assign to Kokkos::View " << viewName
78 <<
": Old size: " << out.extent(0)
79 <<
", New size: " << in.extent(0) << std::endl;
80 std::cerr << os.str();
85 template <
class MemorySpace,
class ViewType>
86 auto create_mirror_view(
87 const MemorySpace& memSpace,
90 const std::string*
const prefix) -> decltype(Kokkos::create_mirror_view(memSpace, view)) {
92 std::ostringstream os;
93 os << *prefix <<
"Create mirror view: "
94 <<
"view.extent(0): " << view.extent(0) << std::endl;
95 std::cerr << os.str();
97 return Kokkos::create_mirror_view(memSpace, view);
100 enum class PadCrsAction {
113 template <
class RowPtr,
class Indices,
class Values,
class Padding>
115 const PadCrsAction action,
116 const RowPtr& row_ptr_beg,
117 const RowPtr& row_ptr_end,
118 Indices& indices_wdv,
120 const Padding& padding,
122 const bool verbose) {
123 using execution_space =
typename Indices::t_dev::execution_space;
124 using Kokkos::view_alloc;
125 using Kokkos::WithoutInitializing;
127 std::unique_ptr<std::string> prefix;
131 std::ostringstream os;
132 os <<
"Proc " << my_rank <<
": Tpetra::...::pad_crs_arrays: ";
133 prefix = std::unique_ptr<std::string>(
new std::string(os.str()));
134 os <<
"Start" << endl;
135 std::cerr << os.str();
137 Kokkos::HostSpace hostSpace;
140 std::ostringstream os;
141 os << *prefix <<
"On input: ";
143 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
150 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
155 os <<
", indices.extent(0): " << indices_wdv.extent(0)
156 <<
", values.extent(0): " << values_wdv.extent(0)
160 std::cerr << os.str();
163 if (row_ptr_beg.size() == 0) {
165 std::ostringstream os;
166 os << *prefix <<
"Done; local matrix has no rows" << endl;
167 std::cerr << os.str();
172 const size_t lclNumRows(row_ptr_beg.size() - 1);
173 RowPtr newAllocPerRow =
174 make_uninitialized_view<RowPtr>(
"newAllocPerRow", lclNumRows,
175 verbose, prefix.get());
177 std::ostringstream os;
178 os << *prefix <<
"Fill newAllocPerRow & compute increase" << endl;
179 std::cerr << os.str();
184 execution_space exec_space_instance = execution_space();
185 auto row_ptr_end_h = create_mirror_view(
186 hostSpace, row_ptr_end, verbose, prefix.get());
189 auto row_ptr_beg_h = create_mirror_view(
190 hostSpace, row_ptr_beg, verbose, prefix.get());
200 exec_space_instance.fence();
202 auto newAllocPerRow_h = create_mirror_view(
203 hostSpace, newAllocPerRow, verbose, prefix.get());
204 using host_range_type = Kokkos::RangePolicy<
205 Kokkos::DefaultHostExecutionSpace,
size_t>;
206 Kokkos::parallel_reduce(
207 "Tpetra::CrsGraph: Compute new allocation size per row",
208 host_range_type(0, lclNumRows),
209 [&](
const size_t lclRowInd,
size_t& lclIncrease) {
210 const size_t start = row_ptr_beg_h[lclRowInd];
211 const size_t end = row_ptr_beg_h[lclRowInd + 1];
212 TEUCHOS_ASSERT(end >= start);
213 const size_t oldAllocSize = end -
start;
214 const size_t oldNumEnt = row_ptr_end_h[lclRowInd] -
start;
215 TEUCHOS_ASSERT(oldNumEnt <= oldAllocSize);
222 auto result = padding.get_result(lclRowInd);
223 const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt;
224 if (newNumEnt > oldAllocSize) {
225 lclIncrease += (newNumEnt - oldAllocSize);
226 newAllocPerRow_h[lclRowInd] = newNumEnt;
228 newAllocPerRow_h[lclRowInd] = oldAllocSize;
234 std::ostringstream os;
235 os << *prefix <<
"increase: " << increase <<
", ";
239 std::cerr << os.str();
249 using inds_value_type =
250 typename Indices::t_dev::non_const_value_type;
251 using vals_value_type =
typename Values::t_dev::non_const_value_type;
254 auto indices_old = indices_wdv.getDeviceView(Access::ReadOnly);
255 const size_t newIndsSize = size_t(indices_old.size()) + increase;
256 auto indices_new = make_uninitialized_view<typename Indices::t_dev>(
257 "Tpetra::CrsGraph column indices", newIndsSize, verbose,
260 typename Values::t_dev values_new;
261 auto values_old = values_wdv.getDeviceView(Access::ReadOnly);
262 if (action == PadCrsAction::INDICES_AND_VALUES) {
263 const size_t newValsSize = newIndsSize;
266 values_new = make_initialized_view<typename Values::t_dev>(
267 "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get());
271 std::ostringstream os;
272 os << *prefix <<
"Repack" << endl;
273 std::cerr << os.str();
276 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
277 Kokkos::parallel_scan(
278 "Tpetra::CrsGraph or CrsMatrix repack",
279 range_type(
size_t(0),
size_t(lclNumRows + 1)),
280 KOKKOS_LAMBDA(
const size_t lclRow,
size_t& newRowBeg,
281 const bool finalPass) {
285 const size_t row_beg = row_ptr_beg[lclRow];
286 const size_t row_end =
287 lclRow < lclNumRows ? row_ptr_end[lclRow] : row_beg;
288 const size_t numEnt = row_end - row_beg;
289 const size_t newRowAllocSize =
290 lclRow < lclNumRows ? newAllocPerRow[lclRow] : size_t(0);
292 if (lclRow < lclNumRows) {
293 const Kokkos::pair<size_t, size_t> oldRange(
294 row_beg, row_beg + numEnt);
295 const Kokkos::pair<size_t, size_t> newRange(
296 newRowBeg, newRowBeg + numEnt);
297 auto oldColInds = Kokkos::subview(indices_old, oldRange);
298 auto newColInds = Kokkos::subview(indices_new, newRange);
301 memcpy(newColInds.data(), oldColInds.data(),
302 numEnt *
sizeof(inds_value_type));
303 if (action == PadCrsAction::INDICES_AND_VALUES) {
305 Kokkos::subview(values_old, oldRange);
306 auto newVals = Kokkos::subview(values_new, newRange);
307 memcpy((
void*)newVals.data(), oldVals.data(),
308 numEnt *
sizeof(vals_value_type));
312 row_ptr_beg[lclRow] = newRowBeg;
313 if (lclRow < lclNumRows) {
314 row_ptr_end[lclRow] = newRowBeg + numEnt;
317 newRowBeg += newRowAllocSize;
321 std::ostringstream os;
325 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
334 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
341 std::cout << os.str();
344 indices_wdv = Indices(indices_new);
345 values_wdv = Values(values_new);
349 auto indices_h = indices_wdv.getHostView(Access::ReadOnly);
350 auto values_h = values_wdv.getHostView(Access::ReadOnly);
351 std::ostringstream os;
362 std::ostringstream os;
363 os << *prefix <<
"Done" << endl;
364 std::cerr << os.str();
369 template <
class Po
inters,
class InOutIndices,
class InIndices,
class IndexMap>
372 typename Pointers::value_type
const row,
373 Pointers
const& row_ptrs,
374 InOutIndices& cur_indices,
375 size_t& num_assigned,
376 InIndices
const& new_indices,
378 std::function<
void(
size_t const,
size_t const,
size_t const)> cb) {
379 if (new_indices.size() == 0) {
383 if (cur_indices.size() == 0) {
385 return Teuchos::OrdinalTraits<size_t>::invalid();
388 using offset_type =
typename std::decay<decltype(row_ptrs[0])>::type;
389 using ordinal_type =
typename std::decay<decltype(cur_indices[0])>::type;
391 const offset_type start = row_ptrs[row];
392 offset_type end = start +
static_cast<offset_type
>(num_assigned);
393 const size_t num_avail = (row_ptrs[row + 1] < end) ?
size_t(0) : row_ptrs[row + 1] - end;
394 const size_t num_new_indices =
static_cast<size_t>(new_indices.size());
395 size_t num_inserted = 0;
397 size_t numIndicesLookup = num_assigned + num_new_indices;
400 const size_t useLookUpTableThreshold = 400;
402 if (numIndicesLookup <= useLookUpTableThreshold || num_new_indices == 1) {
405 for (
size_t k = 0; k < num_new_indices; ++k) {
406 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
407 offset_type row_offset =
start;
408 for (; row_offset < end; ++row_offset) {
409 if (idx == cur_indices[row_offset]) {
414 if (row_offset == end) {
415 if (num_inserted >= num_avail) {
416 return Teuchos::OrdinalTraits<size_t>::invalid();
419 cur_indices[end++] = idx;
423 cb(k, start, row_offset - start);
428 std::unordered_map<ordinal_type, offset_type> idxLookup(numIndicesLookup);
431 for (
size_t k = 0; k < num_assigned; k++) {
432 idxLookup[cur_indices[start + k]] = start + k;
436 for (
size_t k = 0; k < num_new_indices; k++) {
437 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
438 offset_type row_offset;
440 auto it = idxLookup.find(idx);
441 if (it == idxLookup.end()) {
442 if (num_inserted >= num_avail) {
443 return Teuchos::OrdinalTraits<size_t>::invalid();
447 cur_indices[end++] = idx;
448 idxLookup[idx] = row_offset;
452 row_offset = it->second;
455 cb(k, start, row_offset - start);
459 num_assigned += num_inserted;
464 template <
class Po
inters,
class Indices1,
class Indices2,
class IndexMap,
class Callback>
467 typename Pointers::value_type
const row,
468 Pointers
const& row_ptrs,
469 const size_t curNumEntries,
470 Indices1
const& cur_indices,
471 Indices2
const& new_indices,
474 if (new_indices.size() == 0)
478 typename std::remove_const<typename Indices1::value_type>::type;
479 auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
481 const size_t start =
static_cast<size_t>(row_ptrs[row]);
482 const size_t end = start + curNumEntries;
483 size_t num_found = 0;
484 for (
size_t k = 0; k < new_indices.size(); k++) {
485 auto row_offset =
start;
486 auto idx = std::forward<IndexMap>(map)(new_indices[k]);
487 if (idx == invalid_ordinal)
489 for (; row_offset < end; row_offset++) {
490 if (idx == cur_indices[row_offset]) {
491 std::forward<Callback>(cb)(k, start, row_offset - start);
518 template <
class RowPtr,
class Indices,
class Padding>
520 const RowPtr& rowPtrBeg,
521 const RowPtr& rowPtrEnd,
522 Indices& indices_wdv,
523 const Padding& padding,
525 const bool verbose) {
526 using impl::pad_crs_arrays;
529 pad_crs_arrays<RowPtr, Indices, Indices, Padding>(
530 impl::PadCrsAction::INDICES_ONLY, rowPtrBeg, rowPtrEnd,
531 indices_wdv, values_null, padding, my_rank, verbose);
534 template <
class RowPtr,
class Indices,
class Values,
class Padding>
536 const RowPtr& rowPtrBeg,
537 const RowPtr& rowPtrEnd,
538 Indices& indices_wdv,
540 const Padding& padding,
542 const bool verbose) {
543 using impl::pad_crs_arrays;
544 pad_crs_arrays<RowPtr, Indices, Values, Padding>(
545 impl::PadCrsAction::INDICES_AND_VALUES, rowPtrBeg, rowPtrEnd,
546 indices_wdv, values_wdv, padding, my_rank, verbose);
594 template <
class Po
inters,
class InOutIndices,
class InIndices>
597 typename Pointers::value_type
const row,
598 Pointers
const& rowPtrs,
599 InOutIndices& curIndices,
601 InIndices
const& newIndices,
602 std::function<
void(
const size_t,
const size_t,
const size_t)> cb =
603 std::function<
void(
const size_t,
const size_t,
const size_t)>()) {
604 static_assert(std::is_same<
typename std::remove_const<typename InOutIndices::value_type>::type,
605 typename std::remove_const<typename InIndices::value_type>::type>::value,
606 "Expected views to have same value type");
609 using ordinal =
typename InOutIndices::value_type;
610 auto numInserted = impl::insert_crs_indices(
611 row, rowPtrs, curIndices,
612 numAssigned, newIndices, [](ordinal
const idx) {
return idx; }, cb);
616 template <
class Po
inters,
class InOutIndices,
class InIndices>
619 typename Pointers::value_type
const row,
620 Pointers
const& rowPtrs,
621 InOutIndices& curIndices,
623 InIndices
const& newIndices,
624 std::function<
typename InOutIndices::value_type(
const typename InIndices::value_type)> map,
625 std::function<
void(
const size_t,
const size_t,
const size_t)> cb =
626 std::function<
void(
const size_t,
const size_t,
const size_t)>()) {
627 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
628 numAssigned, newIndices, map, cb);
661 template <
class Po
inters,
class Indices1,
class Indices2,
class Callback>
664 typename Pointers::value_type
const row,
665 Pointers
const& rowPtrs,
666 const size_t curNumEntries,
667 Indices1
const& curIndices,
668 Indices2
const& newIndices,
670 static_assert(std::is_same<
typename std::remove_const<typename Indices1::value_type>::type,
671 typename std::remove_const<typename Indices2::value_type>::type>::value,
672 "Expected views to have same value type");
674 using ordinal =
typename Indices2::value_type;
675 auto numFound = impl::find_crs_indices(
676 row, rowPtrs, curNumEntries, curIndices, newIndices,
677 [=](ordinal ind) {
return ind; }, cb);
681 template <
class Po
inters,
class Indices1,
class Indices2,
class IndexMap,
class Callback>
684 typename Pointers::value_type
const row,
685 Pointers
const& rowPtrs,
686 const size_t curNumEntries,
687 Indices1
const& curIndices,
688 Indices2
const& newIndices,
691 return impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
697 #endif // TPETRA_DETAILS_CRSUTILS_HPP
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
void start()
Start the deep_copy counter.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.