40 #ifndef TPETRA_DETAILS_CRSUTILS_HPP
41 #define TPETRA_DETAILS_CRSUTILS_HPP
43 #include <type_traits>
45 #include "TpetraCore_config.h"
46 #include "Kokkos_Core.hpp"
48 #include "Tpetra_Details_CrsPadding.hpp"
63 template<
class ViewType>
65 make_uninitialized_view(
66 const std::string& name,
69 const std::string*
const prefix)
72 std::ostringstream os;
73 os << *prefix <<
"Allocate Kokkos::View " << name
74 <<
": " << size << std::endl;
75 std::cerr << os.str();
77 using Kokkos::view_alloc;
78 using Kokkos::WithoutInitializing;
79 return ViewType(view_alloc(name, WithoutInitializing), size);
82 template<
class ViewType>
84 make_initialized_view(
85 const std::string& name,
88 const std::string*
const prefix)
91 std::ostringstream os;
92 os << *prefix <<
"Allocate & initialize Kokkos::View "
93 << name <<
": " << size << std::endl;
94 std::cerr << os.str();
96 return ViewType(name, size);
99 template<
class OutViewType,
class InViewType>
101 assign_to_view(OutViewType& out,
102 const InViewType& in,
103 const char viewName[],
105 const std::string*
const prefix)
108 std::ostringstream os;
109 os << *prefix <<
"Assign to Kokkos::View " << viewName
110 <<
": Old size: " << out.extent(0)
111 <<
", New size: " << in.extent(0) << std::endl;
112 std::cerr << os.str();
117 template<
class MemorySpace,
class ViewType>
118 auto create_mirror_view(
119 const MemorySpace& memSpace,
120 const ViewType& view,
122 const std::string*
const prefix) ->
123 decltype(Kokkos::create_mirror_view(memSpace, view))
126 std::ostringstream os;
127 os << *prefix <<
"Create mirror view: "
128 <<
"view.extent(0): " << view.extent(0) << std::endl;
129 std::cerr << os.str();
131 return Kokkos::create_mirror_view(memSpace, view);
134 enum class PadCrsAction {
147 template<
class RowPtr,
class Indices,
class Values,
class Padding>
150 const PadCrsAction action,
151 const RowPtr& row_ptr_beg,
152 const RowPtr& row_ptr_end,
155 const Padding& padding,
159 using Kokkos::view_alloc;
160 using Kokkos::WithoutInitializing;
162 std::unique_ptr<std::string> prefix;
164 const size_t maxNumToPrint = verbose ?
167 std::ostringstream os;
168 os <<
"Proc " << my_rank <<
": Tpetra::...::pad_crs_arrays: ";
169 prefix = std::unique_ptr<std::string>(
new std::string(os.str()));
170 os <<
"Start" << endl;
171 std::cerr << os.str();
173 Kokkos::HostSpace hostSpace;
176 std::ostringstream os;
177 os << *prefix <<
"On input: ";
179 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
185 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
189 os <<
", indices.extent(0): " << indices.extent(0)
190 <<
", values.extent(0): " << values.extent(0)
194 std::cerr << os.str();
197 if (row_ptr_beg.size() == 0) {
199 std::ostringstream os;
200 os << *prefix <<
"Done; local matrix has no rows" << endl;
201 std::cerr << os.str();
206 const size_t lclNumRows(row_ptr_beg.size() - 1);
207 RowPtr newAllocPerRow =
208 make_uninitialized_view<RowPtr>(
"newAllocPerRow", lclNumRows,
209 verbose, prefix.get());
211 std::ostringstream os;
212 os << *prefix <<
"Fill newAllocPerRow & compute increase" << endl;
213 std::cerr << os.str();
217 auto row_ptr_end_h = create_mirror_view(
218 hostSpace, row_ptr_end, verbose, prefix.get());
220 auto row_ptr_beg_h = create_mirror_view(
221 hostSpace, row_ptr_beg, verbose, prefix.get());
224 auto newAllocPerRow_h = create_mirror_view(
225 hostSpace, newAllocPerRow, verbose, prefix.get());
226 using host_range_type = Kokkos::RangePolicy<
227 Kokkos::DefaultHostExecutionSpace,
size_t>;
228 Kokkos::parallel_reduce
229 (
"Tpetra::CrsGraph: Compute new allocation size per row",
230 host_range_type(0, lclNumRows),
231 [&] (
const size_t lclRowInd,
size_t& lclIncrease) {
232 const size_t start = row_ptr_beg_h[lclRowInd];
233 const size_t end = row_ptr_beg_h[lclRowInd+1];
234 TEUCHOS_ASSERT( end >= start );
235 const size_t oldAllocSize = end - start;
236 const size_t oldNumEnt = row_ptr_end_h[lclRowInd] - start;
237 TEUCHOS_ASSERT( oldNumEnt <= oldAllocSize );
244 auto result = padding.get_result(lclRowInd);
245 const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt;
246 if (newNumEnt > oldAllocSize) {
247 lclIncrease += (newNumEnt - oldAllocSize);
248 newAllocPerRow_h[lclRowInd] = newNumEnt;
251 newAllocPerRow_h[lclRowInd] = oldAllocSize;
256 std::ostringstream os;
257 os << *prefix <<
"increase: " << increase <<
", ";
261 std::cerr << os.str();
270 using inds_value_type =
typename Indices::non_const_value_type;
271 using vals_value_type =
typename Values::non_const_value_type;
273 const size_t newIndsSize = size_t(indices.size()) + increase;
274 auto indices_new = make_uninitialized_view<Indices>(
275 "Tpetra::CrsGraph column indices", newIndsSize, verbose,
279 if (action == PadCrsAction::INDICES_AND_VALUES) {
280 const size_t newValsSize = newIndsSize;
283 values_new = make_initialized_view<Values>(
284 "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get());
288 std::ostringstream os;
289 os << *prefix <<
"Repack" << endl;
290 std::cerr << os.str();
292 using execution_space =
typename Indices::execution_space;
293 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
294 Kokkos::parallel_scan(
295 "Tpetra::CrsGraph or CrsMatrix repack",
296 range_type(0, lclNumRows+1),
297 KOKKOS_LAMBDA (
const size_t lclRow,
size_t& newRowBeg,
298 const bool finalPass)
303 const size_t row_beg = row_ptr_beg[lclRow];
304 const size_t row_end =
305 lclRow < lclNumRows ? row_ptr_end[lclRow] : row_beg;
306 const size_t numEnt = row_end - row_beg;
307 const size_t newRowAllocSize =
308 lclRow < lclNumRows ? newAllocPerRow[lclRow] : size_t(0);
310 if (lclRow < lclNumRows) {
311 const Kokkos::pair<size_t, size_t> oldRange(
312 row_beg, row_beg + numEnt);
313 const Kokkos::pair<size_t, size_t> newRange(
314 newRowBeg, newRowBeg + numEnt);
315 auto oldColInds = subview(indices, oldRange);
316 auto newColInds = subview(indices_new, newRange);
319 memcpy(newColInds.data(), oldColInds.data(),
320 numEnt *
sizeof(inds_value_type));
321 if (action == PadCrsAction::INDICES_AND_VALUES) {
322 auto oldVals = subview(values, oldRange);
323 auto newVals = subview(values_new, newRange);
324 memcpy(newVals.data(), oldVals.data(),
325 numEnt *
sizeof(vals_value_type));
329 row_ptr_beg[lclRow] = newRowBeg;
330 if (lclRow < lclNumRows) {
331 row_ptr_end[lclRow] = newRowBeg + numEnt;
334 newRowBeg += newRowAllocSize;
338 std::ostringstream os;
342 Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
350 Kokkos::create_mirror_view(hostSpace, row_ptr_end);
356 std::cerr << os.str();
359 assign_to_view(indices, indices_new,
360 "Tpetra::CrsGraph column indices",
361 verbose, prefix.get());
362 assign_to_view(values, values_new,
363 "Tpetra::CrsMatrix values",
364 verbose, prefix.get());
367 auto indices_h = Kokkos::create_mirror_view(hostSpace, indices);
369 auto values_h = Kokkos::create_mirror_view(hostSpace, values);
371 std::ostringstream os;
382 std::ostringstream os;
383 os << *prefix <<
"Done" << endl;
384 std::cerr << os.str();
389 template <
class Po
inters,
class InOutIndices,
class InIndices,
class IndexMap>
392 typename Pointers::value_type
const row,
393 Pointers
const& row_ptrs,
394 InOutIndices& cur_indices,
395 size_t& num_assigned,
396 InIndices
const& new_indices,
398 std::function<
void(
size_t const,
size_t const,
size_t const)> cb)
400 if (new_indices.size() == 0) {
404 if (cur_indices.size() == 0) {
406 return Teuchos::OrdinalTraits<size_t>::invalid();
409 using offset_type =
typename std::decay<decltype (row_ptrs[0])>::type;
410 using ordinal_type =
typename std::decay<decltype (cur_indices[0])>::type;
412 const offset_type start = row_ptrs[row];
413 offset_type end = start +
static_cast<offset_type
> (num_assigned);
414 const size_t num_avail = (row_ptrs[row + 1] < end) ?
size_t (0) :
415 row_ptrs[row + 1] - end;
416 const size_t num_new_indices =
static_cast<size_t> (new_indices.size ());
417 size_t num_inserted = 0;
419 for (
size_t k = 0; k < num_new_indices; ++k) {
420 const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
421 offset_type row_offset = start;
422 for (; row_offset < end; ++row_offset) {
423 if (idx == cur_indices[row_offset]) {
428 if (row_offset == end) {
429 if (num_inserted >= num_avail) {
430 return Teuchos::OrdinalTraits<size_t>::invalid();
433 cur_indices[end++] = idx;
437 cb(k, start, row_offset - start);
440 num_assigned += num_inserted;
445 template <
class Po
inters,
class Indices1,
class Indices2,
class IndexMap,
class Callback>
448 typename Pointers::value_type
const row,
449 Pointers
const& row_ptrs,
450 const size_t curNumEntries,
451 Indices1
const& cur_indices,
452 Indices2
const& new_indices,
456 if (new_indices.size() == 0)
459 using ordinal =
typename Indices1::value_type;
460 auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
462 const size_t start =
static_cast<size_t> (row_ptrs[row]);
463 const size_t end = start + curNumEntries;
464 size_t num_found = 0;
465 for (
size_t k = 0; k < new_indices.size(); k++)
467 auto row_offset = start;
468 auto idx = std::forward<IndexMap>(map)(new_indices[k]);
469 if (idx == invalid_ordinal)
471 for (; row_offset < end; row_offset++)
473 if (idx == cur_indices[row_offset])
475 std::forward<Callback>(cb)(k, start, row_offset - start);
503 template<
class RowPtr,
class Indices,
class Padding>
506 const RowPtr& rowPtrBeg,
507 const RowPtr& rowPtrEnd,
509 const Padding& padding,
513 using impl::pad_crs_arrays;
516 pad_crs_arrays<RowPtr, Indices, Indices, Padding>(
517 impl::PadCrsAction::INDICES_ONLY, rowPtrBeg, rowPtrEnd,
518 indices, values, padding, my_rank, verbose);
521 template<
class RowPtr,
class Indices,
class Values,
class Padding>
524 const RowPtr& rowPtrBeg,
525 const RowPtr& rowPtrEnd,
528 const Padding& padding,
532 using impl::pad_crs_arrays;
533 pad_crs_arrays<RowPtr, Indices, Values, Padding>(
534 impl::PadCrsAction::INDICES_AND_VALUES, rowPtrBeg, rowPtrEnd,
535 indices, values, padding, my_rank, verbose);
583 template <
class Po
inters,
class InOutIndices,
class InIndices>
586 typename Pointers::value_type
const row,
587 Pointers
const& rowPtrs,
588 InOutIndices& curIndices,
590 InIndices
const& newIndices,
591 std::function<
void(
const size_t,
const size_t,
const size_t)> cb =
592 std::function<
void(
const size_t,
const size_t,
const size_t)>())
594 static_assert(std::is_same<
typename std::remove_const<typename InOutIndices::value_type>::type,
595 typename std::remove_const<typename InIndices::value_type>::type>::value,
596 "Expected views to have same value type");
599 using ordinal =
typename InOutIndices::value_type;
600 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
601 numAssigned, newIndices, [](ordinal
const idx) {
return idx; }, cb);
605 template <
class Po
inters,
class InOutIndices,
class InIndices>
608 typename Pointers::value_type
const row,
609 Pointers
const& rowPtrs,
610 InOutIndices& curIndices,
612 InIndices
const& newIndices,
613 std::function<
typename InOutIndices::value_type(
const typename InIndices::value_type)> map,
614 std::function<
void(
const size_t,
const size_t,
const size_t)> cb =
615 std::function<
void(
const size_t,
const size_t,
const size_t)>())
617 auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
618 numAssigned, newIndices, map, cb);
652 template <
class Po
inters,
class Indices1,
class Indices2,
class Callback>
655 typename Pointers::value_type
const row,
656 Pointers
const& rowPtrs,
657 const size_t curNumEntries,
658 Indices1
const& curIndices,
659 Indices2
const& newIndices,
662 static_assert(std::is_same<
typename std::remove_const<typename Indices1::value_type>::type,
663 typename std::remove_const<typename Indices2::value_type>::type>::value,
664 "Expected views to have same value type");
666 using ordinal =
typename Indices2::value_type;
667 auto numFound = impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices,
668 [=](ordinal ind){
return ind; }, cb);
672 template <
class Po
inters,
class Indices1,
class Indices2,
class IndexMap,
class Callback>
675 typename Pointers::value_type
const row,
676 Pointers
const& rowPtrs,
677 const size_t curNumEntries,
678 Indices1
const& curIndices,
679 Indices2
const& newIndices,
683 return impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
689 #endif // TPETRA_DETAILS_CRSUTILS_HPP
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.