10 #ifndef TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
11 #define TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
13 #include "KokkosSparse_CrsMatrix.hpp"
15 #include <type_traits>
57 template <
class SparseMatrixType,
60 typename SparseMatrixType::ordinal_type
62 const typename SparseMatrixType::ordinal_type lclRow,
63 const typename SparseMatrixType::ordinal_type lclColInds[],
64 const typename SparseMatrixType::ordinal_type sortPerm[],
65 const ValsViewType& vals,
66 const typename SparseMatrixType::ordinal_type numEntInInput,
67 const bool forceAtomic =
68 #ifdef KOKKOS_ENABLE_SERIAL
69 !std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
73 const bool checkInputIndices =
true) {
74 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
76 static_assert(std::is_same<matrix_scalar_type,
77 typename SparseMatrixType::value_type>::value,
78 "The matrix's entries must have a nonconst type.");
83 typedef typename SparseMatrixType::ordinal_type LO;
84 static_assert(std::is_integral<LO>::value,
85 "SparseMatrixType::ordinal_type "
86 "must be a built-in integer type.");
91 auto row_view = A.row(lclRow);
92 const LO numEntInRow =
static_cast<LO
>(row_view.length);
97 LO numValid = checkInputIndices ?
static_cast<LO
>(0) : numEntInRow;
103 for (LO j = 0; j < numEntInInput; ++j) {
104 const LO perm_index = sortPerm[j];
105 const LO lclColInd = lclColInds[perm_index];
108 while (row_view.colidx(offset) != lclColInd) {
115 if (checkInputIndices) {
116 if (offset != numEntInRow) {
121 Kokkos::atomic_add(&(row_view.value(offset)), vals[perm_index]);
123 row_view.value(offset) += vals[perm_index];
130 Kokkos::atomic_add(&(row_view.value(offset)), vals[perm_index]);
132 row_view.value(offset) += vals[perm_index];
178 template <
class SparseMatrixType,
181 typename SparseMatrixType::ordinal_type
183 const typename SparseMatrixType::ordinal_type lclRow,
184 const typename SparseMatrixType::ordinal_type lclColInds[],
185 const typename SparseMatrixType::ordinal_type sortPerm[],
186 const ValsViewType& vals,
187 const typename SparseMatrixType::ordinal_type numEntInInput,
188 const bool forceAtomic =
189 #ifdef KOKKOS_ENABLE_SERIAL
190 !std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
194 const bool checkInputIndices =
true) {
195 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
197 static_assert(std::is_same<matrix_scalar_type,
198 typename SparseMatrixType::value_type>::value,
199 "The matrix's entries must have a nonconst type.");
200 static_assert(std::is_assignable<matrix_scalar_type,
201 typename std::decay<decltype(A.values[0] + vals[0])>::type>::value,
202 "The result of adding a matrix entry and an entry of vals "
203 "MUST be assignable to a matrix entry.");
204 typedef typename SparseMatrixType::ordinal_type LO;
205 static_assert(std::is_integral<LO>::value,
206 "SparseMatrixType::ordinal_type "
207 "must be a built-in integer type.");
212 auto row_view = A.row(lclRow);
213 const LO numEntInRow =
static_cast<LO
>(row_view.length);
218 LO numValid = checkInputIndices ?
static_cast<LO
>(0) : numEntInRow;
224 for (LO j = 0; j < numEntInInput; ++j) {
225 const LO perm_index = sortPerm[j];
226 const LO lclColInd = lclColInds[perm_index];
229 while (row_view.colidx(offset) != lclColInd) {
236 if (checkInputIndices) {
237 if (offset != numEntInRow) {
242 Kokkos::atomic_store(&(row_view.value(offset)), vals[perm_index]);
244 row_view.value(offset) += vals[perm_index];
251 Kokkos::atomic_add(&(row_view.value(offset)), vals[perm_index]);
253 row_view.value(offset) += vals[perm_index];
312 template <
class SparseMatrixType,
313 class VectorViewType,
317 typename SparseMatrixType::ordinal_type
319 const VectorViewType& x,
320 typename SparseMatrixType::ordinal_type lids[],
321 typename SparseMatrixType::ordinal_type sortPerm[],
322 const RhsViewType& rhs,
323 const LhsViewType& lhs,
324 const bool forceAtomic =
325 #ifdef KOKKOS_ENABLE_SERIAL
326 !std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
330 const bool checkInputIndices =
true) {
331 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
333 typedef typename std::remove_const<typename VectorViewType::value_type>::type
335 static_assert(std::is_same<matrix_scalar_type,
336 typename SparseMatrixType::value_type>::value,
337 "The sparse output matrix A's entries must have a nonconst type.");
338 static_assert(std::is_same<vector_scalar_type,
339 typename VectorViewType::value_type>::value,
340 "The dense output vector x's entries must have a nonconst type.");
351 typedef typename SparseMatrixType::ordinal_type LO;
352 static_assert(std::is_integral<LO>::value,
353 "SparseMatrixType::ordinal_type "
354 "must be a built-in integer type.");
356 const LO eltDim = rhs.extent(0);
359 for (LO i = 0; i < eltDim; ++i) {
364 LO totalNumValid = 0;
365 for (LO r = 0; r < eltDim; ++r) {
366 const LO lid = lids[r];
368 auto lhs_r = Kokkos::subview(lhs, r, Kokkos::ALL());
373 const LO curNumValid =
378 Kokkos::atomic_add(&x(lid), rhs(sortPerm[r]));
380 x(lid) += rhs(sortPerm[r]);
382 totalNumValid += curNumValid;
384 return totalNumValid;
390 #endif // TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixAssembleElement_sortedLinear(const SparseMatrixType &A, const VectorViewType &x, typename SparseMatrixType::ordinal_type lids[], typename SparseMatrixType::ordinal_type sortPerm[], const RhsViewType &rhs, const LhsViewType &lhs, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lids[j], lids[j]) += lhs(j,j) and x(lids[j]) += rhs(j), for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixReplaceValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) = vals[sortPerm[j]], for all j in 0 .. eltDim-1.
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values...
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it's one word) the input array keys, and apply the resulting permutation to the input...
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixSumIntoValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) += vals[sortPerm[j]], for all j in 0 .. eltDim-1.