10 #ifndef TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
11 #define TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
13 #include "KokkosSparse_CrsMatrix.hpp"
15 #include <type_traits>
57 template<
class SparseMatrixType,
60 typename SparseMatrixType::ordinal_type
62 const typename SparseMatrixType::ordinal_type lclRow,
63 const typename SparseMatrixType::ordinal_type lclColInds[],
64 const typename SparseMatrixType::ordinal_type sortPerm[],
65 const ValsViewType& vals,
66 const typename SparseMatrixType::ordinal_type numEntInInput,
67 const bool forceAtomic =
68 #ifdef KOKKOS_ENABLE_SERIAL
69 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
73 const bool checkInputIndices =
true)
75 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
77 static_assert (std::is_same<matrix_scalar_type,
78 typename SparseMatrixType::value_type>::value,
79 "The matrix's entries must have a nonconst type.");
84 typedef typename SparseMatrixType::ordinal_type LO;
85 static_assert (std::is_integral<LO>::value,
"SparseMatrixType::ordinal_type "
86 "must be a built-in integer type.");
91 auto row_view = A.row (lclRow);
92 const LO numEntInRow =
static_cast<LO
> (row_view.length);
97 LO numValid = checkInputIndices ?
static_cast<LO
> (0) : numEntInRow;
103 for (LO j = 0; j < numEntInInput; ++j) {
104 const LO perm_index = sortPerm[j];
105 const LO lclColInd = lclColInds[perm_index];
108 while (row_view.colidx(offset) != lclColInd) {
115 if (checkInputIndices) {
116 if (offset != numEntInRow) {
121 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
124 row_view.value(offset) += vals[perm_index];
132 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
135 row_view.value(offset) += vals[perm_index];
181 template<
class SparseMatrixType,
184 typename SparseMatrixType::ordinal_type
186 const typename SparseMatrixType::ordinal_type lclRow,
187 const typename SparseMatrixType::ordinal_type lclColInds[],
188 const typename SparseMatrixType::ordinal_type sortPerm[],
189 const ValsViewType& vals,
190 const typename SparseMatrixType::ordinal_type numEntInInput,
191 const bool forceAtomic =
192 #ifdef KOKKOS_ENABLE_SERIAL
193 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
197 const bool checkInputIndices =
true)
199 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
201 static_assert (std::is_same<matrix_scalar_type,
202 typename SparseMatrixType::value_type>::value,
203 "The matrix's entries must have a nonconst type.");
204 static_assert (std::is_assignable<matrix_scalar_type,
205 typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
206 "The result of adding a matrix entry and an entry of vals "
207 "MUST be assignable to a matrix entry.");
208 typedef typename SparseMatrixType::ordinal_type LO;
209 static_assert (std::is_integral<LO>::value,
"SparseMatrixType::ordinal_type "
210 "must be a built-in integer type.");
215 auto row_view = A.row (lclRow);
216 const LO numEntInRow =
static_cast<LO
> (row_view.length);
221 LO numValid = checkInputIndices ?
static_cast<LO
> (0) : numEntInRow;
227 for (LO j = 0; j < numEntInInput; ++j) {
228 const LO perm_index = sortPerm[j];
229 const LO lclColInd = lclColInds[perm_index];
232 while (row_view.colidx(offset) != lclColInd) {
239 if (checkInputIndices) {
240 if (offset != numEntInRow) {
245 Kokkos::atomic_assign (&(row_view.value(offset)), vals[perm_index]);
248 row_view.value(offset) += vals[perm_index];
256 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
259 row_view.value(offset) += vals[perm_index];
318 template<
class SparseMatrixType,
319 class VectorViewType,
323 typename SparseMatrixType::ordinal_type
325 const VectorViewType& x,
326 typename SparseMatrixType::ordinal_type lids[],
327 typename SparseMatrixType::ordinal_type sortPerm[],
328 const RhsViewType& rhs,
329 const LhsViewType& lhs,
330 const bool forceAtomic =
331 #ifdef KOKKOS_ENABLE_SERIAL
332 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
336 const bool checkInputIndices =
true)
338 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
340 typedef typename std::remove_const<typename VectorViewType::value_type>::type
342 static_assert (std::is_same<matrix_scalar_type,
343 typename SparseMatrixType::value_type>::value,
344 "The sparse output matrix A's entries must have a nonconst type.");
345 static_assert (std::is_same<vector_scalar_type,
346 typename VectorViewType::value_type>::value,
347 "The dense output vector x's entries must have a nonconst type.");
358 typedef typename SparseMatrixType::ordinal_type LO;
359 static_assert (std::is_integral<LO>::value,
"SparseMatrixType::ordinal_type "
360 "must be a built-in integer type.");
362 const LO eltDim = rhs.extent (0);
365 for (LO i = 0; i < eltDim; ++i) {
370 LO totalNumValid = 0;
371 for (LO r = 0; r < eltDim; ++r) {
372 const LO lid = lids[r];
374 auto lhs_r = Kokkos::subview (lhs, r, Kokkos::ALL ());
379 const LO curNumValid =
384 Kokkos::atomic_add (&x(lid), rhs(sortPerm[r]));
387 x(lid) += rhs(sortPerm[r]);
389 totalNumValid += curNumValid;
391 return totalNumValid;
397 #endif // TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixAssembleElement_sortedLinear(const SparseMatrixType &A, const VectorViewType &x, typename SparseMatrixType::ordinal_type lids[], typename SparseMatrixType::ordinal_type sortPerm[], const RhsViewType &rhs, const LhsViewType &lhs, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lids[j], lids[j]) += lhs(j,j) and x(lids[j]) += rhs(j), for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixReplaceValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) = vals[sortPerm[j]], for all j in 0 .. eltDim-1.
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values...
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it's one word) the input array keys, and apply the resulting permutation to the input...
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixSumIntoValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) += vals[sortPerm[j]], for all j in 0 .. eltDim-1.