42 #ifndef TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
43 #define TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
45 #include "KokkosSparse_CrsMatrix.hpp"
47 #include <type_traits>
89 template<
class SparseMatrixType,
92 typename SparseMatrixType::ordinal_type
94 const typename SparseMatrixType::ordinal_type lclRow,
95 const typename SparseMatrixType::ordinal_type lclColInds[],
96 const typename SparseMatrixType::ordinal_type sortPerm[],
97 const ValsViewType& vals,
98 const typename SparseMatrixType::ordinal_type numEntInInput,
99 const bool forceAtomic =
100 #ifdef KOKKOS_ENABLE_SERIAL
101 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
105 const bool checkInputIndices =
true)
107 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
109 static_assert (std::is_same<matrix_scalar_type,
110 typename SparseMatrixType::value_type>::value,
111 "The matrix's entries must have a nonconst type.");
116 typedef typename SparseMatrixType::ordinal_type LO;
117 static_assert (std::is_integral<LO>::value,
"SparseMatrixType::ordinal_type "
118 "must be a built-in integer type.");
123 auto row_view = A.row (lclRow);
124 const LO numEntInRow =
static_cast<LO
> (row_view.length);
129 LO numValid = checkInputIndices ?
static_cast<LO
> (0) : numEntInRow;
135 for (LO j = 0; j < numEntInInput; ++j) {
136 const LO perm_index = sortPerm[j];
137 const LO lclColInd = lclColInds[perm_index];
140 while (row_view.colidx(offset) != lclColInd) {
147 if (checkInputIndices) {
148 if (offset != numEntInRow) {
153 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
156 row_view.value(offset) += vals[perm_index];
164 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
167 row_view.value(offset) += vals[perm_index];
213 template<
class SparseMatrixType,
216 typename SparseMatrixType::ordinal_type
218 const typename SparseMatrixType::ordinal_type lclRow,
219 const typename SparseMatrixType::ordinal_type lclColInds[],
220 const typename SparseMatrixType::ordinal_type sortPerm[],
221 const ValsViewType& vals,
222 const typename SparseMatrixType::ordinal_type numEntInInput,
223 const bool forceAtomic =
224 #ifdef KOKKOS_ENABLE_SERIAL
225 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
229 const bool checkInputIndices =
true)
231 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
233 static_assert (std::is_same<matrix_scalar_type,
234 typename SparseMatrixType::value_type>::value,
235 "The matrix's entries must have a nonconst type.");
236 static_assert (std::is_assignable<matrix_scalar_type,
237 typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
238 "The result of adding a matrix entry and an entry of vals "
239 "MUST be assignable to a matrix entry.");
240 typedef typename SparseMatrixType::ordinal_type LO;
241 static_assert (std::is_integral<LO>::value,
"SparseMatrixType::ordinal_type "
242 "must be a built-in integer type.");
247 auto row_view = A.row (lclRow);
248 const LO numEntInRow =
static_cast<LO
> (row_view.length);
253 LO numValid = checkInputIndices ?
static_cast<LO
> (0) : numEntInRow;
259 for (LO j = 0; j < numEntInInput; ++j) {
260 const LO perm_index = sortPerm[j];
261 const LO lclColInd = lclColInds[perm_index];
264 while (row_view.colidx(offset) != lclColInd) {
271 if (checkInputIndices) {
272 if (offset != numEntInRow) {
277 Kokkos::atomic_assign (&(row_view.value(offset)), vals[perm_index]);
280 row_view.value(offset) += vals[perm_index];
288 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
291 row_view.value(offset) += vals[perm_index];
350 template<
class SparseMatrixType,
351 class VectorViewType,
355 typename SparseMatrixType::ordinal_type
357 const VectorViewType& x,
358 typename SparseMatrixType::ordinal_type lids[],
359 typename SparseMatrixType::ordinal_type sortPerm[],
360 const RhsViewType& rhs,
361 const LhsViewType& lhs,
362 const bool forceAtomic =
363 #ifdef KOKKOS_ENABLE_SERIAL
364 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
368 const bool checkInputIndices =
true)
370 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
372 typedef typename std::remove_const<typename VectorViewType::value_type>::type
374 static_assert (std::is_same<matrix_scalar_type,
375 typename SparseMatrixType::value_type>::value,
376 "The sparse output matrix A's entries must have a nonconst type.");
377 static_assert (std::is_same<vector_scalar_type,
378 typename VectorViewType::value_type>::value,
379 "The dense output vector x's entries must have a nonconst type.");
390 typedef typename SparseMatrixType::ordinal_type LO;
391 static_assert (std::is_integral<LO>::value,
"SparseMatrixType::ordinal_type "
392 "must be a built-in integer type.");
394 const LO eltDim = rhs.extent (0);
397 for (LO i = 0; i < eltDim; ++i) {
402 LO totalNumValid = 0;
403 for (LO r = 0; r < eltDim; ++r) {
404 const LO lid = lids[r];
406 auto lhs_r = Kokkos::subview (lhs, r, Kokkos::ALL ());
411 const LO curNumValid =
416 Kokkos::atomic_add (&x(lid), rhs(sortPerm[r]));
419 x(lid) += rhs(sortPerm[r]);
421 totalNumValid += curNumValid;
423 return totalNumValid;
429 #endif // TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixAssembleElement_sortedLinear(const SparseMatrixType &A, const VectorViewType &x, typename SparseMatrixType::ordinal_type lids[], typename SparseMatrixType::ordinal_type sortPerm[], const RhsViewType &rhs, const LhsViewType &lhs, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lids[j], lids[j]) += lhs(j,j) and x(lids[j]) += rhs(j), for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixReplaceValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) = vals[sortPerm[j]], for all j in 0 .. eltDim-1.
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values...
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it's one word) the input array keys, and apply the resulting permutation to the input...
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixSumIntoValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) += vals[sortPerm[j]], for all j in 0 .. eltDim-1.