Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_crsMatrixAssembleElement.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
11 #define TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
12 
13 #include "KokkosSparse_CrsMatrix.hpp"
15 #include <type_traits>
16 
17 namespace Tpetra {
18 namespace Details {
19 
57 template<class SparseMatrixType,
58  class ValsViewType>
59 KOKKOS_FUNCTION
60 typename SparseMatrixType::ordinal_type
61 crsMatrixSumIntoValues_sortedSortedLinear (const SparseMatrixType& A,
62  const typename SparseMatrixType::ordinal_type lclRow,
63  const typename SparseMatrixType::ordinal_type lclColInds[],
64  const typename SparseMatrixType::ordinal_type sortPerm[],
65  const ValsViewType& vals,
66  const typename SparseMatrixType::ordinal_type numEntInInput,
67  const bool forceAtomic =
68 #ifdef KOKKOS_ENABLE_SERIAL
69  ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
70 #else // NOT KOKKOS_ENABLE_SERIAL
71  false,
72 #endif // KOKKOS_ENABLE_SERIAL
73  const bool checkInputIndices = true)
74 {
75  typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
76  matrix_scalar_type;
77  static_assert (std::is_same<matrix_scalar_type,
78  typename SparseMatrixType::value_type>::value,
79  "The matrix's entries must have a nonconst type.");
80  // static_assert (std::is_assignable<matrix_scalar_type,
81  // typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
82  // "The result of adding a matrix entry and an entry of vals "
83  // "MUST be assignable to a matrix entry.");
84  typedef typename SparseMatrixType::ordinal_type LO;
85  static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
86  "must be a built-in integer type.");
87 
88  // If lclRow is NOT a valid row index, this will return a view of
89  // zero entries. If checkInputIndices is true, thus, then none of
90  // the input indices will be valid in that case.
91  auto row_view = A.row (lclRow);
92  const LO numEntInRow = static_cast<LO> (row_view.length);
93  // Number of valid local column indices found, that is, the number
94  // of input indices that are valid column indices found in row
95  // lclRow of the matrix. If not checking, we just return the number
96  // of input indices.
97  LO numValid = checkInputIndices ? static_cast<LO> (0) : numEntInRow;
98 
99  // Since both the matrix row and the input (after permutation) are
100  // sorted, we only need to pass once over the matrix row. 'offset'
101  // tells us the current search position in the matrix row.
102  LO offset = 0;
103  for (LO j = 0; j < numEntInInput; ++j) {
104  const LO perm_index = sortPerm[j];
105  const LO lclColInd = lclColInds[perm_index];
106  // Search linearly in the matrix row for the current index.
107  // If we ever want binary search, this would be the place.
108  while (row_view.colidx(offset) != lclColInd) {
109  ++offset;
110  }
111 
112  // If we could make checkInputIndices a compile-time constant,
113  // then the compiler might not need to insert a branch here. This
114  // should help vectorization, if vectorization is possible.
115  if (checkInputIndices) {
116  if (offset != numEntInRow) {
117  // If we could make forceAtomic a compile-time constant, then
118  // the compiler might not need to insert a branch here. This
119  // should help vectorization, if vectorization is possible.
120  if (forceAtomic) {
121  Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
122  }
123  else {
124  row_view.value(offset) += vals[perm_index];
125  }
126  ++numValid;
127  }
128  }
129  else { // don't check input indices; assume they are in the row
130  // See above note on forceAtomic.
131  if (forceAtomic) {
132  Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
133  }
134  else {
135  row_view.value(offset) += vals[perm_index];
136  }
137  }
138  }
139 
140  return numValid;
141 }
142 
181 template<class SparseMatrixType,
182  class ValsViewType>
183 KOKKOS_FUNCTION
184 typename SparseMatrixType::ordinal_type
185 crsMatrixReplaceValues_sortedSortedLinear (const SparseMatrixType& A,
186  const typename SparseMatrixType::ordinal_type lclRow,
187  const typename SparseMatrixType::ordinal_type lclColInds[],
188  const typename SparseMatrixType::ordinal_type sortPerm[],
189  const ValsViewType& vals,
190  const typename SparseMatrixType::ordinal_type numEntInInput,
191  const bool forceAtomic =
192 #ifdef KOKKOS_ENABLE_SERIAL
193  ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
194 #else // NOT KOKKOS_ENABLE_SERIAL
195  false,
196 #endif // KOKKOS_ENABLE_SERIAL
197  const bool checkInputIndices = true)
198 {
199  typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
200  matrix_scalar_type;
201  static_assert (std::is_same<matrix_scalar_type,
202  typename SparseMatrixType::value_type>::value,
203  "The matrix's entries must have a nonconst type.");
204  static_assert (std::is_assignable<matrix_scalar_type,
205  typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
206  "The result of adding a matrix entry and an entry of vals "
207  "MUST be assignable to a matrix entry.");
208  typedef typename SparseMatrixType::ordinal_type LO;
209  static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
210  "must be a built-in integer type.");
211 
212  // If lclRow is NOT a valid row index, this will return a view of
213  // zero entries. If checkInputIndices is true, thus, then none of
214  // the input indices will be valid in that case.
215  auto row_view = A.row (lclRow);
216  const LO numEntInRow = static_cast<LO> (row_view.length);
217  // Number of valid local column indices found, that is, the number
218  // of input indices that are valid column indices found in row
219  // lclRow of the matrix. If not checking, we just return the number
220  // of input indices.
221  LO numValid = checkInputIndices ? static_cast<LO> (0) : numEntInRow;
222 
223  // Since both the matrix row and the input (after permutation) are
224  // sorted, we only need to pass once over the matrix row. 'offset'
225  // tells us the current search position in the matrix row.
226  LO offset = 0;
227  for (LO j = 0; j < numEntInInput; ++j) {
228  const LO perm_index = sortPerm[j];
229  const LO lclColInd = lclColInds[perm_index];
230  // Search linearly in the matrix row for the current index.
231  // If we ever want binary search, this would be the place.
232  while (row_view.colidx(offset) != lclColInd) {
233  ++offset;
234  }
235 
236  // If checkInputIndices were a compile-time constant, then the
237  // compiler might not need to insert a branch here. This should
238  // help vectorization, if vectorization is possible at all.
239  if (checkInputIndices) {
240  if (offset != numEntInRow) {
241  // If forceAtomic were a compile-time constant, then the
242  // compiler might not need to insert a branch here. This
243  // could help vectorization, if vectorization is possible.
244  if (forceAtomic) {
245  Kokkos::atomic_assign (&(row_view.value(offset)), vals[perm_index]);
246  }
247  else {
248  row_view.value(offset) += vals[perm_index];
249  }
250  ++numValid;
251  }
252  }
253  else { // don't check input indices; assume they are in the row
254  // See above note on forceAtomic.
255  if (forceAtomic) {
256  Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
257  }
258  else {
259  row_view.value(offset) += vals[perm_index];
260  }
261  }
262  }
263 
264  return numValid;
265 }
266 
318 template<class SparseMatrixType,
319  class VectorViewType,
320  class RhsViewType,
321  class LhsViewType>
322 KOKKOS_FUNCTION
323 typename SparseMatrixType::ordinal_type
324 crsMatrixAssembleElement_sortedLinear (const SparseMatrixType& A,
325  const VectorViewType& x,
326  typename SparseMatrixType::ordinal_type lids[],
327  typename SparseMatrixType::ordinal_type sortPerm[],
328  const RhsViewType& rhs,
329  const LhsViewType& lhs,
330  const bool forceAtomic =
331 #ifdef KOKKOS_ENABLE_SERIAL
332  ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
333 #else // NOT KOKKOS_ENABLE_SERIAL
334  false,
335 #endif // KOKKOS_ENABLE_SERIAL
336  const bool checkInputIndices = true)
337 {
338  typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
339  matrix_scalar_type;
340  typedef typename std::remove_const<typename VectorViewType::value_type>::type
341  vector_scalar_type;
342  static_assert (std::is_same<matrix_scalar_type,
343  typename SparseMatrixType::value_type>::value,
344  "The sparse output matrix A's entries must have a nonconst type.");
345  static_assert (std::is_same<vector_scalar_type,
346  typename VectorViewType::value_type>::value,
347  "The dense output vector x's entries must have a nonconst type.");
348  // static_assert (std::is_assignable<matrix_scalar_type,
349  // typename std::decay< decltype (A.values[0] + lhs(0,0)) >::type>::value,
350  // "The result of adding a sparse matrix entry and an entry of "
351  // "lhs (the dense element matrix) "
352  // "MUST be assignable to a matrix entry.");
353  // static_assert (std::is_assignable<vector_scalar_type,
354  // typename std::decay< decltype (x[0] + rhs[0]) >::type>::value,
355  // "The result of adding a vector entry and an entry of "
356  // "rhs (the dense element vector) "
357  // "MUST be assignable to a vector entry.");
358  typedef typename SparseMatrixType::ordinal_type LO;
359  static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
360  "must be a built-in integer type.");
361 
362  const LO eltDim = rhs.extent (0);
363 
364  // Generate sort permutation
365  for (LO i = 0; i < eltDim; ++i) {
366  sortPerm[i] = i;
367  }
368  shellSortKeysAndValues (lids, sortPerm, eltDim);
369 
370  LO totalNumValid = 0;
371  for (LO r = 0; r < eltDim; ++r) {
372  const LO lid = lids[r];
373  //auto lhs_r = Kokkos::subview (lhs, sortPerm[r], Kokkos::ALL ());
374  auto lhs_r = Kokkos::subview (lhs, r, Kokkos::ALL ());
375 
376  // This assumes that lid is always a valid row in the sparse
377  // matrix, and that the local indices in each row of the matrix
378  // are always sorted.
379  const LO curNumValid =
380  crsMatrixSumIntoValues_sortedSortedLinear (A, lid, lids, sortPerm, lhs_r,
381  eltDim, forceAtomic,
382  checkInputIndices);
383  if (forceAtomic) {
384  Kokkos::atomic_add (&x(lid), rhs(sortPerm[r]));
385  }
386  else {
387  x(lid) += rhs(sortPerm[r]);
388  }
389  totalNumValid += curNumValid;
390  }
391  return totalNumValid;
392 }
393 
394 } // namespace Details
395 } // namespace Tpetra
396 
397 #endif // TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixAssembleElement_sortedLinear(const SparseMatrixType &A, const VectorViewType &x, typename SparseMatrixType::ordinal_type lids[], typename SparseMatrixType::ordinal_type sortPerm[], const RhsViewType &rhs, const LhsViewType &lhs, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lids[j], lids[j]) += lhs(j,j) and x(lids[j]) += rhs(j), for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixReplaceValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) = vals[sortPerm[j]], for all j in 0 .. eltDim-1.
Declaration and definition of functions for sorting &quot;short&quot; arrays of keys and corresponding values...
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it&#39;s one word) the input array keys, and apply the resulting permutation to the input...
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixSumIntoValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) += vals[sortPerm[j]], for all j in 0 .. eltDim-1.