Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_crsMatrixAssembleElement.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
11 #define TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
12 
13 #include "KokkosSparse_CrsMatrix.hpp"
15 #include <type_traits>
16 
17 namespace Tpetra {
18 namespace Details {
19 
57 template <class SparseMatrixType,
58  class ValsViewType>
59 KOKKOS_FUNCTION
60  typename SparseMatrixType::ordinal_type
61  crsMatrixSumIntoValues_sortedSortedLinear(const SparseMatrixType& A,
62  const typename SparseMatrixType::ordinal_type lclRow,
63  const typename SparseMatrixType::ordinal_type lclColInds[],
64  const typename SparseMatrixType::ordinal_type sortPerm[],
65  const ValsViewType& vals,
66  const typename SparseMatrixType::ordinal_type numEntInInput,
67  const bool forceAtomic =
68 #ifdef KOKKOS_ENABLE_SERIAL
69  !std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
70 #else // NOT KOKKOS_ENABLE_SERIAL
71  false,
72 #endif // KOKKOS_ENABLE_SERIAL
73  const bool checkInputIndices = true) {
74  typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
75  matrix_scalar_type;
76  static_assert(std::is_same<matrix_scalar_type,
77  typename SparseMatrixType::value_type>::value,
78  "The matrix's entries must have a nonconst type.");
79  // static_assert (std::is_assignable<matrix_scalar_type,
80  // typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
81  // "The result of adding a matrix entry and an entry of vals "
82  // "MUST be assignable to a matrix entry.");
83  typedef typename SparseMatrixType::ordinal_type LO;
84  static_assert(std::is_integral<LO>::value,
85  "SparseMatrixType::ordinal_type "
86  "must be a built-in integer type.");
87 
88  // If lclRow is NOT a valid row index, this will return a view of
89  // zero entries. If checkInputIndices is true, thus, then none of
90  // the input indices will be valid in that case.
91  auto row_view = A.row(lclRow);
92  const LO numEntInRow = static_cast<LO>(row_view.length);
93  // Number of valid local column indices found, that is, the number
94  // of input indices that are valid column indices found in row
95  // lclRow of the matrix. If not checking, we just return the number
96  // of input indices.
97  LO numValid = checkInputIndices ? static_cast<LO>(0) : numEntInRow;
98 
99  // Since both the matrix row and the input (after permutation) are
100  // sorted, we only need to pass once over the matrix row. 'offset'
101  // tells us the current search position in the matrix row.
102  LO offset = 0;
103  for (LO j = 0; j < numEntInInput; ++j) {
104  const LO perm_index = sortPerm[j];
105  const LO lclColInd = lclColInds[perm_index];
106  // Search linearly in the matrix row for the current index.
107  // If we ever want binary search, this would be the place.
108  while (row_view.colidx(offset) != lclColInd) {
109  ++offset;
110  }
111 
112  // If we could make checkInputIndices a compile-time constant,
113  // then the compiler might not need to insert a branch here. This
114  // should help vectorization, if vectorization is possible.
115  if (checkInputIndices) {
116  if (offset != numEntInRow) {
117  // If we could make forceAtomic a compile-time constant, then
118  // the compiler might not need to insert a branch here. This
119  // should help vectorization, if vectorization is possible.
120  if (forceAtomic) {
121  Kokkos::atomic_add(&(row_view.value(offset)), vals[perm_index]);
122  } else {
123  row_view.value(offset) += vals[perm_index];
124  }
125  ++numValid;
126  }
127  } else { // don't check input indices; assume they are in the row
128  // See above note on forceAtomic.
129  if (forceAtomic) {
130  Kokkos::atomic_add(&(row_view.value(offset)), vals[perm_index]);
131  } else {
132  row_view.value(offset) += vals[perm_index];
133  }
134  }
135  }
136 
137  return numValid;
138 }
139 
178 template <class SparseMatrixType,
179  class ValsViewType>
180 KOKKOS_FUNCTION
181  typename SparseMatrixType::ordinal_type
182  crsMatrixReplaceValues_sortedSortedLinear(const SparseMatrixType& A,
183  const typename SparseMatrixType::ordinal_type lclRow,
184  const typename SparseMatrixType::ordinal_type lclColInds[],
185  const typename SparseMatrixType::ordinal_type sortPerm[],
186  const ValsViewType& vals,
187  const typename SparseMatrixType::ordinal_type numEntInInput,
188  const bool forceAtomic =
189 #ifdef KOKKOS_ENABLE_SERIAL
190  !std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
191 #else // NOT KOKKOS_ENABLE_SERIAL
192  false,
193 #endif // KOKKOS_ENABLE_SERIAL
194  const bool checkInputIndices = true) {
195  typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
196  matrix_scalar_type;
197  static_assert(std::is_same<matrix_scalar_type,
198  typename SparseMatrixType::value_type>::value,
199  "The matrix's entries must have a nonconst type.");
200  static_assert(std::is_assignable<matrix_scalar_type,
201  typename std::decay<decltype(A.values[0] + vals[0])>::type>::value,
202  "The result of adding a matrix entry and an entry of vals "
203  "MUST be assignable to a matrix entry.");
204  typedef typename SparseMatrixType::ordinal_type LO;
205  static_assert(std::is_integral<LO>::value,
206  "SparseMatrixType::ordinal_type "
207  "must be a built-in integer type.");
208 
209  // If lclRow is NOT a valid row index, this will return a view of
210  // zero entries. If checkInputIndices is true, thus, then none of
211  // the input indices will be valid in that case.
212  auto row_view = A.row(lclRow);
213  const LO numEntInRow = static_cast<LO>(row_view.length);
214  // Number of valid local column indices found, that is, the number
215  // of input indices that are valid column indices found in row
216  // lclRow of the matrix. If not checking, we just return the number
217  // of input indices.
218  LO numValid = checkInputIndices ? static_cast<LO>(0) : numEntInRow;
219 
220  // Since both the matrix row and the input (after permutation) are
221  // sorted, we only need to pass once over the matrix row. 'offset'
222  // tells us the current search position in the matrix row.
223  LO offset = 0;
224  for (LO j = 0; j < numEntInInput; ++j) {
225  const LO perm_index = sortPerm[j];
226  const LO lclColInd = lclColInds[perm_index];
227  // Search linearly in the matrix row for the current index.
228  // If we ever want binary search, this would be the place.
229  while (row_view.colidx(offset) != lclColInd) {
230  ++offset;
231  }
232 
233  // If checkInputIndices were a compile-time constant, then the
234  // compiler might not need to insert a branch here. This should
235  // help vectorization, if vectorization is possible at all.
236  if (checkInputIndices) {
237  if (offset != numEntInRow) {
238  // If forceAtomic were a compile-time constant, then the
239  // compiler might not need to insert a branch here. This
240  // could help vectorization, if vectorization is possible.
241  if (forceAtomic) {
242  Kokkos::atomic_store(&(row_view.value(offset)), vals[perm_index]);
243  } else {
244  row_view.value(offset) += vals[perm_index];
245  }
246  ++numValid;
247  }
248  } else { // don't check input indices; assume they are in the row
249  // See above note on forceAtomic.
250  if (forceAtomic) {
251  Kokkos::atomic_add(&(row_view.value(offset)), vals[perm_index]);
252  } else {
253  row_view.value(offset) += vals[perm_index];
254  }
255  }
256  }
257 
258  return numValid;
259 }
260 
312 template <class SparseMatrixType,
313  class VectorViewType,
314  class RhsViewType,
315  class LhsViewType>
316 KOKKOS_FUNCTION
317  typename SparseMatrixType::ordinal_type
318  crsMatrixAssembleElement_sortedLinear(const SparseMatrixType& A,
319  const VectorViewType& x,
320  typename SparseMatrixType::ordinal_type lids[],
321  typename SparseMatrixType::ordinal_type sortPerm[],
322  const RhsViewType& rhs,
323  const LhsViewType& lhs,
324  const bool forceAtomic =
325 #ifdef KOKKOS_ENABLE_SERIAL
326  !std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
327 #else // NOT KOKKOS_ENABLE_SERIAL
328  false,
329 #endif // KOKKOS_ENABLE_SERIAL
330  const bool checkInputIndices = true) {
331  typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
332  matrix_scalar_type;
333  typedef typename std::remove_const<typename VectorViewType::value_type>::type
334  vector_scalar_type;
335  static_assert(std::is_same<matrix_scalar_type,
336  typename SparseMatrixType::value_type>::value,
337  "The sparse output matrix A's entries must have a nonconst type.");
338  static_assert(std::is_same<vector_scalar_type,
339  typename VectorViewType::value_type>::value,
340  "The dense output vector x's entries must have a nonconst type.");
341  // static_assert (std::is_assignable<matrix_scalar_type,
342  // typename std::decay< decltype (A.values[0] + lhs(0,0)) >::type>::value,
343  // "The result of adding a sparse matrix entry and an entry of "
344  // "lhs (the dense element matrix) "
345  // "MUST be assignable to a matrix entry.");
346  // static_assert (std::is_assignable<vector_scalar_type,
347  // typename std::decay< decltype (x[0] + rhs[0]) >::type>::value,
348  // "The result of adding a vector entry and an entry of "
349  // "rhs (the dense element vector) "
350  // "MUST be assignable to a vector entry.");
351  typedef typename SparseMatrixType::ordinal_type LO;
352  static_assert(std::is_integral<LO>::value,
353  "SparseMatrixType::ordinal_type "
354  "must be a built-in integer type.");
355 
356  const LO eltDim = rhs.extent(0);
357 
358  // Generate sort permutation
359  for (LO i = 0; i < eltDim; ++i) {
360  sortPerm[i] = i;
361  }
362  shellSortKeysAndValues(lids, sortPerm, eltDim);
363 
364  LO totalNumValid = 0;
365  for (LO r = 0; r < eltDim; ++r) {
366  const LO lid = lids[r];
367  // auto lhs_r = Kokkos::subview (lhs, sortPerm[r], Kokkos::ALL ());
368  auto lhs_r = Kokkos::subview(lhs, r, Kokkos::ALL());
369 
370  // This assumes that lid is always a valid row in the sparse
371  // matrix, and that the local indices in each row of the matrix
372  // are always sorted.
373  const LO curNumValid =
374  crsMatrixSumIntoValues_sortedSortedLinear(A, lid, lids, sortPerm, lhs_r,
375  eltDim, forceAtomic,
376  checkInputIndices);
377  if (forceAtomic) {
378  Kokkos::atomic_add(&x(lid), rhs(sortPerm[r]));
379  } else {
380  x(lid) += rhs(sortPerm[r]);
381  }
382  totalNumValid += curNumValid;
383  }
384  return totalNumValid;
385 }
386 
387 } // namespace Details
388 } // namespace Tpetra
389 
390 #endif // TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixAssembleElement_sortedLinear(const SparseMatrixType &A, const VectorViewType &x, typename SparseMatrixType::ordinal_type lids[], typename SparseMatrixType::ordinal_type sortPerm[], const RhsViewType &rhs, const LhsViewType &lhs, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lids[j], lids[j]) += lhs(j,j) and x(lids[j]) += rhs(j), for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixReplaceValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) = vals[sortPerm[j]], for all j in 0 .. eltDim-1.
Declaration and definition of functions for sorting &quot;short&quot; arrays of keys and corresponding values...
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it&#39;s one word) the input array keys, and apply the resulting permutation to the input...
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixSumIntoValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) += vals[sortPerm[j]], for all j in 0 .. eltDim-1.