Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_crsMatrixAssembleElement.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
43 #define TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
44 
45 #include "KokkosSparse_CrsMatrix.hpp"
47 #include <type_traits>
48 
49 namespace Tpetra {
50 namespace Details {
51 
89 template<class SparseMatrixType,
90  class ValsViewType>
91 KOKKOS_FUNCTION
92 typename SparseMatrixType::ordinal_type
93 crsMatrixSumIntoValues_sortedSortedLinear (const SparseMatrixType& A,
94  const typename SparseMatrixType::ordinal_type lclRow,
95  const typename SparseMatrixType::ordinal_type lclColInds[],
96  const typename SparseMatrixType::ordinal_type sortPerm[],
97  const ValsViewType& vals,
98  const typename SparseMatrixType::ordinal_type numEntInInput,
99  const bool forceAtomic =
100 #ifdef KOKKOS_ENABLE_SERIAL
101  ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
102 #else // NOT KOKKOS_ENABLE_SERIAL
103  false,
104 #endif // KOKKOS_ENABLE_SERIAL
105  const bool checkInputIndices = true)
106 {
107  typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
108  matrix_scalar_type;
109  static_assert (std::is_same<matrix_scalar_type,
110  typename SparseMatrixType::value_type>::value,
111  "The matrix's entries must have a nonconst type.");
112  // static_assert (std::is_assignable<matrix_scalar_type,
113  // typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
114  // "The result of adding a matrix entry and an entry of vals "
115  // "MUST be assignable to a matrix entry.");
116  typedef typename SparseMatrixType::ordinal_type LO;
117  static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
118  "must be a built-in integer type.");
119 
120  // If lclRow is NOT a valid row index, this will return a view of
121  // zero entries. If checkInputIndices is true, thus, then none of
122  // the input indices will be valid in that case.
123  auto row_view = A.row (lclRow);
124  const LO numEntInRow = static_cast<LO> (row_view.length);
125  // Number of valid local column indices found, that is, the number
126  // of input indices that are valid column indices found in row
127  // lclRow of the matrix. If not checking, we just return the number
128  // of input indices.
129  LO numValid = checkInputIndices ? static_cast<LO> (0) : numEntInRow;
130 
131  // Since both the matrix row and the input (after permutation) are
132  // sorted, we only need to pass once over the matrix row. 'offset'
133  // tells us the current search position in the matrix row.
134  LO offset = 0;
135  for (LO j = 0; j < numEntInInput; ++j) {
136  const LO perm_index = sortPerm[j];
137  const LO lclColInd = lclColInds[perm_index];
138  // Search linearly in the matrix row for the current index.
139  // If we ever want binary search, this would be the place.
140  while (row_view.colidx(offset) != lclColInd) {
141  ++offset;
142  }
143 
144  // If we could make checkInputIndices a compile-time constant,
145  // then the compiler might not need to insert a branch here. This
146  // should help vectorization, if vectorization is possible.
147  if (checkInputIndices) {
148  if (offset != numEntInRow) {
149  // If we could make forceAtomic a compile-time constant, then
150  // the compiler might not need to insert a branch here. This
151  // should help vectorization, if vectorization is possible.
152  if (forceAtomic) {
153  Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
154  }
155  else {
156  row_view.value(offset) += vals[perm_index];
157  }
158  ++numValid;
159  }
160  }
161  else { // don't check input indices; assume they are in the row
162  // See above note on forceAtomic.
163  if (forceAtomic) {
164  Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
165  }
166  else {
167  row_view.value(offset) += vals[perm_index];
168  }
169  }
170  }
171 
172  return numValid;
173 }
174 
213 template<class SparseMatrixType,
214  class ValsViewType>
215 KOKKOS_FUNCTION
216 typename SparseMatrixType::ordinal_type
217 crsMatrixReplaceValues_sortedSortedLinear (const SparseMatrixType& A,
218  const typename SparseMatrixType::ordinal_type lclRow,
219  const typename SparseMatrixType::ordinal_type lclColInds[],
220  const typename SparseMatrixType::ordinal_type sortPerm[],
221  const ValsViewType& vals,
222  const typename SparseMatrixType::ordinal_type numEntInInput,
223  const bool forceAtomic =
224 #ifdef KOKKOS_ENABLE_SERIAL
225  ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
226 #else // NOT KOKKOS_ENABLE_SERIAL
227  false,
228 #endif // KOKKOS_ENABLE_SERIAL
229  const bool checkInputIndices = true)
230 {
231  typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
232  matrix_scalar_type;
233  static_assert (std::is_same<matrix_scalar_type,
234  typename SparseMatrixType::value_type>::value,
235  "The matrix's entries must have a nonconst type.");
236  static_assert (std::is_assignable<matrix_scalar_type,
237  typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
238  "The result of adding a matrix entry and an entry of vals "
239  "MUST be assignable to a matrix entry.");
240  typedef typename SparseMatrixType::ordinal_type LO;
241  static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
242  "must be a built-in integer type.");
243 
244  // If lclRow is NOT a valid row index, this will return a view of
245  // zero entries. If checkInputIndices is true, thus, then none of
246  // the input indices will be valid in that case.
247  auto row_view = A.row (lclRow);
248  const LO numEntInRow = static_cast<LO> (row_view.length);
249  // Number of valid local column indices found, that is, the number
250  // of input indices that are valid column indices found in row
251  // lclRow of the matrix. If not checking, we just return the number
252  // of input indices.
253  LO numValid = checkInputIndices ? static_cast<LO> (0) : numEntInRow;
254 
255  // Since both the matrix row and the input (after permutation) are
256  // sorted, we only need to pass once over the matrix row. 'offset'
257  // tells us the current search position in the matrix row.
258  LO offset = 0;
259  for (LO j = 0; j < numEntInInput; ++j) {
260  const LO perm_index = sortPerm[j];
261  const LO lclColInd = lclColInds[perm_index];
262  // Search linearly in the matrix row for the current index.
263  // If we ever want binary search, this would be the place.
264  while (row_view.colidx(offset) != lclColInd) {
265  ++offset;
266  }
267 
268  // If checkInputIndices were a compile-time constant, then the
269  // compiler might not need to insert a branch here. This should
270  // help vectorization, if vectorization is possible at all.
271  if (checkInputIndices) {
272  if (offset != numEntInRow) {
273  // If forceAtomic were a compile-time constant, then the
274  // compiler might not need to insert a branch here. This
275  // could help vectorization, if vectorization is possible.
276  if (forceAtomic) {
277  Kokkos::atomic_assign (&(row_view.value(offset)), vals[perm_index]);
278  }
279  else {
280  row_view.value(offset) += vals[perm_index];
281  }
282  ++numValid;
283  }
284  }
285  else { // don't check input indices; assume they are in the row
286  // See above note on forceAtomic.
287  if (forceAtomic) {
288  Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
289  }
290  else {
291  row_view.value(offset) += vals[perm_index];
292  }
293  }
294  }
295 
296  return numValid;
297 }
298 
350 template<class SparseMatrixType,
351  class VectorViewType,
352  class RhsViewType,
353  class LhsViewType>
354 KOKKOS_FUNCTION
355 typename SparseMatrixType::ordinal_type
356 crsMatrixAssembleElement_sortedLinear (const SparseMatrixType& A,
357  const VectorViewType& x,
358  typename SparseMatrixType::ordinal_type lids[],
359  typename SparseMatrixType::ordinal_type sortPerm[],
360  const RhsViewType& rhs,
361  const LhsViewType& lhs,
362  const bool forceAtomic =
363 #ifdef KOKKOS_ENABLE_SERIAL
364  ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
365 #else // NOT KOKKOS_ENABLE_SERIAL
366  false,
367 #endif // KOKKOS_ENABLE_SERIAL
368  const bool checkInputIndices = true)
369 {
370  typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
371  matrix_scalar_type;
372  typedef typename std::remove_const<typename VectorViewType::value_type>::type
373  vector_scalar_type;
374  static_assert (std::is_same<matrix_scalar_type,
375  typename SparseMatrixType::value_type>::value,
376  "The sparse output matrix A's entries must have a nonconst type.");
377  static_assert (std::is_same<vector_scalar_type,
378  typename VectorViewType::value_type>::value,
379  "The dense output vector x's entries must have a nonconst type.");
380  // static_assert (std::is_assignable<matrix_scalar_type,
381  // typename std::decay< decltype (A.values[0] + lhs(0,0)) >::type>::value,
382  // "The result of adding a sparse matrix entry and an entry of "
383  // "lhs (the dense element matrix) "
384  // "MUST be assignable to a matrix entry.");
385  // static_assert (std::is_assignable<vector_scalar_type,
386  // typename std::decay< decltype (x[0] + rhs[0]) >::type>::value,
387  // "The result of adding a vector entry and an entry of "
388  // "rhs (the dense element vector) "
389  // "MUST be assignable to a vector entry.");
390  typedef typename SparseMatrixType::ordinal_type LO;
391  static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
392  "must be a built-in integer type.");
393 
394  const LO eltDim = rhs.extent (0);
395 
396  // Generate sort permutation
397  for (LO i = 0; i < eltDim; ++i) {
398  sortPerm[i] = i;
399  }
400  shellSortKeysAndValues (lids, sortPerm, eltDim);
401 
402  LO totalNumValid = 0;
403  for (LO r = 0; r < eltDim; ++r) {
404  const LO lid = lids[r];
405  //auto lhs_r = Kokkos::subview (lhs, sortPerm[r], Kokkos::ALL ());
406  auto lhs_r = Kokkos::subview (lhs, r, Kokkos::ALL ());
407 
408  // This assumes that lid is always a valid row in the sparse
409  // matrix, and that the local indices in each row of the matrix
410  // are always sorted.
411  const LO curNumValid =
412  crsMatrixSumIntoValues_sortedSortedLinear (A, lid, lids, sortPerm, lhs_r,
413  eltDim, forceAtomic,
414  checkInputIndices);
415  if (forceAtomic) {
416  Kokkos::atomic_add (&x(lid), rhs(sortPerm[r]));
417  }
418  else {
419  x(lid) += rhs(sortPerm[r]);
420  }
421  totalNumValid += curNumValid;
422  }
423  return totalNumValid;
424 }
425 
426 } // namespace Details
427 } // namespace Tpetra
428 
429 #endif // TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixAssembleElement_sortedLinear(const SparseMatrixType &A, const VectorViewType &x, typename SparseMatrixType::ordinal_type lids[], typename SparseMatrixType::ordinal_type sortPerm[], const RhsViewType &rhs, const LhsViewType &lhs, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lids[j], lids[j]) += lhs(j,j) and x(lids[j]) += rhs(j), for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixReplaceValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) = vals[sortPerm[j]], for all j in 0 .. eltDim-1.
Declaration and definition of functions for sorting &quot;short&quot; arrays of keys and corresponding values...
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it&#39;s one word) the input array keys, and apply the resulting permutation to the input...
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixSumIntoValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) += vals[sortPerm[j]], for all j in 0 .. eltDim-1.