Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_ExtraKernels_decl.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_MATRIXMATRIX_EXTRAKERNELS_DECL_HPP
11 #define TPETRA_MATRIXMATRIX_EXTRAKERNELS_DECL_HPP
13 
14 namespace Tpetra {
15 
16 namespace MatrixMatrix {
17 
18 // This guy allows us to easily get an Unmanaged Kokkos View from a ManagedOne
19 template <typename View>
20 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout, typename View::device_type, typename Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
21 
22 namespace ExtraKernels {
23 
24 template <class CrsMatrixType>
25 size_t C_estimate_nnz_per_row(CrsMatrixType& A, CrsMatrixType& B);
26 
27 // 2019 Apr 10 JJE:
28 // copies data from thread local chunks into a unified CSR structure
29 // 'const' on the inCol and inVals array is a lie. The routine will deallocate
30 // the thread local storage. Maybe they shouldn't be const. Or mark, non-const
31 // and have a helper function for the actual copies that takes these as const
32 // . The point of const is that we want the loops to optimize assuming the
33 // RHS is unchanging
34 template <class InColindArrayType,
35  class InValsArrayType,
36  class OutRowptrType,
37  class OutColindType,
38  class OutValsType>
39 void copy_out_from_thread_memory(const OutColindType& thread_total_nnz,
40  const InColindArrayType& Incolind,
41  const InValsArrayType& Invals,
42  const size_t m,
43  const double thread_chunk,
44  OutRowptrType& Outrowptr,
45  OutColindType& Outcolind,
46  OutValsType& Outvals);
47 
48 /***************************** Matrix-Matrix OpenMP Only Kernels *****************************/
49 #ifdef HAVE_TPETRA_INST_OPENMP
50 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
51 static inline void mult_A_B_newmatrix_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
52  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
53  const LocalOrdinalViewType& Acol2Brow,
54  const LocalOrdinalViewType& Acol2Irow,
55  const LocalOrdinalViewType& Bcol2Ccol,
56  const LocalOrdinalViewType& Icol2Ccol,
57  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
58  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
59  const std::string& label,
60  const Teuchos::RCP<Teuchos::ParameterList>& params);
61 
62 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
63 static inline void mult_A_B_reuse_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
64  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
65  const LocalOrdinalViewType& Acol2Brow,
66  const LocalOrdinalViewType& Acol2Irow,
67  const LocalOrdinalViewType& Bcol2Ccol,
68  const LocalOrdinalViewType& Icol2Ccol,
69  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
70  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
71  const std::string& label,
72  const Teuchos::RCP<Teuchos::ParameterList>& params);
73 
74 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
75 static inline void jacobi_A_B_newmatrix_LowThreadGustavsonKernel(Scalar omega,
76  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
77  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
78  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
79  const LocalOrdinalViewType& Acol2Brow,
80  const LocalOrdinalViewType& Acol2Irow,
81  const LocalOrdinalViewType& Bcol2Ccol,
82  const LocalOrdinalViewType& Icol2Ccol,
83  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
84  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
85  const std::string& label,
86  const Teuchos::RCP<Teuchos::ParameterList>& params);
87 
88 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
89 static inline void jacobi_A_B_reuse_LowThreadGustavsonKernel(Scalar omega,
90  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
91  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
92  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
93  const LocalOrdinalViewType& Acol2Brow,
94  const LocalOrdinalViewType& Acol2Irow,
95  const LocalOrdinalViewType& Bcol2Ccol,
96  const LocalOrdinalViewType& Icol2Ccol,
97  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
98  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
99  const std::string& label,
100  const Teuchos::RCP<Teuchos::ParameterList>& params);
101 #endif
102 
103 /***************************** Matrix-Matrix Generic Kernels *****************************/
104 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalOrdinalViewType>
105 static inline void jacobi_A_B_newmatrix_MultiplyScaleAddKernel(Scalar omega,
106  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
107  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
108  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
109  const LocalOrdinalViewType& Acol2rrow,
110  const LocalOrdinalViewType& Acol2Irow,
111  const LocalOrdinalViewType& Bcol2Ccol,
112  const LocalOrdinalViewType& Icol2Ccol,
113  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
114  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > Cimport,
115  const std::string& label,
116  const Teuchos::RCP<Teuchos::ParameterList>& params);
117 
118 /***************************** Triple Product OpenMP Only Kernels *****************************/
119 #ifdef HAVE_TPETRA_INST_OPENMP
120 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
121 static inline void mult_R_A_P_newmatrix_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
122  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
123  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
124  const LocalOrdinalViewType& Acol2Prow,
125  const LocalOrdinalViewType& Acol2PIrow,
126  const LocalOrdinalViewType& Pcol2Accol,
127  const LocalOrdinalViewType& PIcol2Accol,
128  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
129  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
130  const std::string& label = std::string(),
131  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
132 #endif
133 
134 } // namespace ExtraKernels
135 } // namespace MatrixMatrix
136 } // namespace Tpetra
137 
138 #endif