Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_ExtraKernels_decl.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_MATRIXMATRIX_EXTRAKERNELS_DECL_HPP
11 #define TPETRA_MATRIXMATRIX_EXTRAKERNELS_DECL_HPP
13 
14 
15 namespace Tpetra {
16 
17 namespace MatrixMatrix {
18 
19  // This guy allows us to easily get an Unmanaged Kokkos View from a ManagedOne
20  template <typename View>
21  using UnmanagedView = Kokkos::View< typename View::data_type
22  , typename View::array_layout
23  , typename View::device_type
24  , typename Kokkos::MemoryTraits< Kokkos::Unmanaged>
25  >;
26 
27  namespace ExtraKernels {
28 
29  template<class CrsMatrixType>
30  size_t C_estimate_nnz_per_row(CrsMatrixType & A, CrsMatrixType &B);
31 
32  // 2019 Apr 10 JJE:
33  // copies data from thread local chunks into a unified CSR structure
34  // 'const' on the inCol and inVals array is a lie. The routine will deallocate
35  // the thread local storage. Maybe they shouldn't be const. Or mark, non-const
36  // and have a helper function for the actual copies that takes these as const
37  // . The point of const is that we want the loops to optimize assuming the
38  // RHS is unchanging
39  template<class InColindArrayType,
40  class InValsArrayType,
41  class OutRowptrType,
42  class OutColindType,
43  class OutValsType>
44  void copy_out_from_thread_memory(const OutColindType& thread_total_nnz,
45  const InColindArrayType& Incolind,
46  const InValsArrayType& Invals,
47  const size_t m,
48  const double thread_chunk,
49  OutRowptrType& Outrowptr,
50  OutColindType& Outcolind,
51  OutValsType& Outvals);
52 
53  /***************************** Matrix-Matrix OpenMP Only Kernels *****************************/
54 #ifdef HAVE_TPETRA_INST_OPENMP
55  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
56  static inline void mult_A_B_newmatrix_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
57  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
58  const LocalOrdinalViewType & Acol2Brow,
59  const LocalOrdinalViewType & Acol2Irow,
60  const LocalOrdinalViewType & Bcol2Ccol,
61  const LocalOrdinalViewType & Icol2Ccol,
62  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
63  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
64  const std::string& label,
65  const Teuchos::RCP<Teuchos::ParameterList>& params);
66 
67  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
68  static inline void mult_A_B_reuse_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
69  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
70  const LocalOrdinalViewType & Acol2Brow,
71  const LocalOrdinalViewType & Acol2Irow,
72  const LocalOrdinalViewType & Bcol2Ccol,
73  const LocalOrdinalViewType & Icol2Ccol,
74  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
75  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
76  const std::string& label,
77  const Teuchos::RCP<Teuchos::ParameterList>& params);
78 
79  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
80  static inline void jacobi_A_B_newmatrix_LowThreadGustavsonKernel(Scalar omega,
81  const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
82  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
83  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
84  const LocalOrdinalViewType & Acol2Brow,
85  const LocalOrdinalViewType & Acol2Irow,
86  const LocalOrdinalViewType & Bcol2Ccol,
87  const LocalOrdinalViewType & Icol2Ccol,
88  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
89  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
90  const std::string& label,
91  const Teuchos::RCP<Teuchos::ParameterList>& params);
92 
93  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
94  static inline void jacobi_A_B_reuse_LowThreadGustavsonKernel(Scalar omega,
95  const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
96  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
97  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
98  const LocalOrdinalViewType & Acol2Brow,
99  const LocalOrdinalViewType & Acol2Irow,
100  const LocalOrdinalViewType & Bcol2Ccol,
101  const LocalOrdinalViewType & Icol2Ccol,
102  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
103  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
104  const std::string& label,
105  const Teuchos::RCP<Teuchos::ParameterList>& params);
106 #endif
107 
108  /***************************** Matrix-Matrix Generic Kernels *****************************/
109  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalOrdinalViewType>
110  static inline void jacobi_A_B_newmatrix_MultiplyScaleAddKernel(Scalar omega,
111  const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Node> & Dinv,
112  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
113  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
114  const LocalOrdinalViewType & Acol2rrow,
115  const LocalOrdinalViewType & Acol2Irow,
116  const LocalOrdinalViewType & Bcol2Ccol,
117  const LocalOrdinalViewType & Icol2Ccol,
118  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
119  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
120  const std::string& label,
121  const Teuchos::RCP<Teuchos::ParameterList>& params);
122 
123 
124  /***************************** Triple Product OpenMP Only Kernels *****************************/
125 #ifdef HAVE_TPETRA_INST_OPENMP
126  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
127  static inline void mult_R_A_P_newmatrix_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
128  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
129  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
130  const LocalOrdinalViewType & Acol2Prow,
131  const LocalOrdinalViewType & Acol2PIrow,
132  const LocalOrdinalViewType & Pcol2Accol,
133  const LocalOrdinalViewType & PIcol2Accol,
134  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
135  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
136  const std::string& label = std::string(),
137  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
138 #endif
139 
140 
141  }// ExtraKernels
142 }//MatrixMatrix
143 }//Tpetra
144 
145 
146 
147 #endif