Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
TpetraExt_MatrixMatrix_ExtraKernels_decl.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_MATRIXMATRIX_EXTRAKERNELS_DECL_HPP
43 #define TPETRA_MATRIXMATRIX_EXTRAKERNELS_DECL_HPP
45 
46 
47 namespace Tpetra {
48 
49 namespace MatrixMatrix {
50 
51  // Alias that makes it easy to get the unmanaged Kokkos View type that
    // corresponds to a managed one: it keeps View's data_type, array_layout and
    // device_type, and uses Kokkos::MemoryTraits<Kokkos::Unmanaged> as the
    // memory traits.
52  template <typename View>
53  using UnmanagedView = Kokkos::View< typename View::data_type
54  , typename View::array_layout
55  , typename View::device_type
56  , typename Kokkos::MemoryTraits< Kokkos::Unmanaged>
57  >;
58 
59  namespace ExtraKernels {
60 
61  template<class CrsMatrixType>
    // Declaration only; no body is given in this header.
    // Takes two matrices of the same CrsMatrixType by non-const reference and
    // returns a size_t.
    // NOTE(review): judging by the name, the result is an estimate of the number
    // of nonzeros per row of the product C = A*B -- confirm against the
    // definition.
62  size_t C_estimate_nnz_per_row(CrsMatrixType & A, CrsMatrixType &B);
63 
64  // 2019 Apr 10 JJE:
65  // Copies data from thread-local chunks into a unified CSR structure.
66  // The 'const' on the Incolind and Invals arrays is a lie: the routine will
67  // deallocate the thread-local storage. Maybe they should not be const, or
68  // they should be non-const here with a helper function that takes them as
69  // const doing the actual copies. The point of const is that we want the
70  // loops to optimize assuming the RHS is unchanging.
    //
    // Declaration only. Parameters, as far as the signature shows:
    //   thread_total_nnz - read-only; note that its type is OutColindType, the
    //     same type as the Outcolind argument.
    //   Incolind, Invals - passed by const reference (but see the caveat above).
    //   m, thread_chunk  - a size_t and a double, both passed by value.
    //   Outrowptr, Outcolind, Outvals - taken by non-const reference.
    // NOTE(review): presumably m is a row count, thread_chunk the number of rows
    // handled per thread, and the three Out* arguments the row-pointer /
    // column-index / value arrays of the unified CSR result -- confirm against
    // the definition.
71  template<class InColindArrayType,
72  class InValsArrayType,
73  class OutRowptrType,
74  class OutColindType,
75  class OutValsType>
76  void copy_out_from_thread_memory(const OutColindType& thread_total_nnz,
77  const InColindArrayType& Incolind,
78  const InValsArrayType& Invals,
79  const size_t m,
80  const double thread_chunk,
81  OutRowptrType& Outrowptr,
82  OutColindType& Outcolind,
83  OutValsType& Outvals);
84 
85  /***************************** Matrix-Matrix OpenMP Only Kernels *****************************/
86 #ifdef HAVE_TPETRA_INST_OPENMP
87  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
    // Declaration only (no body in this header). Every Tpetra type in the
    // signature is fixed to Kokkos::Compat::KokkosOpenMPWrapperNode, and the
    // declaration is only visible when HAVE_TPETRA_INST_OPENMP is defined.
    //
    // Parameters, as far as the signature shows:
    //   Aview, Bview - CrsMatrixStruct operands, taken by non-const reference.
    //   Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol - four views of type
    //     LocalOrdinalViewType, passed by const reference.
    //   C       - CrsMatrix taken by non-const reference.
    //   Cimport - RCP to a const Import, passed by value.
    //   label, params - a string and an RCP to a Teuchos::ParameterList; neither
    //     has a default argument here.
    //
    // NOTE(review): the names suggest this fills a new product matrix C = A*B
    // with a Gustavson-style kernel aimed at low thread counts, and that the
    // four views are local index maps (A column -> B row / imported-B row,
    // B / imported-B column -> C column). None of that is established by this
    // declaration -- confirm against the definition.
88  static inline void mult_A_B_newmatrix_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
89  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
90  const LocalOrdinalViewType & Acol2Brow,
91  const LocalOrdinalViewType & Acol2Irow,
92  const LocalOrdinalViewType & Bcol2Ccol,
93  const LocalOrdinalViewType & Icol2Ccol,
94  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
95  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
96  const std::string& label,
97  const Teuchos::RCP<Teuchos::ParameterList>& params);
98 
99  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
    // Declaration only (no body in this header). Every Tpetra type in the
    // signature is fixed to Kokkos::Compat::KokkosOpenMPWrapperNode, and the
    // declaration is only visible when HAVE_TPETRA_INST_OPENMP is defined.
    //
    // Parameters, as far as the signature shows:
    //   Aview, Bview - CrsMatrixStruct operands, taken by non-const reference.
    //   Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol - four views of type
    //     LocalOrdinalViewType, passed by const reference.
    //   C       - CrsMatrix taken by non-const reference.
    //   Cimport - RCP to a const Import, passed by value.
    //   label, params - a string and an RCP to a Teuchos::ParameterList; neither
    //     has a default argument here.
    //
    // NOTE(review): the "reuse" in the name suggests this recomputes the values
    // of C = A*B into an already-built C rather than constructing a new matrix
    // -- this declaration does not establish that; confirm against the
    // definition.
100  static inline void mult_A_B_reuse_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
101  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
102  const LocalOrdinalViewType & Acol2Brow,
103  const LocalOrdinalViewType & Acol2Irow,
104  const LocalOrdinalViewType & Bcol2Ccol,
105  const LocalOrdinalViewType & Icol2Ccol,
106  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
107  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
108  const std::string& label,
109  const Teuchos::RCP<Teuchos::ParameterList>& params);
110 
111  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
    // Declaration only (no body in this header). Every Tpetra type in the
    // signature is fixed to Kokkos::Compat::KokkosOpenMPWrapperNode, and the
    // declaration is only visible when HAVE_TPETRA_INST_OPENMP is defined.
    //
    // Parameters, as far as the signature shows:
    //   omega   - a Scalar passed by value.
    //   Dinv    - a Vector passed by const reference.
    //   Aview, Bview - CrsMatrixStruct operands, taken by non-const reference.
    //   Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol - four views of type
    //     LocalOrdinalViewType, passed by const reference.
    //   C       - CrsMatrix taken by non-const reference.
    //   Cimport - RCP to a const Import, passed by value.
    //   label, params - a string and an RCP to a Teuchos::ParameterList; neither
    //     has a default argument here.
    //
    // NOTE(review): the names suggest a damped-Jacobi style product that builds
    // a new C from A, B, the damping factor omega and an inverse diagonal Dinv.
    // The exact formula is not visible here -- confirm against the definition.
112  static inline void jacobi_A_B_newmatrix_LowThreadGustavsonKernel(Scalar omega,
113  const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
114  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
115  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
116  const LocalOrdinalViewType & Acol2Brow,
117  const LocalOrdinalViewType & Acol2Irow,
118  const LocalOrdinalViewType & Bcol2Ccol,
119  const LocalOrdinalViewType & Icol2Ccol,
120  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
121  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
122  const std::string& label,
123  const Teuchos::RCP<Teuchos::ParameterList>& params);
124 
125  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
    // Declaration only (no body in this header). Every Tpetra type in the
    // signature is fixed to Kokkos::Compat::KokkosOpenMPWrapperNode, and the
    // declaration is only visible when HAVE_TPETRA_INST_OPENMP is defined.
    //
    // Parameters, as far as the signature shows:
    //   omega   - a Scalar passed by value.
    //   Dinv    - a Vector passed by const reference.
    //   Aview, Bview - CrsMatrixStruct operands, taken by non-const reference.
    //   Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol - four views of type
    //     LocalOrdinalViewType, passed by const reference.
    //   C       - CrsMatrix taken by non-const reference.
    //   Cimport - RCP to a const Import, passed by value.
    //   label, params - a string and an RCP to a Teuchos::ParameterList; neither
    //     has a default argument here.
    //
    // NOTE(review): the "reuse" in the name suggests this recomputes the values
    // of a Jacobi-style product into an already-built C rather than
    // constructing a new matrix. Neither that nor the exact formula is visible
    // here -- confirm against the definition.
126  static inline void jacobi_A_B_reuse_LowThreadGustavsonKernel(Scalar omega,
127  const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
128  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
129  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
130  const LocalOrdinalViewType & Acol2Brow,
131  const LocalOrdinalViewType & Acol2Irow,
132  const LocalOrdinalViewType & Bcol2Ccol,
133  const LocalOrdinalViewType & Icol2Ccol,
134  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
135  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
136  const std::string& label,
137  const Teuchos::RCP<Teuchos::ParameterList>& params);
138 #endif
139 
140  /***************************** Matrix-Matrix Generic Kernels *****************************/
141  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalOrdinalViewType>
    // Declaration only (no body in this header). Unlike the OpenMP-only kernels
    // in this file, this one is templated on Node and is not wrapped in a
    // HAVE_TPETRA_INST_OPENMP guard.
    //
    // Parameters, as far as the signature shows:
    //   omega   - a Scalar passed by value.
    //   Dinv    - a Vector passed by const reference.
    //   Aview, Bview - CrsMatrixStruct operands, taken by non-const reference.
    //   Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol - four views of type
    //     LocalOrdinalViewType, passed by const reference. The first one is
    //     spelled Acol2Brow to match the other kernel declarations in this
    //     file; parameter names in a declaration do not affect callers.
    //   C       - CrsMatrix taken by non-const reference.
    //   Cimport - RCP to a const Import, passed by value.
    //   label, params - a string and an RCP to a Teuchos::ParameterList; neither
    //     has a default argument here.
    //
    // NOTE(review): the name suggests the Jacobi-style product is assembled as a
    // multiply followed by a scale and an add into a new C. The exact formula is
    // not visible here -- confirm against the definition.
142  static inline void jacobi_A_B_newmatrix_MultiplyScaleAddKernel(Scalar omega,
143  const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Node> & Dinv,
144  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
145  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
146  const LocalOrdinalViewType & Acol2Brow,
147  const LocalOrdinalViewType & Acol2Irow,
148  const LocalOrdinalViewType & Bcol2Ccol,
149  const LocalOrdinalViewType & Icol2Ccol,
150  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
151  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
152  const std::string& label,
153  const Teuchos::RCP<Teuchos::ParameterList>& params);
154 
155 
156  /***************************** Triple Product OpenMP Only Kernels *****************************/
157 #ifdef HAVE_TPETRA_INST_OPENMP
158  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
    // Declaration only (no body in this header). Every Tpetra type in the
    // signature is fixed to Kokkos::Compat::KokkosOpenMPWrapperNode, and the
    // declaration is only visible when HAVE_TPETRA_INST_OPENMP is defined.
    //
    // Parameters, as far as the signature shows:
    //   Rview, Aview, Pview - CrsMatrixStruct operands, taken by non-const
    //     reference.
    //   Acol2Prow, Acol2PIrow, Pcol2Accol, PIcol2Accol - four views of type
    //     LocalOrdinalViewType, passed by const reference.
    //   Ac       - CrsMatrix taken by non-const reference.
    //   Acimport - RCP to a const Import, passed by value.
    //   label    - defaults to an empty std::string.
    //   params   - defaults to Teuchos::null.
    //
    // NOTE(review): the names suggest this builds a new triple product
    // Ac = R*A*P with a Gustavson-style kernel aimed at low thread counts, and
    // that the four views are local index maps between A columns, P rows and Ac
    // columns. None of that is established by this declaration -- confirm
    // against the definition.
159  static inline void mult_R_A_P_newmatrix_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
160  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
161  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
162  const LocalOrdinalViewType & Acol2Prow,
163  const LocalOrdinalViewType & Acol2PIrow,
164  const LocalOrdinalViewType & Pcol2Accol,
165  const LocalOrdinalViewType & PIcol2Accol,
166  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
167  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
168  const std::string& label = std::string(),
169  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
170 #endif
171 
172 
173  }// ExtraKernels
174 }//MatrixMatrix
175 }//Tpetra
176 
177 
178 
179 #endif