Compadre  1.5.9
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Compadre_LinearAlgebra.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Compadre: COMpatible PArticle Discretization and REmap Toolkit
4 //
5 // Copyright 2018 NTESS and the Compadre contributors.
6 // SPDX-License-Identifier: BSD-2-Clause
7 // *****************************************************************************
8 // @HEADER
10 
11 #include "KokkosBatched_Copy_Decl.hpp"
12 #include "KokkosBatched_ApplyPivot_Decl.hpp"
13 #include "KokkosBatched_Gemv_Decl.hpp"
14 #include "KokkosBatched_Trsv_Decl.hpp"
15 #include "KokkosBatched_UTV_Decl.hpp"
16 #include "KokkosBatched_SolveUTV_Decl_Compadre.hpp"
17 
18 using namespace KokkosBatched;
19 
20 namespace Compadre{
21 namespace GMLS_LinearAlgebra {
22 
23  template<typename DeviceType,
24  typename AlgoTagType,
25  typename MatrixViewType_A,
26  typename MatrixViewType_B,
27  typename MatrixViewType_X>
29  MatrixViewType_A _a;
30  MatrixViewType_B _b;
31 
34  int _M, _N, _NRHS;
36 
37  KOKKOS_INLINE_FUNCTION
39  const int M,
40  const int N,
41  const int NRHS,
42  const MatrixViewType_A &a,
43  const MatrixViewType_B &b,
44  const bool implicit_RHS)
45  : _a(a), _b(b), _M(M), _N(N), _NRHS(NRHS), _implicit_RHS(implicit_RHS)
46  { _pm_getTeamScratchLevel_0 = 0; _pm_getTeamScratchLevel_1 = 0; }
47 
48  template<typename MemberType>
49  KOKKOS_INLINE_FUNCTION
50  void operator()(const MemberType &member) const {
51 
52  const int k = member.league_rank();
53 
54  // workspace vectors
55  scratch_vector_type ww_fast(member.team_scratch(_pm_getTeamScratchLevel_0), 3*_M);
56  scratch_vector_type ww_slow(member.team_scratch(_pm_getTeamScratchLevel_1), _N*_NRHS);
57 
58  scratch_matrix_right_type aa(_a.data() + TO_GLOBAL(k)*TO_GLOBAL(_a.extent(1))*TO_GLOBAL(_a.extent(2)),
59  _a.extent(1), _a.extent(2));
60  scratch_matrix_right_type bb(_b.data() + TO_GLOBAL(k)*TO_GLOBAL(_b.extent(1))*TO_GLOBAL(_b.extent(2)),
61  _b.extent(1), _b.extent(2));
62  scratch_matrix_right_type xx(_b.data() + TO_GLOBAL(k)*TO_GLOBAL(_b.extent(1))*TO_GLOBAL(_b.extent(2)),
63  _b.extent(1), _b.extent(2));
64 
65  // if sizes don't match extents, then copy to a view with extents matching sizes
66  if ((size_t)_M!=_a.extent(1) || (size_t)_N!=_a.extent(2)) {
67  scratch_matrix_right_type tmp(ww_slow.data(), _M, _N);
68  auto aaa = scratch_matrix_right_type(_a.data() + TO_GLOBAL(k)*TO_GLOBAL(_a.extent(1))*TO_GLOBAL(_a.extent(2)), _M, _N);
69  // copy A to W, then back to A
70  Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_M),[&](const int &i) {
71  Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_N),[&](const int &j) {
72  tmp(i,j) = aa(i,j);
73  });
74  });
75  member.team_barrier();
76  Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_M),[&](const int &i) {
77  Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_N),[&](const int &j) {
78  aaa(i,j) = tmp(i,j);
79  });
80  });
81  member.team_barrier();
82  aa = aaa;
83  }
84 
85  if (std::is_same<typename MatrixViewType_B::array_layout, layout_left>::value) {
86  scratch_matrix_right_type tmp(ww_slow.data(), _N, _NRHS);
87  // coming from LU
88  // then copy B to W, then back to B
89  auto bb_left =
90  scratch_matrix_left_type(_b.data() + TO_GLOBAL(k)*TO_GLOBAL(_b.extent(1))*TO_GLOBAL(_b.extent(2)),
91  _b.extent(1), _b.extent(2));
92  Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_N),[&](const int &i) {
93  Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_NRHS),[&](const int &j) {
94  tmp(i,j) = bb_left(i,j);
95  });
96  });
97  member.team_barrier();
98  Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_N),[&](const int &i) {
99  Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_NRHS),[&](const int &j) {
100  bb(i,j) = tmp(i,j);
101  });
102  });
103  }
104 
105  scratch_matrix_right_type uu(member.team_scratch(_pm_getTeamScratchLevel_1), _M, _N /* only N columns of U are filled, maximum */);
106  scratch_matrix_right_type vv(member.team_scratch(_pm_getTeamScratchLevel_1), _N, _N);
107  scratch_local_index_type pp(member.team_scratch(_pm_getTeamScratchLevel_0), _N);
108 
109  bool do_print = false;
110  if (do_print) {
111  Kokkos::single(Kokkos::PerTeam(member), [&] () {
112 #if KOKKOS_VERSION >= 40200
113  using Kokkos::printf;
114 #endif
115  //print a
116  printf("a=zeros(%lu,%lu);\n", aa.extent(0), aa.extent(1));
117  for (size_t i=0; i<aa.extent(0); ++i) {
118  for (size_t j=0; j<aa.extent(1); ++j) {
119  printf("a(%lu,%lu)= %f;\n", i+1,j+1, aa(i,j));
120  }
121  }
122  //print b
123  printf("b=zeros(%lu,%lu);\n", bb.extent(0), bb.extent(1));
124  for (size_t i=0; i<bb.extent(0); ++i) {
125  for (size_t j=0; j<bb.extent(1); ++j) {
126  printf("b(%lu,%lu)= %f;\n", i+1,j+1, bb(i,j));
127  }
128  }
129  });
130  }
131  do_print = false;
132 
133  /// Solving Ax = b using UTV transformation
134  /// A P^T P x = b
135  /// UTV P x = b;
136 
137  /// UTV = A P^T
138  int matrix_rank(0);
139  member.team_barrier();
140  TeamVectorUTV<MemberType,AlgoTagType>
141  ::invoke(member, aa, pp, uu, vv, ww_fast, matrix_rank);
142  member.team_barrier();
143 
144  if (do_print) {
145  Kokkos::single(Kokkos::PerTeam(member), [&] () {
146 #if KOKKOS_VERSION >= 40200
147  using Kokkos::printf;
148 #endif
149  printf("matrix_rank: %d\n", matrix_rank);
150  //print u
151  printf("u=zeros(%lu,%lu);\n", uu.extent(0), uu.extent(1));
152  for (size_t i=0; i<uu.extent(0); ++i) {
153  for (size_t j=0; j<uu.extent(1); ++j) {
154  printf("u(%lu,%lu)= %f;\n", i+1,j+1, uu(i,j));
155  }
156  }
157  });
158  }
159  TeamVectorSolveUTVCompadre<MemberType,AlgoTagType>
160  ::invoke(member, matrix_rank, _M, _N, _NRHS, uu, aa, vv, pp, bb, xx, ww_slow, ww_fast, _implicit_RHS);
161  member.team_barrier();
162 
163  }
164 
165  inline
166  void run(ParallelManager pm) {
167  typedef typename MatrixViewType_A::non_const_value_type value_type;
168  std::string name_region("KokkosBatched::Test::TeamVectorSolveUTVCompadre");
169  std::string name_value_type = ( std::is_same<value_type,float>::value ? "::Float" :
170  std::is_same<value_type,double>::value ? "::Double" :
171  std::is_same<value_type,Kokkos::complex<float> >::value ? "::ComplexFloat" :
172  std::is_same<value_type,Kokkos::complex<double> >::value ? "::ComplexDouble" : "::UnknownValueType" );
173  std::string name = name_region + name_value_type;
174  Kokkos::Profiling::pushRegion( name.c_str() );
175 
176  _pm_getTeamScratchLevel_0 = pm.getTeamScratchLevel(0);
177  _pm_getTeamScratchLevel_1 = pm.getTeamScratchLevel(1);
178 
179  int scratch_size = scratch_matrix_right_type::shmem_size(_N, _N); // V
180  scratch_size += scratch_matrix_right_type::shmem_size(_M, _N /* only N columns of U are filled, maximum */); // U
181  scratch_size += scratch_vector_type::shmem_size(_N*_NRHS); // W (for SolveUTV)
182 
183  int l0_scratch_size = scratch_vector_type::shmem_size(_N); // P (temporary)
184  l0_scratch_size += scratch_vector_type::shmem_size(3*_M); // W (for UTV)
185 
186  pm.clearScratchSizes();
187  pm.setTeamScratchSize(0, l0_scratch_size);
188  pm.setTeamScratchSize(1, scratch_size);
189 
190  pm.CallFunctorWithTeamThreadsAndVectors(*this, _a.extent(0));
191  Kokkos::fence();
192 
193  Kokkos::Profiling::popRegion();
194  }
195  };
196 
197 
198 
199 template <typename A_layout, typename B_layout, typename X_layout>
200 void batchQRPivotingSolve(ParallelManager pm, double *A, int lda, int nda, double *B, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const bool implicit_RHS) {
201 
202  typedef Algo::UTV::Unblocked algo_tag_type;
203  typedef Kokkos::View<double***, A_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
204  MatrixViewType_A;
205  typedef Kokkos::View<double***, B_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
206  MatrixViewType_B;
207  typedef Kokkos::View<double***, X_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
208  MatrixViewType_X;
209 
210  MatrixViewType_A mat_A(A, num_matrices, lda, nda);
211  MatrixViewType_B mat_B(B, num_matrices, ldb, ndb);
212 
214  <device_execution_space, algo_tag_type, MatrixViewType_A, MatrixViewType_B, MatrixViewType_X>(M,N,NRHS,mat_A,mat_B,implicit_RHS).run(pm);
215 
216 }
217 
218 template void batchQRPivotingSolve<layout_right, layout_right, layout_right>(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
219 template void batchQRPivotingSolve<layout_right, layout_right, layout_left >(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
220 template void batchQRPivotingSolve<layout_right, layout_left , layout_right>(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
221 template void batchQRPivotingSolve<layout_right, layout_left , layout_left >(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
222 template void batchQRPivotingSolve<layout_left , layout_right, layout_right>(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
223 template void batchQRPivotingSolve<layout_left , layout_right, layout_left >(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
224 template void batchQRPivotingSolve<layout_left , layout_left , layout_right>(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
225 template void batchQRPivotingSolve<layout_left , layout_left , layout_left >(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
226 
227 } // GMLS_LinearAlgebra
228 } // Compadre
template void batchQRPivotingSolve< layout_right, layout_right, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_left, layout_right, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const
template void batchQRPivotingSolve< layout_left, layout_left, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_left, layout_left, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
staggered TIMEOUT REQUIRED_FILES< TARGET_FILE:GMLS_Staggered_Manifold_Test > run('${SWIG_PREFIX}/Matlab_1D_Using_Python_Interface.m')
void batchQRPivotingSolve(ParallelManager pm, double *A, int lda, int nda, double *B, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const bool implicit_RHS)
Solves a batch of problems with QR+Pivoting.
KOKKOS_INLINE_FUNCTION Functor_TestBatchedTeamVectorSolveUTV(const int M, const int N, const int NRHS, const MatrixViewType_A &a, const MatrixViewType_B &b, const bool implicit_RHS)
Kokkos::DefaultExecutionSpace device_execution_space
#define TO_GLOBAL(variable)
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type
Kokkos::View< double *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_vector_type
Kokkos::View< double **, layout_left, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_left_type
template void batchQRPivotingSolve< layout_right, layout_right, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
void setTeamScratchSize(const int level, const int value)
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const
template void batchQRPivotingSolve< layout_right, layout_left, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_left, layout_right, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
Kokkos::View< int *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_local_index_type
template void batchQRPivotingSolve< layout_right, layout_left, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)