11 #include "KokkosBatched_UTV_Decl.hpp" 
   12 #include "KokkosBatched_SolveUTV_Decl_Compadre.hpp" 
   14 using namespace KokkosBatched;
 
   17 namespace GMLS_LinearAlgebra {
 
   19   template<
typename DeviceType,
 
   21            typename MatrixViewType_A,
 
   22            typename MatrixViewType_B,
 
   23            typename MatrixViewType_X>
 
   33     KOKKOS_INLINE_FUNCTION
 
   38                       const MatrixViewType_A &a,
 
   39                       const MatrixViewType_B &b,
 
   40                       const bool implicit_RHS)
 
   41       : _a(a), _b(b), _M(M), _N(N), _NRHS(NRHS), _implicit_RHS(implicit_RHS) 
 
   42         { _pm_getTeamScratchLevel_0 = 0; _pm_getTeamScratchLevel_1 = 0; }
 
   44     template<
typename MemberType>
 
   45     KOKKOS_INLINE_FUNCTION
 
   48       const int k = member.league_rank();
 
   55               _a.extent(1), _a.extent(2));
 
   57               _b.extent(1), _b.extent(2));
 
   59               _b.extent(1), _b.extent(2));
 
   62       if ((
size_t)_M!=_a.extent(1) || (size_t)_N!=_a.extent(2)) {
 
   66         Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_M),[&](
const int &i) {
 
   67           Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_N),[&](
const int &j) {
 
   71         member.team_barrier();
 
   72         Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_M),[&](
const int &i) {
 
   73           Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_N),[&](
const int &j) {
 
   77         member.team_barrier();
 
   81       if (std::is_same<typename MatrixViewType_B::array_layout, layout_left>::value) {
 
   87                     _b.extent(1), _b.extent(2));
 
   88         Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_N),[&](
const int &i) {
 
   89           Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_NRHS),[&](
const int &j) {
 
   90               tmp(i,j) = bb_left(i,j);
 
   93         member.team_barrier();
 
   94         Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_N),[&](
const int &i) {
 
   95           Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_NRHS),[&](
const int &j) {
 
  105       bool do_print = 
false;
 
  107         Kokkos::single(Kokkos::PerTeam(member), [&] () {
 
  108           using Kokkos::printf;
 
  110           printf(
"a=zeros(%lu,%lu);\n", aa.extent(0), aa.extent(1));
 
  111               for (
size_t i=0; i<aa.extent(0); ++i) {
 
  112                   for (
size_t j=0; j<aa.extent(1); ++j) {
 
  113                       printf(
"a(%lu,%lu)= %f;\n", i+1,j+1, aa(i,j));
 
  117           printf(
"b=zeros(%lu,%lu);\n", bb.extent(0), bb.extent(1));
 
  118               for (
size_t i=0; i<bb.extent(0); ++i) {
 
  119                   for (
size_t j=0; j<bb.extent(1); ++j) {
 
  120                       printf(
"b(%lu,%lu)= %f;\n", i+1,j+1, bb(i,j));
 
  133       member.team_barrier();
 
  134       TeamVectorUTV<MemberType,AlgoTagType>
 
  135         ::invoke(member, aa, pp, uu, vv, ww_fast, matrix_rank);
 
  136       member.team_barrier();
 
  139         Kokkos::single(Kokkos::PerTeam(member), [&] () {
 
  140         using Kokkos::printf;
 
  141         printf(
"matrix_rank: %d\n", matrix_rank);
 
  143         printf(
"u=zeros(%lu,%lu);\n", uu.extent(0), uu.extent(1));
 
  144         for (
size_t i=0; i<uu.extent(0); ++i) {
 
  145             for (
size_t j=0; j<uu.extent(1); ++j) {
 
  146                 printf(
"u(%lu,%lu)= %f;\n", i+1,j+1, uu(i,j));
 
  151       TeamVectorSolveUTVCompadre<MemberType,AlgoTagType>
 
  152         ::invoke(member, matrix_rank, _M, _N, _NRHS, uu, aa, vv, pp, bb, xx, ww_slow, ww_fast, _implicit_RHS);
 
  153       member.team_barrier();
 
  159       typedef typename MatrixViewType_A::non_const_value_type value_type;
 
  160       std::string name_region(
"KokkosBatched::Test::TeamVectorSolveUTVCompadre");
 
  161       std::string name_value_type = ( std::is_same<value_type,float>::value ? 
"::Float" :
 
  162                                       std::is_same<value_type,double>::value ? 
"::Double" :
 
  163                                       std::is_same<value_type,Kokkos::complex<float> >::value ? 
"::ComplexFloat" :
 
  164                                       std::is_same<value_type,Kokkos::complex<double> >::value ? 
"::ComplexDouble" : 
"::UnknownValueType" );
 
  165       std::string name = name_region + name_value_type;
 
  166       Kokkos::Profiling::pushRegion( name.c_str() );
 
  171       int scratch_size = scratch_matrix_right_type::shmem_size(_N, _N); 
 
  172       scratch_size += scratch_matrix_right_type::shmem_size(_M, _N ); 
 
  173       scratch_size += scratch_vector_type::shmem_size(_N*_NRHS); 
 
  175       int l0_scratch_size = scratch_vector_type::shmem_size(_N); 
 
  176       l0_scratch_size += scratch_vector_type::shmem_size(3*_M); 
 
  185       Kokkos::Profiling::popRegion();
 
  191 template <
typename A_layout, 
typename B_layout, 
typename X_layout>
 
  192 void batchQRPivotingSolve(
ParallelManager pm, 
double *A, 
int lda, 
int nda, 
double *B, 
int ldb, 
int ndb, 
int M, 
int N, 
int NRHS, 
const int num_matrices, 
const bool implicit_RHS) {
 
  194     typedef Algo::UTV::Unblocked algo_tag_type;
 
  195     typedef Kokkos::View<double***, A_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
 
  197     typedef Kokkos::View<double***, B_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
 
  199     typedef Kokkos::View<double***, X_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
 
  202     MatrixViewType_A mat_A(A, num_matrices, lda, nda);
 
  203     MatrixViewType_B mat_B(B, num_matrices, ldb, ndb);
 
  206       <
device_execution_space, algo_tag_type, MatrixViewType_A, MatrixViewType_B, MatrixViewType_X>(M,N,NRHS,mat_A,mat_B,implicit_RHS).
run(pm);
 
  210 template void batchQRPivotingSolve<layout_right, layout_right, layout_right>(
ParallelManager,
double*,int,int,
double*,int,int,int,int,int,
const int,
const bool);
 
  211 template void batchQRPivotingSolve<layout_right, layout_right, layout_left >(
ParallelManager,
double*,int,int,
double*,int,int,int,int,int,
const int,
const bool);
 
  212 template void batchQRPivotingSolve<layout_right, layout_left , layout_right>(
ParallelManager,
double*,int,int,
double*,int,int,int,int,int,
const int,
const bool);
 
  213 template void batchQRPivotingSolve<layout_right, layout_left , layout_left >(
ParallelManager,
double*,int,int,
double*,int,int,int,int,int,
const int,
const bool);
 
  214 template void batchQRPivotingSolve<layout_left , layout_right, layout_right>(
ParallelManager,
double*,int,int,
double*,int,int,int,int,int,
const int,
const bool);
 
  215 template void batchQRPivotingSolve<layout_left , layout_right, layout_left >(
ParallelManager,
double*,int,int,
double*,int,int,int,int,int,
const int,
const bool);
 
  216 template void batchQRPivotingSolve<layout_left , layout_left , layout_right>(
ParallelManager,
double*,int,int,
double*,int,int,int,int,int,
const int,
const bool);
 
  217 template void batchQRPivotingSolve<layout_left , layout_left , layout_left >(
ParallelManager,
double*,int,int,
double*,int,int,int,int,int,
const int,
const bool);
 
Kokkos::View< double *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_vector_type
 
template void batchQRPivotingSolve< layout_right, layout_right, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
 
template void batchQRPivotingSolve< layout_left, layout_right, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
 
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const 
 
template void batchQRPivotingSolve< layout_left, layout_left, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
 
template void batchQRPivotingSolve< layout_left, layout_left, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
 
int _pm_getTeamScratchLevel_0
 
Kokkos::View< int *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_local_index_type
 
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type
 
int _pm_getTeamScratchLevel_1
 
staggered TIMEOUT REQUIRED_FILES< TARGET_FILE:GMLS_Staggered_Manifold_Test > run('${SWIG_PREFIX}/Matlab_1D_Using_Python_Interface.m')
 
void batchQRPivotingSolve(ParallelManager pm, double *A, int lda, int nda, double *B, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const bool implicit_RHS)
Solves a batch of problems with QR+Pivoting. 
 
KOKKOS_INLINE_FUNCTION Functor_TestBatchedTeamVectorSolveUTV(const int M, const int N, const int NRHS, const MatrixViewType_A &a, const MatrixViewType_B &b, const bool implicit_RHS)
 
#define TO_GLOBAL(variable)
 
void run(ParallelManager pm)
 
Kokkos::DefaultExecutionSpace device_execution_space
 
template void batchQRPivotingSolve< layout_right, layout_right, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
 
void setTeamScratchSize(const int level, const int value)
 
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const 
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
 
Kokkos::View< double **, layout_left, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_left_type
 
KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const 
 
template void batchQRPivotingSolve< layout_right, layout_left, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
 
template void batchQRPivotingSolve< layout_left, layout_right, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
 
template void batchQRPivotingSolve< layout_right, layout_left, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)