Compadre  1.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
UnitTest_ThreadedLapack.cpp
Go to the documentation of this file.
1 #include <Compadre_Config.h>
3 
4 #include <assert.h>
5 #include <Kokkos_Timer.hpp>
6 #include <Kokkos_Core.hpp>
7 
8 #ifdef COMPADRE_USE_MPI
9 #include <mpi.h>
10 #endif
11 
12 using namespace Compadre;
13 
14 //! This tests whether or not the LAPACK+BLAS combination is safe to have a parallel_for wrapping them
15 //! If they are not threadsafe, then nothing in this toolkit will work and another library will have
16 //! be provided that is threadsafe.
17 //!
18 //! Creates num_matrices copies of a matrix P and RHS
19 //! P has dimensions P_rows x P_cols
20 //! RHS has dimensions RHS_rows x RHS_cols
21 //!
22 //! P*X=RHS is an overdetermined system from the perspective of rows to columns
23 //! But it is underdetermined in the sense that it actually has a large nullspace
24 //! because we fill P with ones, and RHS with ones.
25 //!
26 //! The result is that solution should be 1 / P_cols for each entry in RHS
27 //!
28 //! This is a fairly easy exercise for dgelsd in LAPACK, but if LAPACK+BLAS are not
29 //! thread safe, then this will fail to get the correct answer.
30 //!
31 //! If this test fails, then either a different LAPACK library should be provided or
32 //! compiled. If you are compiling, be sure to add the -frecursive
33 //! see https://github.com/xianyi/OpenBLAS/issues/477
34 
35 // called from command line
36 int main (int argc, char* args[]) {
37 
38 // initializes MPI (if available) with command line arguments given
39 #ifdef COMPADRE_USE_MPI
40 MPI_Init(&argc, &args);
41 #endif
42 
43 int number_wrong = 0;
44 
45 // initializes Kokkos with command line arguments given
46 Kokkos::initialize(argc, args);
47 {
48  const int P_rows = 100;
49  const int P_cols = 50;
50  assert((P_rows >= P_cols) && "P must not be underdetermined.");
51 
52  const int RHS_rows = P_rows;
53 
54  const int num_matrices = 20;
55 
56  Kokkos::Profiling::pushRegion("Instantiating Data");
57  auto all_P = Kokkos::View<double*>("P", num_matrices*P_cols*P_rows);
58  auto all_RHS = Kokkos::View<double*>("RHS", num_matrices*RHS_rows*RHS_rows);
59  Kokkos::Profiling::popRegion();
60 
61  Kokkos::parallel_for("Fill Matrices", Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0,num_matrices), KOKKOS_LAMBDA(const int i) {
62  Kokkos::View<double**, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
63  P(all_P.data() + i*P_cols*P_rows, P_rows, P_cols);
64 
65  Kokkos::View<double**, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
66  RHS(all_RHS.data() + i*RHS_rows*RHS_rows, RHS_rows, RHS_rows);
67 
68  for (int j=0; j<P_rows; ++j) {
69  for (int k=0; k<P_cols; ++k) {
70  P(j,k) = 1.0;
71  }
72  }
73 
74  for (int j=0; j<RHS_rows; ++j) {
75  for (int k=0; k<RHS_rows; ++k) {
76  RHS(j,k) = 1.0;
77  }
78  }
79  });
80  Kokkos::fence();
81 
82 
83  // call SVD on all num_matrices
84  ParallelManager pm;
85  GMLS_LinearAlgebra::batchSVDFactorize(pm, true, all_P.data(), P_rows, P_cols, true, all_RHS.data(), RHS_rows, RHS_rows, P_rows, P_cols, P_rows, num_matrices);
86 
87 
88  const double tol = 1e-10;
89  Kokkos::parallel_reduce("Check Solution", Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0,num_matrices), KOKKOS_LAMBDA(const int i, int& t_wrong) {
90  Kokkos::View<double**, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
91  RHS(all_RHS.data() + i*RHS_rows*RHS_rows, RHS_rows, RHS_rows);
92 
93  // check the diagonals for the true solution
94  for (int j=0; j<P_cols; ++j) {
95  if (std::abs(RHS(j,j)-1./P_cols) > tol) {
96  t_wrong++;
97  }
98  }
99 
100  }, number_wrong);
101 
102 }
103 // finalize Kokkos
104 Kokkos::finalize();
105 #ifdef COMPADRE_USE_MPI
106 MPI_Finalize();
107 #endif
108 
109 #ifndef LAPACK_DECLARED_THREADSAFE
110  printf("LAPACK_DECLARED_THREADSAFE=OFF. Massive performance loss due to serial LAPACK implementation provided at configure.\n");
111  return 77;
112 #endif
113 
114 if (number_wrong > 0) {
115  printf("Incorrect result. LAPACK IS NOT THREADSAFE AND CANNOT BE USED WITH THIS TOOLKIT! Either provide a thread safe LAPACK+BLAS combination or set -DLAPACK_DECLARED_THREADSAFE:BOOL=OFF in CMake and take a MASSIVE performance hit.\n");
116  return -1;
117 }
118 return 0;
119 }
int main(int argc, char *args[])
[Parse Command Line Arguments]
Definition: GMLS_Device.cpp:29
void batchSVDFactorize(ParallelManager pm, bool swap_layout_P, double *P, int lda, int nda, bool swap_layout_RHS, double *RHS, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const size_t max_neighbors, const int initial_index_of_batch, int *neighbor_list_sizes)
Calls LAPACK or CUBLAS to solve a batch of SVD problems.