1 #include <Compadre_Config.h>
5 #include <Kokkos_Timer.hpp>
6 #include <Kokkos_Core.hpp>
8 #ifdef COMPADRE_USE_MPI
12 using namespace Compadre;
36 int main (
int argc,
char* args[]) {
39 #ifdef COMPADRE_USE_MPI
40 MPI_Init(&argc, &args);
46 Kokkos::initialize(argc, args);
48 const int P_rows = 100;
49 const int P_cols = 50;
50 assert((P_rows >= P_cols) &&
"P must not be underdetermined.");
52 const int RHS_rows = P_rows;
54 const int num_matrices = 20;
56 Kokkos::Profiling::pushRegion(
"Instantiating Data");
57 auto all_P = Kokkos::View<double*>(
"P", num_matrices*P_cols*P_rows);
58 auto all_RHS = Kokkos::View<double*>(
"RHS", num_matrices*RHS_rows*RHS_rows);
59 Kokkos::Profiling::popRegion();
61 Kokkos::parallel_for(
"Fill Matrices", Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0,num_matrices), KOKKOS_LAMBDA(
const int i) {
62 Kokkos::View<double**, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
63 P(all_P.data() + i*P_cols*P_rows, P_rows, P_cols);
65 Kokkos::View<double**, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
66 RHS(all_RHS.data() + i*RHS_rows*RHS_rows, RHS_rows, RHS_rows);
68 for (
int j=0; j<P_rows; ++j) {
69 for (
int k=0; k<P_cols; ++k) {
74 for (
int j=0; j<RHS_rows; ++j) {
75 for (
int k=0; k<RHS_rows; ++k) {
85 GMLS_LinearAlgebra::batchSVDFactorize(pm,
true, all_P.data(), P_rows, P_cols,
true, all_RHS.data(), RHS_rows, RHS_rows, P_rows, P_cols, P_rows, num_matrices);
88 const double tol = 1e-10;
89 Kokkos::parallel_reduce(
"Check Solution", Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0,num_matrices), KOKKOS_LAMBDA(
const int i,
int& t_wrong) {
90 Kokkos::View<double**, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
91 RHS(all_RHS.data() + i*RHS_rows*RHS_rows, RHS_rows, RHS_rows);
94 for (
int j=0; j<P_cols; ++j) {
95 if (std::abs(RHS(j,j)-1./P_cols) > tol) {
105 #ifdef COMPADRE_USE_MPI
109 #ifndef LAPACK_DECLARED_THREADSAFE
110 printf(
"LAPACK_DECLARED_THREADSAFE=OFF. Massive performance loss due to serial LAPACK implementation provided at configure.\n");
114 if (number_wrong > 0) {
115 printf(
"Incorrect result. LAPACK IS NOT THREADSAFE AND CANNOT BE USED WITH THIS TOOLKIT! Either provide a thread safe LAPACK+BLAS combination or set -DLAPACK_DECLARED_THREADSAFE:BOOL=OFF in CMake and take a MASSIVE performance hit.\n");
int main(int argc, char *args[])
[Parse Command Line Arguments]
void batchSVDFactorize(ParallelManager pm, bool swap_layout_P, double *P, int lda, int nda, bool swap_layout_RHS, double *RHS, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const size_t max_neighbors, const int initial_index_of_batch, int *neighbor_list_sizes)
Calls LAPACK or CUBLAS to solve a batch of SVD problems.