#include <Compadre_Config.h>
// (additional Compadre headers from the full example are omitted in this excerpt)
#include <Kokkos_Timer.hpp>
#include <Kokkos_Core.hpp>

#include <cassert>
#include <cmath>
#include <cstdio>

#ifdef COMPADRE_USE_MPI
#include <mpi.h>
#endif

using namespace Compadre;
 
int main (int argc, char* args[]) {
 
#ifdef COMPADRE_USE_MPI
    MPI_Init(&argc, &args);
#endif
 
    Kokkos::initialize(argc, args);

    int number_wrong = 0; // reduction result from the solution check, read again after Kokkos::finalize

    { // scope so that all Kokkos Views are destroyed before Kokkos::finalize
 
    const int P_rows   = 100;
    const int P_cols   = 50;
    assert((P_rows >= P_cols) && "P must not be underdetermined.");

    const int RHS_rows = P_rows;

    const int num_matrices = 20;
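    // Each of the num_matrices problems is an overdetermined P_rows x P_cols (100 x 50) system
    // P * X = RHS, solved in the least-squares sense via SVD against a 100 x 100 block of
    // right-hand sides per matrix.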
 
    Kokkos::Profiling::pushRegion("Instantiating Data");
    auto all_P   = Kokkos::View<double*>("P", num_matrices*P_cols*P_rows);
    auto all_RHS = Kokkos::View<double*>("RHS", num_matrices*RHS_rows*RHS_rows);
    Kokkos::Profiling::popRegion();
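    // The batch is packed into flat device allocations: matrix i occupies entries
    // [i*P_cols*P_rows, (i+1)*P_cols*P_rows) of all_P, and its right-hand-side block occupies
    // [i*RHS_rows*RHS_rows, (i+1)*RHS_rows*RHS_rows) of all_RHS.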
 
    Kokkos::parallel_for("Fill Matrices", Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0,num_matrices), KOKKOS_LAMBDA(const int i) {
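        // Unmanaged Views wrap each matrix's slice of the flat allocations, providing 2D indexing
        // into per-matrix data without allocating or copying anything.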
 
        Kokkos::View<double**, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
            P(all_P.data() + i*P_cols*P_rows, P_rows, P_cols);

        Kokkos::View<double**, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
            RHS(all_RHS.data() + i*RHS_rows*RHS_rows, RHS_rows, RHS_rows);
 
        // fill P and RHS with ones (values consistent with the 1./P_cols check after the solve)
        for (int j=0; j<P_rows; ++j) {
            for (int k=0; k<P_cols; ++k) {
                P(j,k) = 1.0;
            }
        }

        for (int j=0; j<RHS_rows; ++j) {
            for (int k=0; k<RHS_rows; ++k) {
                RHS(j,k) = 1.0;
            }
        }
    });
    Kokkos::fence();
 
    ParallelManager pm; // parallel execution configuration passed to the batched solver

    GMLS_LinearAlgebra::batchSVDFactorize(pm, true, all_P.data(), P_rows, P_cols, true, all_RHS.data(), RHS_rows, RHS_rows, P_rows, P_cols, P_rows, num_matrices);
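    // Mapping onto the batchSVDFactorize signature listed at the end of this example:
    // swap_layout_P = true, lda = P_rows, nda = P_cols, swap_layout_RHS = true, ldb = ndb = RHS_rows,
    // M = P_rows, N = P_cols, NRHS = P_rows. The factorization solves P * X = RHS for each matrix
    // in the batch; the check below reads the solution back out of RHS.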
 
    const double tol = 1e-10;
 
    Kokkos::parallel_reduce("Check Solution", Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0,num_matrices), KOKKOS_LAMBDA(const int i, int& t_wrong) {
 
        Kokkos::View<double**, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
            RHS(all_RHS.data() + i*RHS_rows*RHS_rows, RHS_rows, RHS_rows);
 
        for (int j=0; j<P_cols; ++j) {
            if (std::abs(RHS(j,j)-1./P_cols) > tol) {
                t_wrong++;
            }
        }
    }, number_wrong);
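    // Why 1./P_cols is the expected diagonal value (assuming the all-ones fill above): P = ones(100,50)
    // is rank one with pseudoinverse ones(50,100)/(P_rows*P_cols), so the minimum-norm least-squares
    // solution X = pinv(P)*RHS = ones(50,100)*ones(100,100)/(P_rows*P_cols) has every entry equal to
    // P_rows/(P_rows*P_cols) = 1./P_cols. Thread-unsafe behavior in the underlying LAPACK calls shows
    // up as diagonal entries that miss this value by more than tol.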
 
    } // end of View scope

    Kokkos::finalize();

#ifdef COMPADRE_USE_MPI
    MPI_Finalize();
#endif

#ifndef LAPACK_DECLARED_THREADSAFE
    printf("LAPACK_DECLARED_THREADSAFE=OFF. Massive performance loss due to serial LAPACK implementation provided at configure.\n");
#endif

    if (number_wrong > 0) {
        printf("Incorrect result. LAPACK IS NOT THREADSAFE AND CANNOT BE USED WITH THIS TOOLKIT! Either provide a thread safe LAPACK+BLAS combination or set -DLAPACK_DECLARED_THREADSAFE:BOOL=OFF in CMake and take a MASSIVE performance hit.\n");
        return -1;
    }
    return 0;
}
 
void batchSVDFactorize(ParallelManager pm, bool swap_layout_P, double *P, int lda, int nda, bool swap_layout_RHS, double *RHS, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const size_t max_neighbors, const int initial_index_of_batch, int *neighbor_list_sizes)
Calls LAPACK or CUBLAS to solve a batch of SVD problems.
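The call in the example above passes only the first thirteen arguments (through num_matrices); the trailing max_neighbors, initial_index_of_batch, and neighbor_list_sizes parameters are omitted and appear to take default values, since no neighbor lists are involved in this test.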