Compadre  1.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Compadre_GMLS.cpp
Go to the documentation of this file.
1 #include "Compadre_GMLS.hpp"
6 #include "Compadre_Functors.hpp"
8 
9 namespace Compadre {
10 
11 void GMLS::generatePolynomialCoefficients(const int number_of_batches, const bool keep_coefficients) {
12 
13  compadre_assert_release( (keep_coefficients==false || number_of_batches==1)
14  && "keep_coefficients is set to true, but number of batches exceeds 1.");
15 
16  /*
17  * Generate Quadrature
18  */
20 
21  /*
22  * Operations to Device
23  */
24 
25  // copy over operations
26  _operations = decltype(_operations)("operations", _lro.size());
27  _host_operations = Kokkos::create_mirror_view(_operations);
28 
29  // make sure at least one target operation specified
30  compadre_assert_release((_lro.size() > 0)
31  && "No target operations added to GMLS class before calling generatePolynomialCoefficients().");
32 
33  // loop through list of linear reconstruction operations to be performed and set them on the host
34  for (size_t i=0; i<_lro.size(); ++i) _host_operations(i) = _lro[i];
35 
36  // get copy of operations on the device
37  Kokkos::deep_copy(_operations, _host_operations);
38 
39  // check that if any target sites added, that neighbors_lists has equal rows
41  && "Neighbor lists not set in GMLS class before calling generatePolynomialCoefficients.");
42 
43  // check that if any target sites are greater than zero (could be zero), then there are more than zero source sites
45  && "Source coordinates not set in GMLS class before calling generatePolynomialCoefficients.");
46 
47  /*
48  * Initialize Alphas and Prestencil Weights
49  */
50 
51  // throw an assertion for QR solver incompatibility
52  // TODO: this is a temporary location for this check, in the future the
53  // constraint type could be an object that can check when given a dense_solver_type
56  && "Cannot solve GMLS problems with the NEUMANN_GRAD_SCALAR constraint using QR Factorization.");
57 
58  // calculate the additional size for different constraint problems
61 
62  // initialize all alpha values to be used for taking the dot product with data to get a reconstruction
63  try {
65  int total_added_alphas = _target_coordinates.extent(0)*_added_alpha_size;
66  _alphas = decltype(_alphas)("alphas", (total_neighbors + TO_GLOBAL(total_added_alphas))
68  } catch(std::exception &e) {
69  printf("Insufficient memory to store alphas: \n\n%s", e.what());
70  throw e;
71  }
72 
73  // initialize the prestencil weights that are applied to sampling data to put it into a form
74  // that the GMLS operator will be able to operate on
75  auto sro = _data_sampling_functional;
76  try {
77  _prestencil_weights = decltype(_prestencil_weights)("Prestencil weights",
78  std::pow(2,sro.use_target_site_weights),
79  (sro.transform_type==DifferentEachTarget
80  || sro.transform_type==DifferentEachNeighbor) ?
82  (sro.transform_type==DifferentEachNeighbor) ?
84  (sro.output_rank>0) ?
86  (sro.input_rank>0) ?
87  _global_dimensions : 1);
88  } catch(std::exception &e) {
89  printf("Insufficient memory to store prestencil weights: \n\n%s", e.what());
90  throw e;
91  }
92  Kokkos::fence();
93 
94  /*
95  * Determine if Nontrivial Null Space in Solution
96  */
97 
98  // check whether the sampling function acting on the basis will induce a nontrivial nullspace
99  // an example would be reconstructing from gradient information, which would annihilate constants
101 
102  /*
103  * Determine if Nonstandard Sampling Dimension or Basis Component Dimension
104  */
105 
106 
107  // calculate the dimension of the basis (a vector space on a manifold requires two components, for example)
109 
110  // calculate sampling dimension
112 
113  // effective number of components in the basis
115 
116  // special case for using a higher order for sampling from a polynomial space that are gradients of a scalar polynomial
118  // if the reconstruction is being made with a gradient of a basis, then we want that basis to be one order higher so that
119  // the gradient is consistent with the convergence order expected.
120  _poly_order += 1;
122  }
123 
124  /*
125  * Dimensions
126  */
127 
128  // for tallying scratch space needed for device kernel calls
129  int team_scratch_size_a = 0;
130 
131  // TEMPORARY, take to zero after conversion
132  int team_scratch_size_b = 0;
133  int thread_scratch_size_a = 0;
134  int thread_scratch_size_b = 0;
135 
136  // dimensions that are relevant for each subproblem
137  int max_num_rows = _sampling_multiplier*_max_num_neighbors;
138  int this_num_cols = _basis_multiplier*_NP;
139  int manifold_NP = 0;
140 
142  // these dimensions already calculated differ in the case of manifolds
145  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
146  this_num_cols = _basis_multiplier*max_manifold_NP;
147  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
148  const int max_P_row_size = ((_dimensions-1)*manifold_NP > max_manifold_NP*_total_alpha_values*_basis_multiplier) ? (_dimensions-1)*manifold_NP : max_manifold_NP*_total_alpha_values*_basis_multiplier*_max_evaluation_sites_per_target;
149 
150  /*
151  * Calculate Scratch Space Allocations
152  */
153 
154  team_scratch_size_b += scratch_matrix_right_type::shmem_size(_dimensions-1, _dimensions-1); // G
155  team_scratch_size_b += scratch_matrix_right_type::shmem_size(_dimensions, _dimensions); // PTP matrix
156  team_scratch_size_b += scratch_vector_type::shmem_size( (_dimensions-1)*_max_num_neighbors ); // manifold_gradient
157 
158  team_scratch_size_b += scratch_vector_type::shmem_size(_max_num_neighbors*std::max(_sampling_multiplier,_basis_multiplier)); // t1 work vector for qr
159  team_scratch_size_b += scratch_vector_type::shmem_size(_max_num_neighbors*std::max(_sampling_multiplier,_basis_multiplier)); // t2 work vector for qr
160 
161  team_scratch_size_b += scratch_vector_type::shmem_size(max_P_row_size); // row of P matrix, one for each operator
162  thread_scratch_size_b += scratch_vector_type::shmem_size(max_manifold_NP*_basis_multiplier); // delta, used for each thread
163  thread_scratch_size_b += scratch_vector_type::shmem_size((max_poly_order+1)*_global_dimensions); // temporary space for powers in basis
165  thread_scratch_size_b += scratch_vector_type::shmem_size(_dimensions*_dimensions); // temporary tangent calculations, used for each thread
166  }
167 
168 
169  // allocate data on the device (initialized to zero)
170  _T = Kokkos::View<double*>("tangent approximation",_target_coordinates.extent(0)*_dimensions*_dimensions);
171  _manifold_metric_tensor_inverse = Kokkos::View<double*>("manifold metric tensor inverse",_target_coordinates.extent(0)*(_dimensions-1)*(_dimensions-1));
172  _manifold_curvature_coefficients = Kokkos::View<double*>("manifold curvature coefficients",_target_coordinates.extent(0)*manifold_NP);
173  _manifold_curvature_gradient = Kokkos::View<double*>("manifold curvature gradient",_target_coordinates.extent(0)*(_dimensions-1));
174 
175  } else { // Standard GMLS
176 
177  /*
178  * Calculate Scratch Space Allocations
179  */
180 
181  team_scratch_size_a += scratch_vector_type::shmem_size(max_num_rows); // t1 work vector for qr
182  team_scratch_size_a += scratch_vector_type::shmem_size(max_num_rows); // t2 work vector for qr
183 
184  // row of P matrix, one for each operator
185  // +1 is for the original target site which always gets evaluated
186  team_scratch_size_b += scratch_vector_type::shmem_size(this_num_cols*_total_alpha_values*_max_evaluation_sites_per_target);
187 
188  thread_scratch_size_b += scratch_vector_type::shmem_size(this_num_cols); // delta, used for each thread
189  thread_scratch_size_b += scratch_vector_type::shmem_size((_poly_order+1)*_global_dimensions); // temporary space for powers in basis
190  }
191 #ifdef COMPADRE_USE_CUDA
193 #endif
194  _pm.setTeamScratchSize(0, team_scratch_size_a);
195  _pm.setTeamScratchSize(1, team_scratch_size_b);
196  _pm.setThreadScratchSize(0, thread_scratch_size_a);
197  _pm.setThreadScratchSize(1, thread_scratch_size_b);
198 
199  /*
200  * Calculate the size for matrix P and RHS
201  */
202 
203  int RHS_square_dim = getRHSSquareDim(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols);
204  int P_dim_0, P_dim_1;
205  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
206 
207  /*
208  * Allocate Global Device Storage of Data Needed Over Multiple Calls
209  */
210 
211  global_index_type max_batch_size = (_target_coordinates.extent(0) + TO_GLOBAL(number_of_batches) - 1) / TO_GLOBAL(number_of_batches);
212  try {
213  _RHS = Kokkos::View<double*>("RHS", max_batch_size*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim));
214  _P = Kokkos::View<double*>("P", max_batch_size*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1));
215  _w = Kokkos::View<double*>("w", max_batch_size*TO_GLOBAL(max_num_rows));
216  } catch (std::exception &e) {
217  printf("Failed to allocate space for RHS, P, and w. Consider increasing number_of_batches: \n\n%s", e.what());
218  throw e;
219  }
220  Kokkos::fence();
221 
222  /*
223  * Calculate Optimal Threads Based On Levels of Parallelism
224  */
225 
228  && "Normal vectors are required for solving GMLS problems with the NEUMANN_GRAD_SCALAR constraint.");
229  }
230 
231 
233  for (int batch_num=0; batch_num<number_of_batches; ++batch_num) {
234 
235  auto this_batch_size = std::min(_target_coordinates.extent(0)-_initial_index_for_batch, max_batch_size);
236  Kokkos::deep_copy(_RHS, 0.0);
237  Kokkos::deep_copy(_P, 0.0);
238  Kokkos::deep_copy(_w, 0.0);
239 
241 
242  /*
243  * MANIFOLD Problems
244  */
245 
246  if (!_orthonormal_tangent_space_provided) { // user did not specify orthonormal tangent directions, so we approximate them first
247  // coarse tangent plane approximation construction of P^T*P
249 
250  // if the user provided the reference outward normal direction, then orient the computed or user provided
251  // outward normal directions in the tangent bundle
253  // use the reference outward normal direction provided by the user to orient
254  // the tangent bundle
256  }
257 
258  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity for curvature
259  _pm.CallFunctorWithTeamThreads<AssembleCurvaturePsqrtW>(this_batch_size, *this);
260 
262  // solves P^T*P against P^T*W with LU, stored in P
263  Kokkos::Profiling::pushRegion("Curvature LU Factorization");
264  GMLS_LinearAlgebra::batchLUFactorize(_pm, _RHS.data(), RHS_square_dim, RHS_square_dim, _P.data(), P_dim_1, P_dim_0, manifold_NP, manifold_NP, _max_num_neighbors, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
265  Kokkos::Profiling::popRegion();
266  } else {
267  // solves P*sqrt(weights) against sqrt(weights)*Identity with QR, stored in RHS
268  Kokkos::Profiling::pushRegion("Curvature QR Factorization");
269  GMLS_LinearAlgebra::batchQRFactorize(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_square_dim, RHS_square_dim, _max_num_neighbors, manifold_NP, _max_num_neighbors, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
270  Kokkos::Profiling::popRegion();
271  }
272 
273  // evaluates targets, applies target evaluation to polynomial coefficients for curvature
275 
276  // Due to converting layout, entries that are assumed zeros may become non-zeros.
277  Kokkos::deep_copy(_P, 0.0);
278 
279  if (batch_num==number_of_batches-1) {
280  // copy tangent bundle from device back to host
281  _host_T = Kokkos::create_mirror_view(_T);
282  Kokkos::deep_copy(_host_T, _T);
283  }
284  }
285 
286  // this time assembling curvature PsqrtW matrix is using a highly accurate approximation of the tangent, previously calculated
287  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity for curvature
288  _pm.CallFunctorWithTeamThreads<AssembleCurvaturePsqrtW>(this_batch_size, *this);
289 
291  // solves P^T*P against P^T*W with LU, stored in P
292  Kokkos::Profiling::pushRegion("Curvature LU Factorization");
293  GMLS_LinearAlgebra::batchLUFactorize(_pm, _RHS.data(), RHS_square_dim, RHS_square_dim, _P.data(), P_dim_1, P_dim_0, manifold_NP, manifold_NP, _max_num_neighbors, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
294  Kokkos::Profiling::popRegion();
295  } else {
296  // solves P*sqrt(weights) against sqrt(weights)*Identity, stored in RHS
297  Kokkos::Profiling::pushRegion("Curvature QR Factorization");
298  GMLS_LinearAlgebra::batchQRFactorize(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_square_dim, RHS_square_dim, _max_num_neighbors, manifold_NP, _max_num_neighbors, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
299  Kokkos::Profiling::popRegion();
300  }
301 
302  // evaluates targets, applies target evaluation to polynomial coefficients for curvature
303  _pm.CallFunctorWithTeamThreads<ApplyCurvatureTargets>(this_batch_size, *this);
304  Kokkos::fence();
305 
306  // prestencil weights calculated here. appropriate because:
307  // precedes polynomial reconstruction from data (replaces contents of _RHS)
308  // follows reconstruction of geometry
309  // calculate prestencil weights
311 
312  // Due to converting layout, entried that are assumed zeros may become non-zeros.
313  Kokkos::deep_copy(_P, 0.0);
314 
315  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity
316  _pm.CallFunctorWithTeamThreads<AssembleManifoldPsqrtW>(this_batch_size, *this);
317 
318  // solves P*sqrt(weights) against sqrt(weights)*Identity, stored in RHS
319  // uses SVD if necessary or if explicitly asked to do so (much slower than QR)
321  Kokkos::Profiling::pushRegion("Manifold SVD Factorization");
322  GMLS_LinearAlgebra::batchSVDFactorize(_pm, true, _P.data(), P_dim_0, P_dim_1, true, _RHS.data(), RHS_square_dim, RHS_square_dim, max_num_rows, this_num_cols, max_num_rows, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
323  Kokkos::Profiling::popRegion();
324  } else if (_dense_solver_type == DenseSolverType::LU) {
325  Kokkos::Profiling::pushRegion("Manifold LU Factorization");
326  GMLS_LinearAlgebra::batchLUFactorize(_pm, _RHS.data(), RHS_square_dim, RHS_square_dim, _P.data(), P_dim_1, P_dim_0, this_num_cols, this_num_cols, max_num_rows, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
327  Kokkos::Profiling::popRegion();
328  } else {
329  Kokkos::Profiling::pushRegion("Manifold QR Factorization");
330  GMLS_LinearAlgebra::batchQRFactorize(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_square_dim, RHS_square_dim, max_num_rows, this_num_cols, max_num_rows, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
331  Kokkos::Profiling::popRegion();
332  }
333  Kokkos::fence();
334 
335  } else {
336 
337  /*
338  * STANDARD GMLS Problems
339  */
340 
341  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity
342  _pm.CallFunctorWithTeamThreads<AssembleStandardPsqrtW>(this_batch_size, *this);
343  //_pm.CallFunctorWithTeamThreads<AssembleStandardPsqrtW>(this_batch_size, *this);
344  Kokkos::fence();
345 
346  // solves P*sqrt(weights) against sqrt(weights)*Identity, stored in RHS
347  // uses SVD if necessary or if explicitly asked to do so (much slower than QR)
350  Kokkos::Profiling::pushRegion("SVD Factorization");
351  GMLS_LinearAlgebra::batchSVDFactorize(_pm, false, _RHS.data(), RHS_square_dim, RHS_square_dim, true, _P.data(), P_dim_1, P_dim_0, this_num_cols + added_coeff_size, this_num_cols + added_coeff_size, max_num_rows + _added_alpha_size, this_batch_size, 0, _initial_index_for_batch, NULL);
352  Kokkos::Profiling::popRegion();
353  } else {
354  Kokkos::Profiling::pushRegion("LU Factorization");
355  GMLS_LinearAlgebra::batchLUFactorize(_pm, _RHS.data(), RHS_square_dim, RHS_square_dim, _P.data(), P_dim_1, P_dim_0, this_num_cols + added_coeff_size, this_num_cols + added_coeff_size, max_num_rows + _added_alpha_size, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
356  Kokkos::Profiling::popRegion();
357  }
358  } else {
359  // Solve normally for no-constraint cases
361  Kokkos::Profiling::pushRegion("SVD Factorization");
362  GMLS_LinearAlgebra::batchSVDFactorize(_pm, true, _P.data(), P_dim_0, P_dim_1, true, _RHS.data(), RHS_square_dim, RHS_square_dim, max_num_rows, this_num_cols, max_num_rows, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
363  Kokkos::Profiling::popRegion();
364  } else if (_dense_solver_type == DenseSolverType::LU) {
365  Kokkos::Profiling::pushRegion("LU Factorization");
366  GMLS_LinearAlgebra::batchLUFactorize(_pm, _RHS.data(), RHS_square_dim, RHS_square_dim, _P.data(), P_dim_1, P_dim_0, this_num_cols + added_coeff_size, this_num_cols + added_coeff_size, max_num_rows + _added_alpha_size, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
367  Kokkos::Profiling::popRegion();
368  } else {
369  Kokkos::Profiling::pushRegion("QR Factorization");
370  GMLS_LinearAlgebra::batchQRFactorize(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_square_dim, RHS_square_dim, max_num_rows, this_num_cols, max_num_rows, this_batch_size, _max_num_neighbors, _initial_index_for_batch, _host_number_of_neighbors_list.data());
371  Kokkos::Profiling::popRegion();
372  }
373  }
374 
376  Kokkos::fence();
377  }
378 
379  /*
380  * Calculate Optimal Threads Based On Levels of Parallelism
381  */
382 
383 
385 
386  /*
387  * MANIFOLD Problems
388  */
389 
390  // evaluates targets, applies target evaluation to polynomial coefficients to store in _alphas
391  _pm.CallFunctorWithTeamThreads<ApplyManifoldTargets>(this_batch_size, *this);
392 
393  } else {
394 
395  /*
396  * STANDARD GMLS Problems
397  */
398 
399  // evaluates targets, applies target evaluation to polynomial coefficients to store in _alphas
401 
402  }
403  Kokkos::fence();
404  _initial_index_for_batch += this_batch_size;
405  if ((size_t)_initial_index_for_batch == _target_coordinates.extent(0)) break;
406  } // end of batch loops
407 
408  // deallocate _P and _w
409  _w = Kokkos::View<double*>("w",0);
410  if (number_of_batches > 1) { // no reason to keep coefficients if they aren't all in memory
411  _RHS = Kokkos::View<double*>("RHS",0);
412  _P = Kokkos::View<double*>("P",0);
414  } else {
416  _RHS = Kokkos::View<double*>("RHS", 0);
417  if (!keep_coefficients) _P = Kokkos::View<double*>("P", 0);
418  } else {
420  _P = Kokkos::View<double*>("P", 0);
421  if (!keep_coefficients) _RHS = Kokkos::View<double*>("RHS", 0);
422  } else {
423  _RHS = Kokkos::View<double*>("RHS", 0);
424  if (!keep_coefficients) _P = Kokkos::View<double*>("P", 0);
425  }
426  }
427  if (keep_coefficients) _store_PTWP_inv_PTW = true;
428  }
429 
430  /*
431  * Device to Host Copy Of Solution
432  */
433 
434  // copy computed alphas back to the host
435  _host_alphas = Kokkos::create_mirror_view(_alphas);
437  _host_prestencil_weights = Kokkos::create_mirror_view(_prestencil_weights);
438  Kokkos::deep_copy(_host_prestencil_weights, _prestencil_weights);
439  }
440  Kokkos::deep_copy(_host_alphas, _alphas);
441  Kokkos::fence();
442 
443 
444 }
445 
446 void GMLS::generateAlphas(const int number_of_batches, const bool keep_coefficients) {
447 
448  this->generatePolynomialCoefficients(number_of_batches, keep_coefficients);
449 
450 }
451 
452 
453 KOKKOS_INLINE_FUNCTION
454 void GMLS::operator()(const AssembleStandardPsqrtW&, const member_type& teamMember) const {
455 
456  /*
457  * Dimensions
458  */
459 
460  const int target_index = _initial_index_for_batch + teamMember.league_rank();
461  const int local_index = teamMember.league_rank();
462 
463  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
464  const int this_num_rows = _sampling_multiplier*this->getNNeighbors(target_index);
465  const int this_num_cols = _basis_multiplier*_NP;
466 
467  int RHS_square_dim = getRHSSquareDim(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols);
468  int P_dim_0, P_dim_1;
469  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
470 
471  /*
472  * Data
473  */
474 
475  scratch_matrix_right_type PsqrtW(_P.data()
476  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1), P_dim_0, P_dim_1);
477  scratch_matrix_right_type RHS(_RHS.data()
478  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
479  scratch_vector_type w(_w.data()
480  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
481 
482  // delta, used for each thread
483  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), this_num_cols);
484  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (_poly_order+1)*_global_dimensions);
485 
486  /*
487  * Assemble P*sqrt(W) and sqrt(w)*Identity
488  */
489 
490  // creates the matrix sqrt(W)*P
491  this->createWeightsAndP(teamMember, delta, thread_workspace, PsqrtW, w, _dimensions, _poly_order, true /*weight_p*/, NULL /*&V*/, _reconstruction_space, _polynomial_sampling_functional);
492 
494  // if (_dense_solver_type != DenseSolverType::LU) {
495  // fill in RHS with Identity * sqrt(weights)
496  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,this_num_rows), [=] (const int i) {
497  for(int j = 0; j < this_num_rows; ++j) {
498  RHS(j,i) = (i==j) ? std::sqrt(w(i)) : 0;
499  }
500  });
501  } else {
502  // create global memory for matrix M = PsqrtW^T*PsqrtW
503  // don't need to cast into scratch_matrix_left_type since the matrix is symmetric
505  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
506  GMLS_LinearAlgebra::createM(teamMember, M, PsqrtW, this_num_cols /* # of columns */, max_num_rows);
507 
508  // Multiply PsqrtW with sqrt(W) to get PW
509  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, max_num_rows), [=] (const int i) {
510  for (int j=0; j < this_num_cols; j++) {
511  PsqrtW(i, j) = PsqrtW(i, j)*std::sqrt(w(i));
512  }
513  });
514  teamMember.team_barrier();
515 
516  // conditionally fill in rows determined by constraint type
518  // normal vector is contained in last row of T
521 
522  // Get the number of neighbors for target index
523  int num_neigh_target = this->getNNeighbors(target_index);
524  double cutoff_p = _epsilons(target_index);
525 
526  evaluateConstraints(M, PsqrtW, _constraint_type, _reconstruction_space, _NP, cutoff_p, _dimensions, num_neigh_target, &T);
527  }
528  }
529 }
530 
531 
532 KOKKOS_INLINE_FUNCTION
533 void GMLS::operator()(const ApplyStandardTargets&, const member_type& teamMember) const {
534 
535  /*
536  * Dimensions
537  */
538 
539  const int local_index = teamMember.league_rank();
540 
541  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
542  const int this_num_cols = _basis_multiplier*_NP;
543 
544  int RHS_square_dim = getRHSSquareDim(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols);
545  int P_dim_0, P_dim_1;
546  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
547 
548  /*
549  * Data
550  */
551 
552  // Coefficients for polynomial basis have overwritten _RHS
554  // if (_dense_solver_type != DenseSolverType::LU) {
556  Coeffs = scratch_matrix_right_type(_RHS.data()
557  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
558  } else {
559  Coeffs = scratch_matrix_right_type(_P.data()
560  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_1)*TO_GLOBAL(P_dim_0), P_dim_1, P_dim_0);
561  }
562  scratch_vector_type w(_w.data()
563  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
564 
565  scratch_vector_type t1(teamMember.team_scratch(_pm.getTeamScratchLevel(0)), max_num_rows);
566  scratch_vector_type t2(teamMember.team_scratch(_pm.getTeamScratchLevel(0)), max_num_rows);
567  scratch_matrix_right_type P_target_row(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), _total_alpha_values*_max_evaluation_sites_per_target, this_num_cols);
568 
569  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), this_num_cols);
570  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (_poly_order+1)*_global_dimensions);
571 
572  /*
573  * Apply Standard Target Evaluations to Polynomial Coefficients
574  */
575 
576  // get evaluation of target functionals
577  this->computeTargetFunctionals(teamMember, delta, thread_workspace, P_target_row);
578  teamMember.team_barrier();
579 
580  this->applyTargetsToCoefficients(teamMember, t1, t2, Coeffs, w, P_target_row, _NP);
581  teamMember.team_barrier();
582 }
583 
584 
585 KOKKOS_INLINE_FUNCTION
586 void GMLS::operator()(const ComputeCoarseTangentPlane&, const member_type& teamMember) const {
587 
588  /*
589  * Dimensions
590  */
591 
592  const int target_index = _initial_index_for_batch + teamMember.league_rank();
593  const int local_index = teamMember.league_rank();
594 
595  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
597  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
598  const int this_num_cols = _basis_multiplier*max_manifold_NP;
599  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
600 
601  int P_dim_0, P_dim_1;
602  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
603 
604  /*
605  * Data
606  */
607 
608  scratch_matrix_right_type PsqrtW(_P.data()
609  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1), P_dim_0, P_dim_1);
610  scratch_vector_type w(_w.data()
611  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
612  scratch_matrix_right_type T(_T.data()
614 
615  scratch_matrix_right_type PTP(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), _dimensions, _dimensions);
616 
617  // delta, used for each thread
618  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
619  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
620 
621  /*
622  * Determine Coarse Approximation of Manifold Tangent Plane
623  */
624 
625  // getting x y and z from which to derive a manifold
626  this->createWeightsAndPForCurvature(teamMember, delta, thread_workspace, PsqrtW, w, _dimensions, true /* only specific order */);
627 
628  // create PsqrtW^T*PsqrtW
629  GMLS_LinearAlgebra::createM(teamMember, PTP, PsqrtW, _dimensions /* # of columns */, this->getNNeighbors(target_index));
630 
631  // create coarse approximation of tangent plane in first two rows of T, with normal direction in third column
633  const_cast<pool_type&>(_random_number_pool));
634 
635  teamMember.team_barrier();
636 
637 }
638 
639 KOKKOS_INLINE_FUNCTION
640 void GMLS::operator()(const AssembleCurvaturePsqrtW&, const member_type& teamMember) const {
641 
642  /*
643  * Dimensions
644  */
645 
646  const int target_index = _initial_index_for_batch + teamMember.league_rank();
647  const int local_index = teamMember.league_rank();
648 
649  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
651  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
652  const int this_num_neighbors = this->getNNeighbors(target_index);
653  const int this_num_cols = _basis_multiplier*max_manifold_NP;
654  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
655 
656  int RHS_square_dim = getRHSSquareDim(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols);
657  int P_dim_0, P_dim_1;
658  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
659 
660  /*
661  * Data
662  */
663 
664  scratch_matrix_right_type CurvaturePsqrtW(_P.data()
665  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1), P_dim_0, P_dim_1);
666  scratch_matrix_right_type RHS(_RHS.data()
667  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
668  scratch_vector_type w(_w.data()
669  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
670  scratch_matrix_right_type T(_T.data()
672 
673  // delta, used for each thread
674  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
675  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
676 
677 
678  //
679  // RECONSTRUCT ON THE TANGENT PLANE USING LOCAL COORDINATES
680  //
681 
682  // creates the matrix sqrt(W)*P
683  this->createWeightsAndPForCurvature(teamMember, delta, thread_workspace, CurvaturePsqrtW, w, _dimensions-1, false /* only specific order */, &T);
684  teamMember.team_barrier();
685 
686  // CurvaturePsqrtW is sized according to max_num_rows x this_num_cols of which in this case
687  // we are only using this_num_neighbors x manifold_NP
688 
690  // fill in RHS with Identity * sqrt(weights)
691  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,this_num_neighbors), [=] (const int i) {
692  for(int j = 0; j < this_num_neighbors; ++j) {
693  RHS(i, j) = (i==j) ? std::sqrt(w(i)) : 0;
694  }
695  });
696  } else {
697  // create global memory for matrix M = PsqrtW^T*PsqrtW
698  // don't need to cast into scratch_matrix_left_type since the matrix is symmetric
700  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
701  // Assemble matrix M
702  GMLS_LinearAlgebra::createM(teamMember, M, CurvaturePsqrtW, manifold_NP /* # of columns */, this_num_neighbors);
703 
704  // Multiply PsqrtW with sqrt(W) to get PW
705  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, this_num_neighbors), [=] (const int i) {
706  for (int j=0; j < manifold_NP; j++) {
707  CurvaturePsqrtW(i, j) = CurvaturePsqrtW(i, j)*std::sqrt(w(i));
708  }
709  });
710  }
711  teamMember.team_barrier();
712 }
713 
714 KOKKOS_INLINE_FUNCTION
715 void GMLS::operator()(const GetAccurateTangentDirections&, const member_type& teamMember) const {
716 
717  /*
718  * Dimensions
719  */
720 
721  const int target_index = _initial_index_for_batch + teamMember.league_rank();
722  const int local_index = teamMember.league_rank();
723 
724  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
726  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
727  const int this_num_cols = _basis_multiplier*max_manifold_NP;
728  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
729 
730  int RHS_square_dim = getRHSSquareDim(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols);
731  int P_dim_0, P_dim_1;
732  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
733 
734  /*
735  * Data
736  */
739  // Solution from QR comes from RHS
741  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
742  } else {
743  // Solution from LU comes from P
744  Q = scratch_matrix_right_type(_P.data()
745  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_1)*TO_GLOBAL(P_dim_0), P_dim_1, P_dim_0);
746  }
747 
748  scratch_matrix_right_type T(_T.data()
750 
751  scratch_vector_type manifold_gradient(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), (_dimensions-1)*_max_num_neighbors);
753 
754  // delta, used for each thread
755  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
756  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
757 
758  /*
759  * Manifold
760  */
761 
762 
763  //
764  // GET TARGET COEFFICIENTS RELATED TO GRADIENT TERMS
765  //
766  // reconstruct grad_xi1 and grad_xi2, not used for manifold_coeffs
767  this->computeCurvatureFunctionals(teamMember, delta, thread_workspace, P_target_row, &T);
768  teamMember.team_barrier();
769 
770  double grad_xi1 = 0, grad_xi2 = 0;
771  for (int i=0; i<this->getNNeighbors(target_index); ++i) {
772  for (int k=0; k<_dimensions-1; ++k) {
773  double alpha_ij = 0;
774  int offset = getTargetOffsetIndexDevice(0, 0, k, 0);
775  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember,
776  manifold_NP), [=] (const int l, double &talpha_ij) {
777  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
778  talpha_ij += P_target_row(offset,l)*Q(l,i);
779  });
780  }, alpha_ij);
781  teamMember.team_barrier();
782  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
783  manifold_gradient(i*(_dimensions-1) + k) = alpha_ij; // stored staggered, grad_xi1, grad_xi2, grad_xi1, grad_xi2, ....
784  });
785  }
786  teamMember.team_barrier();
787 
788  XYZ rel_coord = getRelativeCoord(target_index, i, _dimensions, &T);
789  double normal_coordinate = rel_coord[_dimensions-1];
790 
791  // apply coefficients to sample data
792  grad_xi1 += manifold_gradient(i*(_dimensions-1)) * normal_coordinate;
793  if (_dimensions>2) grad_xi2 += manifold_gradient(i*(_dimensions-1)+1) * normal_coordinate;
794  teamMember.team_barrier();
795  }
796 
797  // Constructs high order orthonormal tangent space T and inverse of metric tensor
798  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
799 
800  double grad_xi[2] = {grad_xi1, grad_xi2};
801  double T_row[3];
802 
803  // Construct T (high order approximation of orthonormal tangent vectors)
804  for (int i=0; i<_dimensions-1; ++i) {
805  for (int j=0; j<_dimensions; ++j) {
806  T_row[j] = T(i,j);
807  }
808  // build
809  for (int j=0; j<_dimensions; ++j) {
810  T(i,j) = grad_xi[i]*T(_dimensions-1,j);
811  T(i,j) += T_row[j];
812  }
813  }
814 
815  // calculate norm
816  double norm = 0;
817  for (int j=0; j<_dimensions; ++j) {
818  norm += T(0,j)*T(0,j);
819  }
820 
821  // normalize first vector
822  norm = std::sqrt(norm);
823  for (int j=0; j<_dimensions; ++j) {
824  T(0,j) /= norm;
825  }
826 
827  // orthonormalize next vector
828  if (_dimensions-1 == 2) { // 2d manifold
829  double dot_product = T(0,0)*T(1,0) + T(0,1)*T(1,1) + T(0,2)*T(1,2);
830  for (int j=0; j<_dimensions; ++j) {
831  T(1,j) -= dot_product*T(0,j);
832  }
833  // normalize second vector
834  norm = 0;
835  for (int j=0; j<_dimensions; ++j) {
836  norm += T(1,j)*T(1,j);
837  }
838  norm = std::sqrt(norm);
839  for (int j=0; j<_dimensions; ++j) {
840  T(1,j) /= norm;
841  }
842  }
843 
844  // get normal vector to first two rows of T
845  double norm_t_normal = 0;
846  if (_dimensions>2) {
847  T(_dimensions-1,0) = T(0,1)*T(1,2) - T(1,1)*T(0,2);
848  norm_t_normal += T(_dimensions-1,0)*T(_dimensions-1,0);
849  T(_dimensions-1,1) = -(T(0,0)*T(1,2) - T(1,0)*T(0,2));
850  norm_t_normal += T(_dimensions-1,1)*T(_dimensions-1,1);
851  T(_dimensions-1,2) = T(0,0)*T(1,1) - T(1,0)*T(0,1);
852  norm_t_normal += T(_dimensions-1,2)*T(_dimensions-1,2);
853  } else {
854  T(_dimensions-1,0) = T(1,1) - T(0,1);
855  norm_t_normal += T(_dimensions-1,0)*T(_dimensions-1,0);
856  T(_dimensions-1,1) = T(0,0) - T(1,0);
857  norm_t_normal += T(_dimensions-1,1)*T(_dimensions-1,1);
858  }
859  norm_t_normal = std::sqrt(norm_t_normal);
860  for (int i=0; i<_dimensions-1; ++i) {
861  T(_dimensions-1,i) /= norm_t_normal;
862  }
863  });
864  teamMember.team_barrier();
865 }
866 
867 KOKKOS_INLINE_FUNCTION
868 void GMLS::operator()(const FixTangentDirectionOrdering&, const member_type& teamMember) const {
869 
870  /*
871  * Dimensions
872  */
873 
874  const int target_index = _initial_index_for_batch + teamMember.league_rank();
875 
876  /*
877  * Data
878  */
879 
881  scratch_vector_type N(_ref_N.data() + target_index*_dimensions, _dimensions);
882 
883  // take the dot product of the calculated normal in the tangent bundle with a given reference outward normal
884  // direction provided by the user. if the dot product is negative, flip the tangent vector ordering
885  // and flip the sign on the normal direction.
886  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
887  compadre_kernel_assert_debug(_dimensions > 1 && "FixTangentDirectionOrder called on manifold with a dimension of 0.");
888  double dot_product = 0;
889  for (int i=0; i<_dimensions; ++i) {
890  dot_product += T(_dimensions-1,i) * N(i);
891 
892  }
893  if (dot_product < 0) {
894  if (_dimensions==3) {
895  for (int i=0; i<_dimensions; ++i) {
896  // swap tangent directions
897  double tmp = T(0,i);
898  T(0,i) = T(1,i);
899  T(1,i) = tmp;
900  }
901  }
902  for (int i=0; i<_dimensions; ++i) {
903  // flip the sign of the normal vector
904  T(_dimensions-1,i) *= -1;
905 
906  }
907  }
908  });
909  teamMember.team_barrier();
910 }
911 
912 KOKKOS_INLINE_FUNCTION
913 void GMLS::operator()(const ApplyCurvatureTargets&, const member_type& teamMember) const {
914 
915  /*
916  * Dimensions
917  */
918 
919  const int target_index = _initial_index_for_batch + teamMember.league_rank();
920  const int local_index = teamMember.league_rank();
921 
922  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
924  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
925  const int this_num_cols = _basis_multiplier*max_manifold_NP;
926  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
927 
928  int RHS_square_dim = getRHSSquareDim(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols);
929  int P_dim_0, P_dim_1;
930  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
931 
932  /*
933  * Data
934  */
937  // Solution from QR comes from RHS
939  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
940  } else {
941  // Solution from LU comes from P
942  Q = scratch_matrix_right_type(_P.data()
943  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_1)*TO_GLOBAL(P_dim_0), P_dim_1, P_dim_0);
944  }
945 
946  scratch_matrix_right_type T(_T.data()
948 
951  scratch_vector_type manifold_coeffs(_manifold_curvature_coefficients.data() + target_index*manifold_NP, manifold_NP);
952  scratch_vector_type manifold_gradient_coeffs(_manifold_curvature_gradient.data() + target_index*(_dimensions-1), (_dimensions-1));
953 
954  scratch_matrix_right_type G(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), _dimensions-1, _dimensions-1);
955  scratch_vector_type manifold_gradient(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), (_dimensions-1)*_max_num_neighbors);
957 
958  // delta, used for each thread
959  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
960  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
961 
962  /*
963  * Manifold
964  */
965 
966 
967  //
968  // GET TARGET COEFFICIENTS RELATED TO GRADIENT TERMS
969  //
970  // reconstruct grad_xi1 and grad_xi2, not used for manifold_coeffs
971  this->computeCurvatureFunctionals(teamMember, delta, thread_workspace, P_target_row, &T);
972  teamMember.team_barrier();
973 
974  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
975  for (int j=0; j<manifold_NP; ++j) { // set to zero
976  manifold_coeffs(j) = 0;
977  }
978  });
979  teamMember.team_barrier();
980 
981  double grad_xi1 = 0, grad_xi2 = 0;
982  for (int i=0; i<this->getNNeighbors(target_index); ++i) {
983  for (int k=0; k<_dimensions-1; ++k) {
984  double alpha_ij = 0;
985  int offset = getTargetOffsetIndexDevice(0, 0, k, 0);
986  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember,
987  manifold_NP), [=] (const int l, double &talpha_ij) {
988  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
989  talpha_ij += P_target_row(offset,l)*Q(l,i);
990  });
991  }, alpha_ij);
992  teamMember.team_barrier();
993  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
994  manifold_gradient(i*(_dimensions-1) + k) = alpha_ij; // stored staggered, grad_xi1, grad_xi2, grad_xi1, grad_xi2, ....
995  });
996  }
997  teamMember.team_barrier();
998 
999  XYZ rel_coord = getRelativeCoord(target_index, i, _dimensions, &T);
1000  double normal_coordinate = rel_coord[_dimensions-1];
1001 
1002  // apply coefficients to sample data
1003  grad_xi1 += manifold_gradient(i*(_dimensions-1)) * normal_coordinate;
1004  if (_dimensions>2) grad_xi2 += manifold_gradient(i*(_dimensions-1)+1) * normal_coordinate;
1005 
1006  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1007  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1008  // coefficients without a target premultiplied
1009  for (int j=0; j<manifold_NP; ++j) {
1010  manifold_coeffs(j) += Q(j,i) * normal_coordinate;
1011  }
1012  });
1013  });
1014  teamMember.team_barrier();
1015  }
1016 
1017  // Constructs high order orthonormal tangent space T and inverse of metric tensor
1018  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1019 
1020  manifold_gradient_coeffs(0) = grad_xi1;
1021  if (_dimensions>2) manifold_gradient_coeffs(1) = grad_xi2;
1022 
1023  // need to get 2x2 matrix of metric tensor
1024  G(0,0) = 1 + grad_xi1*grad_xi1;
1025 
1026  if (_dimensions>2) {
1027  G(0,1) = grad_xi1*grad_xi2;
1028  G(1,0) = grad_xi2*grad_xi1;
1029  G(1,1) = 1 + grad_xi2*grad_xi2;
1030  }
1031 
1032  double G_determinant;
1033  if (_dimensions==2) {
1034  G_determinant = G(0,0);
1035  compadre_kernel_assert_debug((G_determinant!=0) && "Determinant is zero.");
1036  G_inv(0,0) = 1/G_determinant;
1037  } else {
1038  G_determinant = G(0,0)*G(1,1) - G(0,1)*G(1,0); //std::sqrt(G_inv(0,0)*G_inv(1,1) - G_inv(0,1)*G_inv(1,0));
1039  compadre_kernel_assert_debug((G_determinant!=0) && "Determinant is zero.");
1040  {
1041  // inverse of 2x2
1042  G_inv(0,0) = G(1,1)/G_determinant;
1043  G_inv(1,1) = G(0,0)/G_determinant;
1044  G_inv(0,1) = -G(0,1)/G_determinant;
1045  G_inv(1,0) = -G(1,0)/G_determinant;
1046  }
1047  }
1048 
1049  });
1050  teamMember.team_barrier();
1051  //
1052  // END OF MANIFOLD METRIC CALCULATIONS
1053  //
1054 }
1055 
1056 KOKKOS_INLINE_FUNCTION
1057 void GMLS::operator()(const AssembleManifoldPsqrtW&, const member_type& teamMember) const {
1058 
1059  /*
1060  * Dimensions
1061  */
1062 
1063  const int target_index = _initial_index_for_batch + teamMember.league_rank();
1064  const int local_index = teamMember.league_rank();
1065 
1066  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
1068  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
1069  const int this_num_rows = _sampling_multiplier*this->getNNeighbors(target_index);
1070  const int this_num_cols = _basis_multiplier*max_manifold_NP;
1071  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
1072 
1073  int RHS_square_dim = getRHSSquareDim(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols);
1074  int P_dim_0, P_dim_1;
1075  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
1076 
1077  /*
1078  * Data
1079  */
1080 
1081  scratch_matrix_right_type PsqrtW(_P.data()
1082  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1), P_dim_0, P_dim_1);
1084  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
1085  scratch_vector_type w(_w.data()
1086  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
1087  scratch_matrix_right_type T(_T.data()
1089 
1090  // delta, used for each thread
1091  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
1092  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
1093 
1094 
1095  /*
1096  * Manifold
1097  */
1098 
1099  this->createWeightsAndP(teamMember, delta, thread_workspace, PsqrtW, w, _dimensions-1, _poly_order, true /* weight with W*/, &T, _reconstruction_space, _polynomial_sampling_functional);
1100  teamMember.team_barrier();
1101 
1103  // fill in RHS with Identity * sqrt(weights)
1104  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,this_num_rows), [=] (const int i) {
1105  for(int j = 0; j < this_num_rows; ++j) {
1106  Q(i, j) = (i==j) ? std::sqrt(w(i)) : 0;
1107  }
1108  });
1109  } else {
1110  // create global memory for matrix M = PsqrtW^T*PsqrtW
1111  // don't need to cast into scratch_matrix_left_type since the matrix is symmetric
1113  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
1114  // Assemble matrix M
1115  GMLS_LinearAlgebra::createM(teamMember, M, PsqrtW, this_num_cols /* # of columns */, max_num_rows);
1116 
1117  // Multiply PsqrtW with sqrt(W) to get PW
1118  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, max_num_rows), [=] (const int i) {
1119  for (int j=0; j < this_num_cols; j++) {
1120  PsqrtW(i, j) = PsqrtW(i, j)*std::sqrt(w(i));
1121  }
1122  });
1123  }
1124  teamMember.team_barrier();
1125 }
1126 
1127 KOKKOS_INLINE_FUNCTION
1128 void GMLS::operator()(const ApplyManifoldTargets&, const member_type& teamMember) const {
1129 
1130  /*
1131  * Dimensions
1132  */
1133 
1134  const int target_index = _initial_index_for_batch + teamMember.league_rank();
1135  const int local_index = teamMember.league_rank();
1136 
1137  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
1139  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
1140  const int this_num_cols = _basis_multiplier*max_manifold_NP;
1141  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
1142 
1143  int RHS_square_dim = getRHSSquareDim(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols);
1144  int P_dim_0, P_dim_1;
1145  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
1146 
1147  /*
1148  * Data
1149  */
1150 
1153  Coeffs = scratch_matrix_right_type(_RHS.data()
1154  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
1155  } else {
1156  Coeffs = scratch_matrix_right_type(_P.data()
1157  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_1)*TO_GLOBAL(P_dim_0), P_dim_1, P_dim_0);
1158  }
1159  scratch_vector_type w(_w.data()
1160  + TO_GLOBAL(local_index)*TO_GLOBAL(max_num_rows), max_num_rows);
1161  scratch_matrix_right_type T(_T.data()
1163 
1165  + TO_GLOBAL(target_index)*TO_GLOBAL((_dimensions-1))*TO_GLOBAL((_dimensions-1)), _dimensions-1, _dimensions-1);
1166  scratch_vector_type manifold_coeffs(_manifold_curvature_coefficients.data() + target_index*manifold_NP, manifold_NP);
1167  scratch_vector_type manifold_gradient_coeffs(_manifold_curvature_gradient.data() + target_index*(_dimensions-1), (_dimensions-1));
1168 
1172 
1173  // delta, used for each thread
1174  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), max_manifold_NP*_basis_multiplier);
1175  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
1176 
1177  /*
1178  * Apply Standard Target Evaluations to Polynomial Coefficients
1179  */
1180 
1181  this->computeTargetFunctionalsOnManifold(teamMember, delta, thread_workspace, P_target_row, T, G_inv, manifold_coeffs, manifold_gradient_coeffs);
1182  teamMember.team_barrier();
1183 
1184  this->applyTargetsToCoefficients(teamMember, t1, t2, Coeffs, w, P_target_row, _NP);
1185 
1186  teamMember.team_barrier();
1187 }
1188 
1189 
1190 KOKKOS_INLINE_FUNCTION
1191 void GMLS::operator()(const ComputePrestencilWeights&, const member_type& teamMember) const {
1192 
1193  /*
1194  * Dimensions
1195  */
1196 
1197  const int target_index = _initial_index_for_batch + teamMember.league_rank();
1198  const int local_index = teamMember.league_rank();
1199 
1200  const int max_num_rows = _sampling_multiplier*_max_num_neighbors;
1202  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
1203  const int this_num_cols = _basis_multiplier*max_manifold_NP;
1204  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
1205 
1206  int RHS_square_dim = getRHSSquareDim(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols);
1207  int P_dim_0, P_dim_1;
1208  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
1209 
1210  /*
1211  * Data
1212  */
1213 
1214 
1217 
1218  // holds polynomial coefficients for curvature reconstruction
1221  // Solution from QR comes from RHS
1222  Q = scratch_matrix_right_type(_RHS.data()
1223  + TO_GLOBAL(local_index)*TO_GLOBAL(RHS_square_dim)*TO_GLOBAL(RHS_square_dim), RHS_square_dim, RHS_square_dim);
1224  } else {
1225  // Solution from LU comes from P
1226  Q = scratch_matrix_right_type(_P.data()
1227  + TO_GLOBAL(local_index)*TO_GLOBAL(P_dim_1)*TO_GLOBAL(P_dim_0), P_dim_1, P_dim_0);
1228  }
1229 
1230  scratch_matrix_right_type T(_T.data()
1232 
1233  scratch_vector_type manifold_gradient(teamMember.team_scratch(_pm.getTeamScratchLevel(1)), (_dimensions-1)*_max_num_neighbors);
1235 
1236  /*
1237  * Prestencil Weight Calculations
1238  */
1239 
1241  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1242  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1243  _prestencil_weights(0,0,0,0,0) = -1;
1244  _prestencil_weights(1,0,0,0,0) = 1;
1245  });
1246  });
1248  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
1249  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1250  for (int j=0; j<_dimensions; ++j) {
1251  for (int k=0; k<_dimensions-1; ++k) {
1252  _prestencil_weights(0,target_index,0,k,j) = T(k,j);
1253  }
1254  }
1255  });
1256  });
1258  compadre_kernel_assert_debug(_problem_type==ProblemType::MANIFOLD && "StaggeredEdgeIntegralSample prestencil weight only written for manifolds.");
1259  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,this->getNNeighbors(target_index)), [=] (const int m) {
1260  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1261  for (int quadrature = 0; quadrature<_qm.getNumberOfQuadraturePoints(); ++quadrature) {
1262  XYZ tangent_quadrature_coord_2d;
1263  for (int j=0; j<_dimensions-1; ++j) {
1264  tangent_quadrature_coord_2d[j] = getTargetCoordinate(target_index, j, &T);
1265  tangent_quadrature_coord_2d[j] -= getNeighborCoordinate(target_index, m, j, &T);
1266  }
1267  double tangent_vector[3];
1268  tangent_vector[0] = tangent_quadrature_coord_2d[0]*T(0,0) + tangent_quadrature_coord_2d[1]*T(1,0);
1269  tangent_vector[1] = tangent_quadrature_coord_2d[0]*T(0,1) + tangent_quadrature_coord_2d[1]*T(1,1);
1270  tangent_vector[2] = tangent_quadrature_coord_2d[0]*T(0,2) + tangent_quadrature_coord_2d[1]*T(1,2);
1271 
1272  for (int j=0; j<_dimensions; ++j) {
1273  _prestencil_weights(0,target_index,m,0,j) += (1-_qm.getSite(quadrature,0))*tangent_vector[j]*_qm.getWeight(quadrature);
1274  _prestencil_weights(1,target_index,m,0,j) += _qm.getSite(quadrature,0)*tangent_vector[j]*_qm.getWeight(quadrature);
1275  }
1276  }
1277  });
1278  });
1280 
1281  scratch_vector_type delta(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), manifold_NP);
1282  scratch_vector_type thread_workspace(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), (max_poly_order+1)*_global_dimensions);
1283 
1284  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,this->getNNeighbors(target_index)), [&] (const int m) {
1285 
1286  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1287  this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, m, 0 /*alpha*/, 0 /*partial_direction*/, _dimensions-1, _curvature_poly_order, false /*specific order only*/, &T, ReconstructionSpace::ScalarTaylorPolynomial, PointSample);
1288  });
1289  // reconstructs gradient at local neighbor index m
1290  double grad_xi1 = 0, grad_xi2 = 0;
1291  Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember,this->getNNeighbors(target_index)), [=] (const int i, double &t_grad_xi1) {
1292  double alpha_ij = 0;
1293  for (int l=0; l<manifold_NP; ++l) {
1294  alpha_ij += delta(l)*Q(l,i);
1295  }
1296  XYZ rel_coord = getRelativeCoord(target_index, i, _dimensions, &T);
1297  double normal_coordinate = rel_coord[_dimensions-1];
1298 
1299  // apply coefficients to sample data
1300  t_grad_xi1 += alpha_ij * normal_coordinate;
1301  }, grad_xi1);
1302  t1(m) = grad_xi1;
1303 
1304  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1305  this->calcGradientPij(teamMember, delta.data(), thread_workspace.data(), target_index, m, 0 /*alpha*/, 1 /*partial_direction*/, _dimensions-1, _curvature_poly_order, false /*specific order only*/, &T, ReconstructionSpace::ScalarTaylorPolynomial, PointSample);
1306  });
1307  Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember,this->getNNeighbors(target_index)), [=] (const int i, double &t_grad_xi2) {
1308  double alpha_ij = 0;
1309  for (int l=0; l<manifold_NP; ++l) {
1310  alpha_ij += delta(l)*Q(l,i);
1311  }
1312  XYZ rel_coord = getRelativeCoord(target_index, i, _dimensions, &T);
1313  double normal_coordinate = rel_coord[_dimensions-1];
1314 
1315  // apply coefficients to sample data
1316  if (_dimensions>2) t_grad_xi2 += alpha_ij * normal_coordinate;
1317  }, grad_xi2);
1318  t2(m) = grad_xi2;
1319  });
1320 
1321  teamMember.team_barrier();
1322 
1323  scratch_matrix_right_type tangent(teamMember.thread_scratch(_pm.getThreadScratchLevel(1)), _dimensions-1, _dimensions);
1324 
1325  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,this->getNNeighbors(target_index)), [=] (const int m) {
1326  // constructs tangent vector at neighbor site
1327  Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
1328  for (int j=0; j<_dimensions; ++j) {
1329  tangent(0,j) = t1(m)*T(_dimensions-1,j) + T(0,j);
1330  tangent(1,j) = t2(m)*T(_dimensions-1,j) + T(1,j);
1331  }
1332 
1333  // calculate norm
1334  double norm = 0;
1335  for (int j=0; j<_dimensions; ++j) {
1336  norm += tangent(0,j)*tangent(0,j);
1337  }
1338 
1339  // normalize first vector
1340  norm = std::sqrt(norm);
1341  for (int j=0; j<_dimensions; ++j) {
1342  tangent(0,j) /= norm;
1343  }
1344 
1345  // orthonormalize next vector
1346  if (_dimensions-1 == 2) { // 2d manifold
1347  double dot_product = tangent(0,0)*tangent(1,0) + tangent(0,1)*tangent(1,1) + tangent(0,2)*tangent(1,2);
1348  for (int j=0; j<_dimensions; ++j) {
1349  tangent(1,j) -= dot_product*tangent(0,j);
1350  }
1351  // normalize second vector
1352  norm = 0;
1353  for (int j=0; j<_dimensions; ++j) {
1354  norm += tangent(1,j)*tangent(1,j);
1355  }
1356  norm = std::sqrt(norm);
1357  for (int j=0; j<_dimensions; ++j) {
1358  tangent(1,j) /= norm;
1359  }
1360  }
1361 
1362  // stores matrix of tangent and normal directions as a prestencil weight
1363  for (int j=0; j<_dimensions; ++j) {
1364  for (int k=0; k<_dimensions-1; ++k) {
1365  _prestencil_weights(0,target_index,m,k,j) = tangent(k,j);
1366  }
1367  }
1368  });
1369  });
1370  }
1371  teamMember.team_barrier();
1372 }
1373 
1374 } // Compadre
Kokkos::View< double * > _manifold_metric_tensor_inverse
metric tensor inverse for all problems
Kokkos::View< double *, layout_right > _alphas
generated alpha coefficients (device)
int calculateBasisMultiplier(const ReconstructionSpace rs) const
Calculate basis_multiplier.
KOKKOS_INLINE_FUNCTION int getVectorLanesPerThread() const
std::size_t global_index_type
KOKKOS_INLINE_FUNCTION int getAdditionalAlphaSizeFromConstraint(DenseSolverType dense_solver_type, ConstraintType constraint_type)
Kokkos::View< const double *****, layout_right >::HostMirror _host_prestencil_weights
generated weights for nontraditional samples required to transform data into expected sampling functi...
Kokkos::View< TargetOperation * > _operations
vector containing target functionals to be applied for reconstruction problem (device) ...
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const
bool _entire_batch_computed_at_once
whether entire calculation was computed at once the alternative is that it was broken up over many sm...
int _max_evaluation_sites_per_target
maximum number of evaluation sites for each target (includes target site)
Kokkos::View< double * > _manifold_curvature_coefficients
curvature polynomial coefficients for all problems
void generatePolynomialCoefficients(const int number_of_batches=1, const bool keep_coefficients=false)
Generates polynomial coefficients by setting up and solving least squares problems ! Sets up the batc...
Tag for functor to evaluate curvature targets and apply to coefficients of curvature reconstruction...
Kokkos::View< double * > _ref_N
Rank 2 tensor for high order approximation of tangent vectors for all problems.
constexpr SamplingFunctional ManifoldVectorPointSample
Point evaluations of the entire vector source function (but on a manifold, so it includes a transform...
bool _reference_outward_normal_direction_provided
whether or not the reference outward normal directions were provided by the user. ...
int getOutputDimensionOfSampling(SamplingFunctional sro) const
Dimensions ^ output rank for sampling operation (always in local chart if on a manifold, never ambient space)
team_policy::member_type member_type
KOKKOS_INLINE_FUNCTION int getAdditionalCoeffSizeFromConstraintAndSpace(DenseSolverType dense_solver_type, ConstraintType constraint_type, ReconstructionSpace reconstruction_space, const int dimension)
Neumann Gradient Scalar Type.
int _NP
dimension of basis for polynomial reconstruction
KOKKOS_INLINE_FUNCTION void createWeightsAndP(const member_type &teamMember, scratch_vector_type delta, scratch_vector_type thread_workspace, scratch_matrix_right_type P, scratch_vector_type w, const int dimension, int polynomial_order, bool weight_p=false, scratch_matrix_right_type *V=NULL, const ReconstructionSpace reconstruction_space=ReconstructionSpace::ScalarTaylorPolynomial, const SamplingFunctional sampling_strategy=PointSample) const
Fills the _P matrix with either P or P*sqrt(w)
int _total_alpha_values
used for sizing P_target_row and the _alphas view
Kokkos::View< double * > _epsilons
h supports determined through neighbor search (device)
bool _orthonormal_tangent_space_provided
whether or not the orthonormal tangent directions were provided by the user.
KOKKOS_INLINE_FUNCTION int getTargetOffsetIndexDevice(const int lro_num, const int input_component, const int output_component, const int additional_evaluation_local_index=0) const
Handles offset from operation input/output + extra evaluation sites.
KOKKOS_INLINE_FUNCTION void largestTwoEigenvectorsThreeByThreeSymmetric(const member_type &teamMember, scratch_matrix_right_type V, scratch_matrix_right_type PtP, const int dimensions, pool_type &random_number_pool)
Calculates two eigenvectors corresponding to two dominant eigenvalues.
Kokkos::View< int *, host_memory_space > _host_number_of_neighbors_list
convenient copy on host of number of neighbors
bool _use_reference_outward_normal_direction_provided_to_orient_surface
whether or not to use reference outward normal directions to orient the surface in a manifold problem...
Kokkos::View< double * > _RHS
sqrt(w)*Identity matrix for all problems, later holds polynomial coefficients for all problems ...
KOKKOS_INLINE_FUNCTION void calcGradientPij(const member_type &teamMember, double *delta, double *thread_workspace, const int target_index, int neighbor_index, const double alpha, const int partial_direction, const int dimension, const int poly_order, bool specific_order_only, const scratch_matrix_right_type *V, const ReconstructionSpace reconstruction_space, const SamplingFunctional sampling_strategy, const int additional_evaluation_local_index=0) const
Evaluates the gradient of a polynomial basis under the Dirac Delta (pointwise) sampling function...
Kokkos::View< double * > _w
contains weights for all problems
int _sampling_multiplier
actual dimension of the sampling functional e.g.
int _curvature_poly_order
order of basis for curvature reconstruction
constexpr SamplingFunctional VaryingManifoldVectorPointSample
For integrating polynomial dotted with normal over an edge.
Kokkos::View< double **, layout_right > _target_coordinates
coordinates for target sites for reconstruction (device)
Each target applies a different data transform, but the same to each neighbor.
int _data_sampling_multiplier
effective dimension of the data sampling functional e.g.
Each target applies a different transform for each neighbor.
NeighborLists< Kokkos::View< int * > > _neighbor_lists
Accessor to get neighbor list data, offset data, and number of neighbors per target.
Quadrature _qm
manages and calculates quadrature
Kokkos::View< double * >::HostMirror _host_T
tangent vectors information (host)
KOKKOS_INLINE_FUNCTION double getWeight(const int index) const
std::vector< TargetOperation > _lro
vector of user requested target operations
constexpr SamplingFunctional StaggeredEdgeAnalyticGradientIntegralSample
Analytical integral of a gradient source vector is just a difference of the scalar source at neighbor...
Scalar polynomial basis centered at the target site and scaled by sum of basis powers e...
Solve GMLS problem on a manifold (will use QR or SVD to solve the resultant GMLS problem dependent on...
KOKKOS_INLINE_FUNCTION double getSite(const int index, const int component) const
int _max_num_neighbors
maximum number of neighbors over all target sites
KOKKOS_INLINE_FUNCTION int getThreadsPerTeam(const int vector_lanes_per_thread=1) const
void batchLUFactorize(ParallelManager pm, double *P, int lda, int nda, double *RHS, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const size_t max_neighbors, const int initial_index_of_batch, int *neighbor_list_sizes)
Calls LAPACK or CUBLAS to solve a batch of LU problems.
const SamplingFunctional _data_sampling_functional
generally the same as _polynomial_sampling_functional, but can differ if specified at GMLS class inst...
Tag for functor to evaluate targets, apply target evaluation to polynomial coefficients to store in _...
DenseSolverType _dense_solver_type
solver type for GMLS problem - can be QR, SVD or LU
QR factorization performed on P*sqrt(w) matrix.
Tag for functor to create a coarse tangent approximation from a given neighborhood of points...
pool_type _random_number_pool
Kokkos::View< double * > _manifold_curvature_gradient
_dimension-1 gradient values for curvature for all problems
Kokkos::View< double *****, layout_right > _prestencil_weights
generated weights for nontraditional samples required to transform data into expected sampling functi...
#define TO_GLOBAL(variable)
KOKKOS_INLINE_FUNCTION int getNumberOfTargets() const
Get number of total targets having neighborhoods (host/device).
int _initial_index_for_batch
initial index for current batch
static KOKKOS_INLINE_FUNCTION int getNP(const int m, const int dimension=3, const ReconstructionSpace r_space=ReconstructionSpace::ScalarTaylorPolynomial)
Returns size of the basis for a given polynomial order and dimension General to dimension 1...
KOKKOS_INLINE_FUNCTION double getNeighborCoordinate(const int target_index, const int neighbor_list_num, const int dim, const scratch_matrix_right_type *V=NULL) const
Returns one component of the neighbor coordinate for a particular target.
KOKKOS_INLINE_FUNCTION double getTargetCoordinate(const int target_index, const int dim, const scratch_matrix_right_type *V=NULL) const
Returns one component of the target coordinate for a particular target.
KOKKOS_INLINE_FUNCTION void computeTargetFunctionalsOnManifold(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type P_target_row, scratch_matrix_right_type V, scratch_matrix_right_type G_inv, scratch_vector_type curvature_coefficients, scratch_vector_type curvature_gradients) const
Evaluates a polynomial basis with a target functional applied, using information from the manifold cu...
constexpr SamplingFunctional StaggeredEdgeIntegralSample
Samples consist of the result of integrals of a vector dotted with the tangent along edges between ne...
SVD factorization performed on P*sqrt(w) matrix.
const SamplingFunctional _polynomial_sampling_functional
polynomial sampling functional used to construct P matrix, set at GMLS class instantiation ...
Tag for functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity.
Tag for functor to calculate prestencil weights to apply to data to transform into a format expected ...
KOKKOS_INLINE_FUNCTION void getPDims(DenseSolverType dense_solver_type, ConstraintType constraint_type, ReconstructionSpace reconstruction_space, const int dimension, const int M, const int N, int &out_row, int &out_col)
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type
Kokkos::View< double *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_vector_type
Kokkos::View< double **, layout_right > _source_coordinates
all coordinates for the source for which _neighbor_lists refers (device)
void generateAlphas(const int number_of_batches=1, const bool keep_coefficients=false)
Meant to calculate target operations and apply the evaluations to the previously ! constructed polyno...
int _dimension_of_quadrature_points
dimension of quadrature rule
KOKKOS_INLINE_FUNCTION void computeTargetFunctionals(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type P_target_row) const
Evaluates a polynomial basis with a target functional applied to each member of the basis...
KOKKOS_INLINE_FUNCTION void createWeightsAndPForCurvature(const member_type &teamMember, scratch_vector_type delta, scratch_vector_type thread_workspace, scratch_matrix_right_type P, scratch_vector_type w, const int dimension, bool only_specific_order, scratch_matrix_right_type *V=NULL) const
Fills the _P matrix with P*sqrt(w) for use in solving for curvature.
ParallelManager _pm
determines scratch level spaces and is used to call kernels
void CallFunctorWithTeamThreads(const global_index_type batch_size, C functor) const
Calls a parallel_for parallel_for will break out over loops over teams with each thread executing cod...
KOKKOS_INLINE_FUNCTION void computeCurvatureFunctionals(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type P_target_row, const scratch_matrix_right_type *V, const local_index_type local_neighbor_index=-1) const
Evaluates a polynomial basis for the curvature with a gradient target functional applied.
int _added_alpha_size
additional alpha coefficients due to constraints
ProblemType _problem_type
problem type for GMLS problem, can also be set to STANDARD for normal or MANIFOLD for manifold proble...
int _basis_multiplier
dimension of the reconstructed function e.g.
void CallFunctorWithTeamThreadsAndVectors(const global_index_type batch_size, const int threads_per_team, const int vector_lanes_per_thread, C functor) const
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
int _global_dimensions
spatial dimension of the points, set at class instantiation only
Kokkos::View< double * > _T
Rank 3 tensor for high order approximation of tangent vectors for all problems.
void setTeamScratchSize(const int level, const int value)
LU factorization performed on P^T*W*P matrix.
bool _store_PTWP_inv_PTW
whether polynomial coefficients were requested to be stored (in a state not yet applied to data) ...
void setThreadScratchSize(const int level, const int value)
void batchSVDFactorize(ParallelManager pm, bool swap_layout_P, double *P, int lda, int nda, bool swap_layout_RHS, double *RHS, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const size_t max_neighbors, const int initial_index_of_batch, int *neighbor_list_sizes)
Calls LAPACK or CUBLAS to solve a batch of SVD problems.
KOKKOS_INLINE_FUNCTION int getNumberOfQuadraturePoints() const
void batchQRFactorize(ParallelManager pm, double *P, int lda, int nda, double *RHS, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const size_t max_neighbors, const int initial_index_of_batch, int *neighbor_list_sizes)
Calls LAPACK or CUBLAS to solve a batch of QR problems.
ReconstructionSpace _reconstruction_space
reconstruction space for GMLS problems, set at GMLS class instantiation
Kokkos::View< const double *, layout_right >::HostMirror _host_alphas
generated alpha coefficients (host)
Kokkos::View< double * > _P
P*sqrt(w) matrix for all problems.
Tag for functor to evaluate targets, apply target evaluation to polynomial coefficients to store in _...
Tag for functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity.
constexpr SamplingFunctional PointSample
Available sampling functionals.
int _dimensions
dimension of the problem, set at class instantiation only
int calculateSamplingMultiplier(const ReconstructionSpace rs, const SamplingFunctional sro) const
Calculate sampling_multiplier.
std::string _quadrature_type
quadrature rule type
Kokkos::View< TargetOperation * >::HostMirror _host_operations
vector containing target functionals to be applied for reconstruction problem (host) ...
bool _nontrivial_nullspace
whether or not operator to be inverted for GMLS problem has a nontrivial nullspace (requiring SVD) ...
struct SubviewND< T, T2, enable_if_t<(T::rank< 2)> >{T _data_in;T2 _data_original_view;bool _scalar_as_vector_if_needed;SubviewND(T data_in, T2 data_original_view, bool scalar_as_vector_if_needed){_data_in=data_in;_data_original_view=data_original_view;_scalar_as_vector_if_needed=scalar_as_vector_if_needed;}auto get1DView(const int column_num) -> decltype(Kokkos::subview(_data_in, Kokkos::ALL))
Creates 1D subviews of data from a 1D view, generally constructed with CreateNDSliceOnDeviceView.
global_index_type getTotalNeighborsOverAllListsHost() const
Get the sum of the number of neighbors of all targets&#39; neighborhoods (host)
Tag for functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity for curva...
KOKKOS_INLINE_FUNCTION void operator()(const AssembleStandardPsqrtW &, const member_type &teamMember) const
Functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity.
bool nontrivial_nullspace
Whether the SamplingFunctional + ReconstructionSpace results in a nontrivial nullspace requiring SVD...
KOKKOS_INLINE_FUNCTION XYZ getRelativeCoord(const int target_index, const int neighbor_list_num, const int dimension, const scratch_matrix_right_type *V=NULL) const
Returns the relative coordinate as a vector between the target site and the neighbor site...
KOKKOS_INLINE_FUNCTION void createM(const member_type &teamMember, scratch_matrix_right_type M_data, scratch_matrix_right_type weighted_P, const int columns, const int rows)
Creates a matrix M=A^T*A from a matrix A.
ConstraintType _constraint_type
constraint type for GMLS problem
void setThreadsPerTeam(const int value)
int _local_dimensions
dimension of the problem, set at class instantiation only. For manifolds, generally _global_dimension...
KOKKOS_INLINE_FUNCTION int getRHSSquareDim(DenseSolverType dense_solver_type, ConstraintType constraint_type, ReconstructionSpace reconstruction_space, const int dimension, const int M, const int N)
int _order_of_quadrature_points
order of exact polynomial integration for quadrature rule
KOKKOS_INLINE_FUNCTION void evaluateConstraints(scratch_matrix_right_type M, scratch_matrix_right_type PsqrtW, const ConstraintType constraint_type, const ReconstructionSpace reconstruction_space, const int NP, const double cutoff_p, const int dimension, const int num_neighbors=0, scratch_matrix_right_type *T=NULL)
#define compadre_assert_release(condition)
compadre_assert_release is used for assertions that should always be checked, but generally are not e...
Tag for functor to determine if tangent directions need reordered, and to reorder them if needed...
Tag for functor to evaluate curvature targets and construct accurate tangent direction approximation ...
KOKKOS_INLINE_FUNCTION int getNNeighbors(const int target_index) const
Returns number of neighbors for a particular target.
KOKKOS_INLINE_FUNCTION void applyTargetsToCoefficients(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type Q, scratch_vector_type w, scratch_matrix_right_type P_target_row, const int target_NP) const
Helper function for applying the evaluations from a target functional to the polynomial coefficients...
int _poly_order
order of basis for polynomial reconstruction
#define compadre_kernel_assert_debug(condition)
KOKKOS_INLINE_FUNCTION int getThreadScratchLevel(const int level) const