Compadre  1.5.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Compadre_GMLS.cpp
Go to the documentation of this file.
1 #include "Compadre_GMLS.hpp"
2 #include "Compadre_Functors.hpp"
3 
4 namespace Compadre {
5 
6 void GMLS::generatePolynomialCoefficients(const int number_of_batches, const bool keep_coefficients, const bool clear_cache) {
7 
8  compadre_assert_release( (keep_coefficients==false || number_of_batches==1)
9  && "keep_coefficients is set to true, but number of batches exceeds 1.");
10 
11  /*
12  * Verify PointConnections are valid
13  */
14  this->verifyPointConnections(true);
16 
17  // ensure that solution set has neighbor list consistent with point connections
20 
21  /*
22  * Generate Quadrature
23  */
25 
26  /*
27  * Generate SolutionSet on device
28  */
30 
31  /*
32  * Operations to Device
33  */
34 
35  // copy over operations
36  _operations = decltype(_operations)("operations", _h_ss._lro.size());
37  _host_operations = Kokkos::create_mirror_view(_operations);
38 
39  // make sure at least one target operation specified
40  compadre_assert_release((_h_ss._lro.size() > 0)
41  && "No target operations added to GMLS class before calling generatePolynomialCoefficients().");
42 
43  // loop through list of linear reconstruction operations to be performed and set them on the host
44  for (size_t i=0; i<_h_ss._lro.size(); ++i) _host_operations(i) = _h_ss._lro[i];
45 
46  // get copy of operations on the device
47  Kokkos::deep_copy(_operations, _host_operations);
48 
49  /*
50  * Initialize Alphas and Prestencil Weights
51  */
52 
53  // throw an assertion for QR solver incompatibility
54  // TODO: this is a temporary location for this check, in the future the
55  // constraint type could be an object that can check when given a dense_solver_type
58  && "Cannot solve GMLS problems with the NEUMANN_GRAD_SCALAR constraint using QR Factorization.");
59 
60  // calculate the additional size for different constraint problems
63 
64  // initialize all alpha values to be used for taking the dot product with data to get a reconstruction
65  try {
67  int total_added_alphas = _pc._target_coordinates.extent(0)*_d_ss._added_alpha_size;
68  _d_ss._alphas =
69  decltype(_d_ss._alphas)("alphas", (total_neighbors + TO_GLOBAL(total_added_alphas))
71  // this deep copy writes to all theoretically allocated memory,
72  // ensuring that allocation attempted was successful
73  Kokkos::deep_copy(_d_ss._alphas, 0.0);
74  } catch(std::exception &e) {
75  printf("Insufficient memory to store alphas: \n\n%s", e.what());
76  throw e;
77  }
78 
79  // initialize the prestencil weights that are applied to sampling data to put it into a form
80  // that the GMLS operator will be able to operate on
81  auto sro = _data_sampling_functional;
82  int max_num_neighbors = _pc._nla.getMaxNumNeighbors();
83  try {
84  _prestencil_weights = decltype(_prestencil_weights)("Prestencil weights",
85  std::pow(2,sro.use_target_site_weights),
86  (sro.transform_type==DifferentEachTarget
87  || sro.transform_type==DifferentEachNeighbor) ?
88  this->getNeighborLists()->getNumberOfTargets() : 1,
89  (sro.transform_type==DifferentEachNeighbor) ?
90  max_num_neighbors : 1,
91  (sro.output_rank>0) ?
93  (sro.input_rank>0) ?
94  _global_dimensions : 1);
95  } catch(std::exception &e) {
96  printf("Insufficient memory to store prestencil weights: \n\n%s", e.what());
97  throw e;
98  }
99  Kokkos::fence();
100 
101  /*
102  * Determine if Nonstandard Sampling Dimension or Basis Component Dimension
103  */
104 
105  // calculate the dimension of the basis (a vector space on a manifold requires two components, for example)
107 
108  // calculate sampling dimension
110 
111  // effective number of components in the basis
113 
114  // special case for using a higher order for sampling from a polynomial space that are gradients of a scalar polynomial
116  // if the reconstruction is being made with a gradient of a basis, then we want that basis to be one order higher so that
117  // the gradient is consistent with the convergence order expected.
118  _poly_order += 1;
120  }
121 
122  /*
123  * Dimensions
124  */
125 
126  // for tallying scratch space needed for device kernel calls
127  int team_scratch_size_a = 0;
128 
129  // TEMPORARY, take to zero after conversion
130  int team_scratch_size_b = 0;
131  int thread_scratch_size_a = 0;
132  int thread_scratch_size_b = 0;
133 
134  // dimensions that are relevant for each subproblem
135  int max_num_rows = _sampling_multiplier*max_num_neighbors;
136  int this_num_cols = _basis_multiplier*_NP;
137  int manifold_NP = 0;
138 
139  // determines whether RHS will be stored implicitly
140  // if true, instead of RHS storing the full sqrt(W) explicitly,
141  // only the diagonal entries of sqrt(W) will be stored as a
142  // 1D array beginning at entry with matrix coordinate (0,0)
143  bool implicit_RHS = (_dense_solver_type != DenseSolverType::LU);
144 
145  int basis_powers_space_multiplier = (_reconstruction_space == BernsteinPolynomial) ? 2 : 1;
147  // these dimensions already calculated differ in the case of manifolds
150  const int max_manifold_NP = (manifold_NP > _NP) ? manifold_NP : _NP;
151  this_num_cols = _basis_multiplier*max_manifold_NP;
152  const int max_poly_order = (_poly_order > _curvature_poly_order) ? _poly_order : _curvature_poly_order;
153 
154  /*
155  * Calculate Scratch Space Allocations
156  */
157 
158  team_scratch_size_b += scratch_matrix_right_type::shmem_size(_dimensions, _dimensions); // PTP matrix
159  team_scratch_size_b += scratch_vector_type::shmem_size( (_dimensions-1)*max_num_neighbors ); // manifold_gradient
160 
161  thread_scratch_size_a += scratch_vector_type::shmem_size(this_num_cols); // delta, used for each thread
162  thread_scratch_size_a += scratch_vector_type::shmem_size(
163  (max_poly_order+1)*_global_dimensions*basis_powers_space_multiplier); // temporary space for powers in basis
165  team_scratch_size_b += scratch_vector_type::shmem_size(max_num_neighbors); // t1 work vector for prestencils
166  team_scratch_size_b += scratch_vector_type::shmem_size(max_num_neighbors); // t2 work vector for prestencils
167  thread_scratch_size_b += scratch_vector_type::shmem_size(_dimensions*_dimensions); // temporary tangent calculations, used for each thread
168  }
169 
170  // allocate data on the device (initialized to zero)
172  _T = Kokkos::View<double*>("tangent approximation",_pc._target_coordinates.extent(0)*_dimensions*_dimensions);
173  Kokkos::deep_copy(_T, 0.0);
174  }
175  _manifold_curvature_coefficients = Kokkos::View<double*>("manifold curvature coefficients",
176  _pc._target_coordinates.extent(0)*manifold_NP);
177  Kokkos::deep_copy(_manifold_curvature_coefficients, 0.0);
178 
179  } else { // Standard GMLS
180 
181  /*
182  * Calculate Scratch Space Allocations
183  */
184 
185  thread_scratch_size_a += scratch_vector_type::shmem_size(this_num_cols); // delta, used for each thread
186  thread_scratch_size_a += scratch_vector_type::shmem_size(
187  (_poly_order+1)*_global_dimensions*basis_powers_space_multiplier); // temporary space for powers in basis
188 
189  }
190  _pm.setTeamScratchSize(0, team_scratch_size_a);
191  _pm.setTeamScratchSize(1, team_scratch_size_b);
192  _pm.setThreadScratchSize(0, thread_scratch_size_a);
193  _pm.setThreadScratchSize(1, thread_scratch_size_b);
194 
195  /*
196  * Calculate the size for matrix P and RHS
197  */
198 
199  int RHS_dim_0, RHS_dim_1;
200  getRHSDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, RHS_dim_0, RHS_dim_1);
201 
202  int P_dim_0, P_dim_1;
203  getPDims(_dense_solver_type, _constraint_type, _reconstruction_space, _dimensions, max_num_rows, this_num_cols, P_dim_0, P_dim_1);
204 
205  /*
206  * Allocate Global Device Storage of Data Needed Over Multiple Calls
207  */
208 
209  global_index_type max_batch_size = (_pc._target_coordinates.extent(0) + TO_GLOBAL(number_of_batches) - 1) / TO_GLOBAL(number_of_batches);
210  try {
211  _RHS = Kokkos::View<double*>("RHS", max_batch_size*TO_GLOBAL(RHS_dim_0)*TO_GLOBAL(RHS_dim_1));
212  _P = Kokkos::View<double*>("P", max_batch_size*TO_GLOBAL(P_dim_0)*TO_GLOBAL(P_dim_1));
213  _w = Kokkos::View<double*>("w", max_batch_size*TO_GLOBAL(max_num_rows));
214  _Z = Kokkos::View<double*>("Z", max_batch_size*TO_GLOBAL(_d_ss._total_alpha_values*_d_ss._max_evaluation_sites_per_target*this_num_cols));
215 
216  } catch (std::exception &e) {
217  printf("Failed to allocate space for RHS, P, and w. Consider increasing number_of_batches: \n\n%s", e.what());
218  throw e;
219  }
220  Kokkos::fence();
221 
222  /*
223  * Calculate Optimal Threads Based On Levels of Parallelism
224  */
225 
228  && "Normal vectors are required for solving GMLS problems with the NEUMANN_GRAD_SCALAR constraint.");
229  }
230 
232  for (int batch_num=0; batch_num<number_of_batches; ++batch_num) {
233 
234  auto this_batch_size = std::min(_pc._target_coordinates.extent(0)-_initial_index_for_batch, max_batch_size);
235  Kokkos::deep_copy(_RHS, 0.0);
236  Kokkos::deep_copy(_P, 0.0);
237  Kokkos::deep_copy(_w, 0.0);
238  Kokkos::deep_copy(_Z, 0.0);
239 
240  auto gmls_basis_data = this->extractBasisData();
241  auto gmls_solution_data = this->extractSolutionData();
242 
243 
244  // even kernels that should run on other # of vector lanes do not (on GPU)
245  auto tp = _pm.TeamPolicyThreadsAndVectors(this_batch_size, _pm._default_threads, 1);
246  //auto tp = _pm.TeamPolicyThreadsAndVectors(this_batch_size, _pm._default_threads, _pm._default_vector_lanes);
247  //const auto work_item_property = Kokkos::Experimental::WorkItemProperty::HintLightWeight;
248  //const auto tp2 = Kokkos::Experimental::require(tp, work_item_property);
249 
251 
252  /*
253  * MANIFOLD Problems
254  */
255 
256  //auto functor_name = Name(gmls_basis_data);
257  //Kokkos::parallel_for("Name", tp, functor_name);
258  if (!_orthonormal_tangent_space_provided) { // user did not specify orthonormal tangent directions, so we approximate them first
259  // coarse tangent plane approximation construction of P^T*P
260  auto functor_compute_coarse_tangent_plane = ComputeCoarseTangentPlane(gmls_basis_data);
261  Kokkos::parallel_for("ComputeCoarseTangentPlane", tp, functor_compute_coarse_tangent_plane);
262 
263  // if the user provided the reference outward normal direction, then orient the computed or user provided
264  // outward normal directions in the tangent bundle
266  // use the reference outward normal direction provided by the user to orient
267  // the tangent bundle
268  auto functor_fix_tangent_direction_ordering = FixTangentDirectionOrdering(gmls_basis_data);
269  Kokkos::parallel_for("FixTangentDirectionOrdering", tp, functor_fix_tangent_direction_ordering);
270  }
271 
272  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity for curvature
273  auto functor_assemble_curvature_psqrtw = AssembleCurvaturePsqrtW(gmls_basis_data);
274  Kokkos::parallel_for("AssembleCurvaturePsqrtW", tp, functor_assemble_curvature_psqrtw);
275 
277  // solves P^T*P against P^T*W with LU, stored in P
278  Kokkos::Profiling::pushRegion("Curvature LU Factorization");
279  // batchLU expects layout_left matrix tiles for B
280  // by giving it layout_right matrix tiles with reverse ordered ldb and ndb
281  // it effects a transpose of _P in layout_left
282  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(_pm, _RHS.data(), RHS_dim_0, RHS_dim_1, _P.data(), P_dim_1, P_dim_0, manifold_NP, manifold_NP, max_num_neighbors, this_batch_size, implicit_RHS);
283  Kokkos::Profiling::popRegion();
284  } else {
285  // solves P*sqrt(weights) against sqrt(weights)*Identity with QR, stored in RHS
286  Kokkos::Profiling::pushRegion("Curvature QR+Pivoting Factorization");
287  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_dim_0, RHS_dim_1, max_num_neighbors, manifold_NP, max_num_neighbors, this_batch_size, implicit_RHS);
288  Kokkos::Profiling::popRegion();
289  }
290 
291  // evaluates targets, applies target evaluation to polynomial coefficients for curvature
292  auto functor_get_accurate_tangent_directions = GetAccurateTangentDirections(gmls_basis_data);
293  Kokkos::parallel_for("GetAccurateTangentDirections", tp, functor_get_accurate_tangent_directions);
294 
295  // Due to converting layout, entries that are assumed zeros may become non-zeros.
296  Kokkos::deep_copy(_P, 0.0);
297 
298  if (batch_num==number_of_batches-1) {
299  // copy tangent bundle from device back to host
300  _host_T = Kokkos::create_mirror_view(_T);
301  Kokkos::deep_copy(_host_T, _T);
302  }
303  }
304 
305  // this time assembling curvature PsqrtW matrix is using a highly accurate approximation of the tangent, previously calculated
306  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity for curvature
307  auto functor_assemble_curvature_psqrtw = AssembleCurvaturePsqrtW(gmls_basis_data);
308  Kokkos::parallel_for("AssembleCurvaturePsqrtW", tp, functor_assemble_curvature_psqrtw);
309 
311  // solves P^T*P against P^T*W with LU, stored in P
312  Kokkos::Profiling::pushRegion("Curvature LU Factorization");
313  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(_pm, _RHS.data(), RHS_dim_0, RHS_dim_1, _P.data(), P_dim_1, P_dim_0, manifold_NP, manifold_NP, max_num_neighbors, this_batch_size, implicit_RHS);
314  Kokkos::Profiling::popRegion();
315  } else {
316  // solves P*sqrt(weights) against sqrt(weights)*Identity, stored in RHS
317  Kokkos::Profiling::pushRegion("Curvature QR+Pivoting Factorization");
318  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_dim_0, RHS_dim_1, max_num_neighbors, manifold_NP, max_num_neighbors, this_batch_size, implicit_RHS);
319  Kokkos::Profiling::popRegion();
320  }
321 
322  // evaluates targets, applies target evaluation to polynomial coefficients for curvature
323  auto functor_apply_curvature_targets = ApplyCurvatureTargets(gmls_basis_data);
324  Kokkos::parallel_for("ApplyCurvatureTargets", tp, functor_apply_curvature_targets);
325  Kokkos::fence();
326 
327  // prestencil weights calculated here. appropriate because:
328  // precedes polynomial reconstruction from data (replaces contents of _RHS)
329  // follows reconstruction of geometry
330  // calculate prestencil weights
331  auto functor_compute_prestencil_weights = ComputePrestencilWeights(gmls_basis_data);
332  Kokkos::parallel_for("ComputePrestencilWeights", tp, functor_compute_prestencil_weights);
333 
334  // Due to converting layout, entried that are assumed zeros may become non-zeros.
335  Kokkos::deep_copy(_P, 0.0);
336 
337  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity
338  auto functor_assemble_manifold_psqrtw = AssembleManifoldPsqrtW(gmls_basis_data);
339  Kokkos::parallel_for("AssembleManifoldPsqrtW", tp, functor_assemble_manifold_psqrtw);
340 
341  // solves P*sqrt(weights) against sqrt(weights)*Identity, stored in RHS
343  Kokkos::Profiling::pushRegion("Manifold LU Factorization");
344  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(_pm, _RHS.data(), RHS_dim_0, RHS_dim_1, _P.data(), P_dim_1, P_dim_0, this_num_cols, this_num_cols, max_num_rows, this_batch_size, implicit_RHS);
345  Kokkos::Profiling::popRegion();
346  } else {
347  Kokkos::Profiling::pushRegion("Manifold QR+Pivoting Factorization");
348  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_dim_0, RHS_dim_1, max_num_rows, this_num_cols, max_num_rows, this_batch_size, implicit_RHS);
349  Kokkos::Profiling::popRegion();
350  }
351  Kokkos::fence();
352 
353  } else {
354 
355  /*
356  * STANDARD GMLS Problems
357  */
358 
359  // assembles the P*sqrt(weights) matrix and constructs sqrt(weights)*Identity
360  auto functor_assemble_standard_psqrtw = AssembleStandardPsqrtW(gmls_basis_data);
361  //printf("size of assemble: %lu\n", sizeof(functor_assemble_standard_psqrtw));
362  Kokkos::parallel_for("AssembleStandardPsqrtW", tp, functor_assemble_standard_psqrtw);
363  Kokkos::fence();
364 
365  // solves P*sqrt(weights) against sqrt(weights)*Identity, stored in RHS
367  Kokkos::Profiling::pushRegion("LU Factorization");
368  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_left,layout_right>(_pm, _RHS.data(), RHS_dim_0, RHS_dim_1, _P.data(), P_dim_1, P_dim_0, this_num_cols + added_coeff_size, this_num_cols + added_coeff_size, max_num_rows + _d_ss._added_alpha_size, this_batch_size, implicit_RHS);
369  Kokkos::Profiling::popRegion();
370  } else {
371  Kokkos::Profiling::pushRegion("QR+Pivoting Factorization");
373  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(_pm, _RHS.data(), RHS_dim_0, RHS_dim_1, _P.data(), P_dim_1, P_dim_0, this_num_cols + added_coeff_size, this_num_cols + added_coeff_size, max_num_rows + _d_ss._added_alpha_size, this_batch_size, implicit_RHS);
374  } else {
375  GMLS_LinearAlgebra::batchQRPivotingSolve<layout_right,layout_right,layout_right>(_pm, _P.data(), P_dim_0, P_dim_1, _RHS.data(), RHS_dim_0, RHS_dim_1, max_num_rows, this_num_cols, max_num_rows, this_batch_size, implicit_RHS);
376  }
377  Kokkos::Profiling::popRegion();
378  }
379 
380  auto functor_compute_prestencil_weights = ComputePrestencilWeights(gmls_basis_data);
381  Kokkos::parallel_for("ComputePrestencilWeights", tp, functor_compute_prestencil_weights);
382  Kokkos::fence();
383  }
384 
385  /*
386  * Calculate Optimal Threads Based On Levels of Parallelism
387  */
388 
389 
391 
392  /*
393  * MANIFOLD Problems
394  */
395 
396  // evaluates targets, applies target evaluation to polynomial coefficients to store in _alphas
397  auto functor_evaluate_manifold_targets = EvaluateManifoldTargets(gmls_basis_data);
398  Kokkos::parallel_for("EvaluateManifoldTargets", tp, functor_evaluate_manifold_targets);
399 
400  } else {
401 
402  /*
403  * STANDARD GMLS Problems
404  */
405 
406  // evaluates targets, applies target evaluation to polynomial coefficients to store in _alphas
407  auto functor_evaluate_standard_targets = EvaluateStandardTargets(gmls_basis_data);
408  Kokkos::parallel_for("EvaluateStandardTargets", tp, functor_evaluate_standard_targets);
409  }
410 
411 
412  // fine grain control over applying target (most expensive part after QR solve)
413  ParallelManager pm;
414  tp = pm.TeamPolicyThreadsAndVectors(this_batch_size, pm._default_threads, pm._default_vector_lanes);
415  const auto work_item_property = Kokkos::Experimental::WorkItemProperty::HintLightWeight;
416  const auto tp2 = Kokkos::Experimental::require(tp, work_item_property);
417  auto functor_apply_targets = ApplyTargets(gmls_solution_data);
418  //printf("size of apply: %lu\n", sizeof(functor_apply_targets));
419  Kokkos::parallel_for("ApplyTargets", tp2, functor_apply_targets);
420 
421 
422  _initial_index_for_batch += this_batch_size;
423  if ((size_t)_initial_index_for_batch == _pc._target_coordinates.extent(0)) break;
424  } // end of batch loops
425 
426  if (clear_cache) {
427  // deallocate _P and _w
428  _w = Kokkos::View<double*>("w",0);
429  _Z = Kokkos::View<double*>("Z",0);
430  if (number_of_batches > 1) { // no reason to keep coefficients if they aren't all in memory
431  _RHS = Kokkos::View<double*>("RHS",0);
432  _P = Kokkos::View<double*>("P",0);
434  } else {
436  _RHS = Kokkos::View<double*>("RHS", 0);
437  if (!keep_coefficients) _P = Kokkos::View<double*>("P", 0);
438  } else {
440  _P = Kokkos::View<double*>("P", 0);
441  if (!keep_coefficients) _RHS = Kokkos::View<double*>("RHS", 0);
442  } else {
443  _RHS = Kokkos::View<double*>("RHS", 0);
444  if (!keep_coefficients) _P = Kokkos::View<double*>("P", 0);
445  }
446  }
447  if (keep_coefficients) _store_PTWP_inv_PTW = true;
448  }
449  }
450 
451  /*
452  * Device to Host Copy Of Solution
453  */
454  // copy computed alphas back to the host
455  this->_d_ss._contains_valid_alphas = true;
458  _host_prestencil_weights = Kokkos::create_mirror_view(_prestencil_weights);
459  Kokkos::deep_copy(_host_prestencil_weights, _prestencil_weights);
460  }
461 
462 }
463 
464 void GMLS::generateAlphas(const int number_of_batches, const bool keep_coefficients, const bool clear_cache) {
465 
466  this->generatePolynomialCoefficients(number_of_batches, keep_coefficients, clear_cache);
467 
468 }
469 
471  auto gmls = *this;
472  auto data = GMLSSolutionData();
473  data._sampling_multiplier = gmls._sampling_multiplier;
474  data._initial_index_for_batch = gmls._initial_index_for_batch;
475  data._d_ss = gmls._d_ss;
476 
477  // store results of calculation in struct
478  const int max_num_neighbors = gmls._pc._nla.getMaxNumNeighbors();
479  const int max_num_rows = gmls._sampling_multiplier*max_num_neighbors;
480 
481  // applyTargetsToCoefficients currently uses data.this_num_cols for the
482  // dot product range. Even for manifold problems, it is still appropriate to
483  // use gmls._basis_multiplier*gmls._NP. If GMLSSolutionData was ever to be
484  // used for applying solution coefficients for the curvature reconstruction,
485  // the manifolf_NP would have to be used for that application (requiring an
486  // extra argument to applyTargetsToCoefficients)
487  data.this_num_cols = gmls._basis_multiplier*gmls._NP;
488 
489  int RHS_dim_0, RHS_dim_1;
490  getRHSDims(gmls._dense_solver_type, gmls._constraint_type, gmls._reconstruction_space,
491  gmls._dimensions, max_num_rows, data.this_num_cols, RHS_dim_0, RHS_dim_1);
492  int P_dim_0, P_dim_1;
493  getPDims(gmls._dense_solver_type, gmls._constraint_type, gmls._reconstruction_space,
494  gmls._dimensions, max_num_rows, data.this_num_cols, P_dim_0, P_dim_1);
495 
496  if ((gmls._constraint_type == ConstraintType::NO_CONSTRAINT) && (gmls._dense_solver_type != DenseSolverType::LU)) {
497  data.Coeffs_data = gmls._RHS.data();
498  data.Coeffs_dim_0 = RHS_dim_0;
499  data.Coeffs_dim_1 = RHS_dim_1;
500  } else {
501  data.Coeffs_data = gmls._P.data();
502  data.Coeffs_dim_0 = P_dim_1;
503  data.Coeffs_dim_1 = P_dim_0;
504  }
505 
506  data.P_target_row_dim_0 = gmls._d_ss._total_alpha_values*gmls._d_ss._max_evaluation_sites_per_target;
507  data.P_target_row_dim_1 = data.this_num_cols;
508  data.P_target_row_data = gmls._Z.data();
509 
510  data.operations_size = gmls._operations.size();
511 
512  data.number_of_neighbors_list = gmls._pc._nla._number_of_neighbors_list;
513  data.additional_number_of_neighbors_list = gmls._additional_pc._nla._number_of_neighbors_list;
514 
515  return data;
516 }
517 
519  auto gmls = *this;
520  auto data = GMLSBasisData();
521  data._source_extra_data = gmls._source_extra_data;
522  data._target_extra_data = gmls._target_extra_data;
523  data._pc = gmls._pc;
524  data._epsilons = gmls._epsilons ;
525  data._prestencil_weights = gmls._prestencil_weights ;
526  data._additional_pc = gmls._additional_pc;
527  data._poly_order = gmls._poly_order ;
528  data._curvature_poly_order = gmls._curvature_poly_order;
529  data._NP = gmls._NP;
530  data._global_dimensions = gmls._global_dimensions;
531  data._local_dimensions = gmls._local_dimensions;
532  data._dimensions = gmls._dimensions;
533  data._reconstruction_space = gmls._reconstruction_space;
534  data._reconstruction_space_rank = gmls._reconstruction_space_rank;
535  data._dense_solver_type = gmls._dense_solver_type;
536  data._problem_type = gmls._problem_type;
537  data._constraint_type = gmls._constraint_type;
538  data._polynomial_sampling_functional = gmls._polynomial_sampling_functional;
539  data._data_sampling_functional = gmls._data_sampling_functional;
540  data._curvature_support_operations = gmls._curvature_support_operations;
541  data._operations = gmls._operations;
542  data._weighting_type = gmls._weighting_type;
543  data._curvature_weighting_type = gmls._curvature_weighting_type;
544  data._weighting_p = gmls._weighting_p;
545  data._weighting_n = gmls._weighting_n;
546  data._curvature_weighting_p = gmls._curvature_weighting_p;
547  data._curvature_weighting_n = gmls._curvature_weighting_n;
548  data._basis_multiplier = gmls._basis_multiplier;
549  data._sampling_multiplier = gmls._sampling_multiplier;
550  data._data_sampling_multiplier = gmls._data_sampling_multiplier;
551  data._initial_index_for_batch = gmls._initial_index_for_batch;
552  data._pm = gmls._pm;
553  data._order_of_quadrature_points = gmls._order_of_quadrature_points;
554  data._dimension_of_quadrature_points = gmls._dimension_of_quadrature_points;
555  data._qm = gmls._qm;
556  data._d_ss = gmls._d_ss;
557 
558  data.max_num_neighbors = gmls._pc._nla.getMaxNumNeighbors();
559  data.max_num_rows = gmls._sampling_multiplier*data.max_num_neighbors;
560  int basis_powers_space_multiplier = (gmls._reconstruction_space == BernsteinPolynomial) ? 2 : 1;
561  if (gmls._problem_type == ProblemType::MANIFOLD) {
562  data.manifold_NP = GMLS::getNP(gmls._curvature_poly_order, gmls._dimensions-1,
564  data.max_manifold_NP = (data.manifold_NP > gmls._NP) ? data.manifold_NP : gmls._NP;
565  data.this_num_cols = gmls._basis_multiplier*data.max_manifold_NP;
566  data.max_poly_order = (gmls._poly_order > gmls._curvature_poly_order) ?
567  gmls._poly_order : gmls._curvature_poly_order;
568 
569  data.ref_N_data = gmls._ref_N.data();
570  data.ref_N_dim = gmls._dimensions;
571 
572  data.thread_workspace_dim = (data.max_poly_order+1)*gmls._global_dimensions*basis_powers_space_multiplier;
573  data.manifold_gradient_dim = (gmls._dimensions-1)*data.max_num_neighbors;
574 
575  data.manifold_curvature_coefficients_data = gmls._manifold_curvature_coefficients.data();
576 
577  } else {
578  data.manifold_NP = 0;
579  data.this_num_cols = gmls._basis_multiplier*gmls._NP;
580  data.thread_workspace_dim = (gmls._poly_order+1)*gmls._global_dimensions*basis_powers_space_multiplier;
581  data.manifold_gradient_dim = 0;
582  }
583 
584 
585  data.T_data = gmls._T.data();
586 
587  data.P_target_row_dim_0 = gmls._d_ss._total_alpha_values*gmls._d_ss._max_evaluation_sites_per_target;
588  data.P_target_row_dim_1 = data.this_num_cols;
589  data.P_target_row_data = gmls._Z.data();
590 
591  data.RHS_data = gmls._RHS.data();
592  getRHSDims(gmls._dense_solver_type, gmls._constraint_type, gmls._reconstruction_space,
593  gmls._dimensions, data.max_num_rows, data.this_num_cols, data.RHS_dim_0, data.RHS_dim_1);
594 
595  data.P_data = gmls._P.data();
596  getPDims(gmls._dense_solver_type, gmls._constraint_type, gmls._reconstruction_space,
597  gmls._dimensions, data.max_num_rows, data.this_num_cols, data.P_dim_0, data.P_dim_1);
598 
599  data.w_data = gmls._w.data();
600 
601  if ((gmls._constraint_type == ConstraintType::NO_CONSTRAINT) && (gmls._dense_solver_type != DenseSolverType::LU)) {
602  data.Coeffs_data = gmls._RHS.data();
603  data.Coeffs_dim_0 = data.RHS_dim_0;
604  data.Coeffs_dim_1 = data.RHS_dim_1;
605  } else {
606  data.Coeffs_data = gmls._P.data();
607  data.Coeffs_dim_0 = data.P_dim_1;
608  data.Coeffs_dim_1 = data.P_dim_0;
609  }
610 
611  return data;
612 }
613 
614 } // Compadre
Kokkos::TeamPolicy< device_execution_space > TeamPolicyThreadsAndVectors(const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const
Creates a team policy for a parallel_for parallel_for will break out over loops over teams with each ...
KOKKOS_INLINE_FUNCTION int getOutputDimensionOfSampling(SamplingFunctional sro, const int local_dimensions)
Dimensions ^ output rank for sampling operation (always in local chart if on a manifold, never ambient space)
std::size_t global_index_type
KOKKOS_INLINE_FUNCTION int getAdditionalAlphaSizeFromConstraint(DenseSolverType dense_solver_type, ConstraintType constraint_type)
Kokkos::View< const double *****, layout_right >::HostMirror _host_prestencil_weights
generated weights for nontraditional samples required to transform data into expected sampling functi...
Kokkos::View< TargetOperation * > _operations
vector containing target functionals to be applied for reconstruction problem (device) ...
bool _entire_batch_computed_at_once
whether entire calculation was computed at once the alternative is that it was broken up over many sm...
Kokkos::View< double * > _manifold_curvature_coefficients
curvature polynomial coefficients for all problems
Neumann Gradient Scalar Type.
SolutionSet< host_memory_space > _h_ss
Solution Set (contains all alpha values from solution and alpha layout methods)
Functor to evaluate curvature targets and apply to coefficients of curvature reconstruction.
KOKKOS_INLINE_FUNCTION int getNumberOfTargets() const
Get number of total targets having neighborhoods (host/device).
bool _reference_outward_normal_direction_provided
whether or not the reference outward normal directions were provided by the user. ...
KOKKOS_INLINE_FUNCTION int getAdditionalCoeffSizeFromConstraintAndSpace(DenseSolverType dense_solver_type, ConstraintType constraint_type, ReconstructionSpace reconstruction_space, const int dimension)
int _NP
dimension of basis for polynomial reconstruction
bool _orthonormal_tangent_space_provided
whether or not the orthonormal tangent directions were provided by the user.
const GMLSBasisData extractBasisData() const
Get GMLS basis data.
Functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity.
bool _use_reference_outward_normal_direction_provided_to_orient_surface
whether or not to use reference outward normal directions to orient the surface in a manifold problem...
Kokkos::View< double * > _RHS
sqrt(w)*Identity matrix for all problems, later holds polynomial coefficients for all problems ...
Kokkos::View< double * > _w
contains weights for all problems
KOKKOS_INLINE_FUNCTION int calculateSamplingMultiplier(const ReconstructionSpace rs, const SamplingFunctional sro, const int local_dimensions)
Calculate sampling_multiplier.
int _sampling_multiplier
actual dimension of the sampling functional e.g.
int _curvature_poly_order
order of basis for curvature reconstruction
Functor to evaluate curvature targets and construct accurate tangent direction approximation for mani...
constexpr SamplingFunctional VaryingManifoldVectorPointSample
For integrating polynomial dotted with normal over an edge.
SamplingFunctional _polynomial_sampling_functional
polynomial sampling functional used to construct P matrix, set at GMLS class instantiation NOTE: ca...
int _data_sampling_multiplier
effective dimension of the data sampling functional e.g.
Quadrature _qm
manages and calculates quadrature
Kokkos::View< double * >::HostMirror _host_T
tangent vectors information (host)
Functor to evaluate targets operations on the basis.
constexpr SamplingFunctional StaggeredEdgeAnalyticGradientIntegralSample
Analytical integral of a gradient source vector is just a difference of the scalar source at neighbor...
Kokkos::View< TargetOperation *, memory_space > _lro
vector of user requested target operations
bool _contains_valid_alphas
whether internal alpha values are valid (set externally on a solve)
bool verifyPointConnections(bool assert_valid=false)
Verify whether _pc is valid.
void generatePolynomialCoefficients(const int number_of_batches=1, const bool keep_coefficients=false, const bool clear_cache=true)
Generates polynomial coefficients by setting up and solving least squares problems Sets up the batch ...
DenseSolverType _dense_solver_type
solver type for GMLS problem - can be QR, SVD or LU
neighbor_lists_type * getNeighborLists() const
Get neighbor list accessor.
Kokkos::View< double *, layout_right, memory_space > _alphas
generated alpha coefficients (device)
Each target applies a different transform for each neighbor.
Solve GMLS problem on a manifold (will use QR or SVD to solve the resultant GMLS problem dependent on...
device_mirror_target_view_type _target_coordinates
Kokkos::View< double *****, layout_right > _prestencil_weights
generated weights for nontraditional samples required to transform data into expected sampling functi...
Functor to evaluate targets on a manifold.
global_index_type getTotalNeighborsOverAllListsHost() const
Get the sum of the number of neighbors of all targets&#39; neighborhoods (host)
#define TO_GLOBAL(variable)
int _initial_index_for_batch
initial index for current batch
static KOKKOS_INLINE_FUNCTION int getNP(const int m, const int dimension=3, const ReconstructionSpace r_space=ReconstructionSpace::ScalarTaylorPolynomial)
Returns size of the basis for a given polynomial order and dimension General to dimension 1...
Functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity.
LU factorization performed on P^T*W*P matrix.
KOKKOS_INLINE_FUNCTION void getPDims(DenseSolverType dense_solver_type, ConstraintType constraint_type, ReconstructionSpace reconstruction_space, const int dimension, const int M, const int N, int &out_row, int &out_col)
Scalar polynomial basis centered at the target site and scaled by sum of basis powers e...
QR+Pivoting factorization performed on P*sqrt(w) matrix.
point_connections_type _additional_pc
(OPTIONAL) connections between additional points and neighbors
KOKKOS_INLINE_FUNCTION int getMaxNumNeighbors() const
Get the maximum number of neighbors of all targets&#39; neighborhoods (host/device)
int _dimension_of_quadrature_points
dimension of quadrature rule
int _added_alpha_size
additional alpha coefficients due to constraints
const GMLSSolutionData extractSolutionData() const
Get GMLS solution data.
SamplingFunctional _data_sampling_functional
generally the same as _polynomial_sampling_functional, but can differ if specified at ...
template void batchQRPivotingSolve< layout_right, layout_right, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
ParallelManager _pm
determines scratch level spaces and is used to call kernels
Kokkos::View< double * > _Z
stores evaluations of targets applied to basis
int _default_threads
largest team size
ProblemType _problem_type
problem type for GMLS problem, can also be set to STANDARD for normal or MANIFOLD for manifold proble...
int _basis_multiplier
dimension of the reconstructed function e.g.
int _global_dimensions
spatial dimension of the points, set at class instantiation only
SolutionSet< device_memory_space > _d_ss
Kokkos::View< double * > _T
Rank 3 tensor for high order approximation of tangent vectors for all problems.
void setTeamScratchSize(const int level, const int value)
Functor to calculate prestencil weights to apply to data to transform into a format expected by a GML...
bool verifyAdditionalPointConnections(bool assert_valid=false)
Verify whether _additional_pc is valid.
bool _store_PTWP_inv_PTW
whether polynomial coefficients were requested to be stored (in a state not yet applied to data) ...
Functor to assemble the P*sqrt(weights) matrix and construct sqrt(weights)*Identity for curvature...
void setThreadScratchSize(const int level, const int value)
Functor to determine if tangent directions need reordered, and to reorder them if needed We require t...
ReconstructionSpace _reconstruction_space
reconstruction space for GMLS problems, set at GMLS class instantiation
Kokkos::View< double * > _P
P*sqrt(w) matrix for all problems.
KOKKOS_INLINE_FUNCTION void getRHSDims(DenseSolverType dense_solver_type, ConstraintType constraint_type, ReconstructionSpace reconstruction_space, const int dimension, const int M, const int N, int &RHS_row, int &RHS_col)
constexpr SamplingFunctional PointSample
Available sampling functionals.
int _dimensions
dimension of the problem, set at class instantiation only
std::string _quadrature_type
quadrature rule type
Kokkos::View< TargetOperation * >::HostMirror _host_operations
vector containing target functionals to be applied for reconstruction problem (host) ...
struct SubviewND< T, T2, enable_if_t<(T::rank< 2)> >{T _data_in;T2 _data_original_view;bool _scalar_as_vector_if_needed;SubviewND(T data_in, T2 data_original_view, bool scalar_as_vector_if_needed){_data_in=data_in;_data_original_view=data_original_view;_scalar_as_vector_if_needed=scalar_as_vector_if_needed;}auto get1DView(const int column_num) -> decltype(Kokkos::subview(_data_in, Kokkos::ALL))
Creates 1D subviews of data from a 1D view, generally constructed with CreateNDSliceOnDeviceView.
Each target applies a different data transform, but the same to each neighbor.
point_connections_type _pc
connections between points and neighbors
ConstraintType _constraint_type
constraint type for GMLS problem
int _local_dimensions
dimension of the problem, set at class instantiation only. For manifolds, generally _global_dimension...
Functor to apply target evaluation to polynomial coefficients to store in _alphas.
int _max_evaluation_sites_per_target
maximum number of evaluation sites for each target (includes target site)
int _order_of_quadrature_points
order of exact polynomial integration for quadrature rule
void generateAlphas(const int number_of_batches=1, const bool keep_coefficients=false, const bool clear_cache=true)
Meant to calculate target operations and apply the evaluations to the previously constructed polynomi...
#define compadre_assert_release(condition)
compadre_assert_release is used for assertions that should always be checked, but generally are not e...
Functor to create a coarse tangent approximation from a given neighborhood of points.
NeighborLists< Kokkos::View< int * > > _neighbor_lists
Accessor to get neighbor list data, offset data, and number of neighbors per target.
int _total_alpha_values
used for sizing P_target_row and the _alphas view
int _poly_order
order of basis for polynomial reconstruction
Bernstein polynomial basis.
template void batchQRPivotingSolve< layout_right, layout_left, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
KOKKOS_INLINE_FUNCTION int calculateBasisMultiplier(const ReconstructionSpace rs, const int local_dimensions)
Calculate basis_multiplier.