14 #ifndef __INTREPID2_PROJECTIONTOOLS_HPP__
15 #define __INTREPID2_PROJECTIONTOOLS_HPP__
17 #include "Intrepid2_ConfigDefs.hpp"
21 #include "Shards_CellTopology.hpp"
22 #include "Shards_BasicTopologies.hpp"
45 #include "Teuchos_LAPACK.hpp"
50 #ifdef HAVE_INTREPID2_KOKKOSKERNELS
51 #include "KokkosBatched_QR_Serial_Internal.hpp"
52 #include "KokkosBatched_ApplyQ_Serial_Internal.hpp"
53 #if KOKKOS_VERSION >= 40599
54 #include "KokkosBatched_Trsv_Decl.hpp"
56 #include "KokkosBatched_Trsv_Serial_Internal.hpp"
58 #include "KokkosBatched_Util.hpp"
115 template<
typename DeviceType>
// Execution space associated with the DeviceType template parameter.
118 using ExecSpaceType =
typename DeviceType::execution_space;
// Memory space associated with the DeviceType template parameter.
119 using MemSpaceType =
typename DeviceType::memory_space;
// Evaluation-point container type, as defined by ProjectionStruct<DeviceType, double>.
120 using EvalPointsType =
typename ProjectionStruct<DeviceType, double>::EvalPointsType;
142 template<
typename basisCoeffsValueType,
class ...basisCoeffsProperties,
143 typename funValsValueType,
class ...funValsProperties,
145 typename ortValueType,
class ...ortProperties>
147 getL2BasisCoeffs(Kokkos::DynRankView<basisCoeffsValueType,basisCoeffsProperties...> basisCoeffs,
148 const Kokkos::DynRankView<funValsValueType,funValsProperties...> targetAtEvalPoints,
149 const Kokkos::DynRankView<ortValueType, ortProperties...> cellOrientations,
150 const BasisType* cellBasis,
177 template<
typename basisCoeffsValueType,
class ...basisCoeffsProperties,
178 typename funValsValueType,
class ...funValsProperties,
180 typename ortValueType,
class ...ortProperties>
182 getL2DGBasisCoeffs(Kokkos::DynRankView<basisCoeffsValueType,basisCoeffsProperties...> basisCoeffs,
183 const Kokkos::DynRankView<funValsValueType,funValsProperties...> targetAtEvalPoints,
184 const Kokkos::DynRankView<ortValueType, ortProperties...> cellOrientations,
185 const BasisType* cellBasis,
211 template<
typename basisViewType,
typename targetViewType,
typename BasisType>
214 const targetViewType targetAtTargetEPoints,
215 const BasisType* cellBasis,
239 template<
class BasisCoeffsViewType,
class TargetValueViewType,
class TargetGradViewType,
240 class BasisType,
class OrientationViewType>
243 const TargetValueViewType targetAtEvalPoints,
244 const TargetGradViewType targetGradAtGradEvalPoints,
245 const OrientationViewType cellOrientations,
246 const BasisType* cellBasis,
273 template<
typename basisCoeffsValueType,
class ...basisCoeffsProperties,
274 typename funValsValueType,
class ...funValsProperties,
276 typename ortValueType,
class ...ortProperties>
278 getHCurlBasisCoeffs(Kokkos::DynRankView<basisCoeffsValueType,basisCoeffsProperties...> basisCoeffs,
279 const Kokkos::DynRankView<funValsValueType,funValsProperties...> targetAtEvalPoints,
280 const Kokkos::DynRankView<funValsValueType,funValsProperties...> targetCurlAtCurlEvalPoints,
281 const Kokkos::DynRankView<ortValueType, ortProperties...> cellOrientations,
282 const BasisType* cellBasis,
306 template<
typename basisCoeffsValueType,
class ...basisCoeffsProperties,
307 typename funValsValueType,
class ...funValsProperties,
309 typename ortValueType,
class ...ortProperties>
311 getHDivBasisCoeffs(Kokkos::DynRankView<basisCoeffsValueType,basisCoeffsProperties...> basisCoeffs,
312 const Kokkos::DynRankView<funValsValueType,funValsProperties...> targetAtEvalPoints,
313 const Kokkos::DynRankView<funValsValueType,funValsProperties...> targetDivAtDivEvalPoints,
314 const Kokkos::DynRankView<ortValueType, ortProperties...> cellOrientations,
315 const BasisType* cellBasis,
336 template<
typename basisCoeffsValueType,
class ...basisCoeffsProperties,
337 typename funValsValueType,
class ...funValsProperties,
339 typename ortValueType,
class ...ortProperties>
341 getHVolBasisCoeffs(Kokkos::DynRankView<basisCoeffsValueType,basisCoeffsProperties...> basisCoeffs,
342 const Kokkos::DynRankView<funValsValueType,funValsProperties...> targetAtEvalPoints,
343 [[maybe_unused]]
const Kokkos::DynRankView<ortValueType, ortProperties...> cellOrientations,
344 const BasisType* cellBasis,
364 template<
typename dstViewType,
365 typename dstBasisType,
366 typename srcViewType,
367 typename srcBasisType,
368 typename OrientationViewType>
371 const dstBasisType* dstBasis,
372 const srcViewType srcCoeffs,
373 const srcBasisType* srcBasis,
374 const OrientationViewType cellOrientations){
377 INTREPID2_TEST_FOR_EXCEPTION(dstBasis->getFunctionSpace() != srcBasis->getFunctionSpace(), std::runtime_error,
378 "The source and destination bases are not compatible. They need to belong to the same function space");
379 INTREPID2_TEST_FOR_EXCEPTION(dstBasis->getBaseCellTopology().getKey() != srcBasis->getBaseCellTopology().getKey(), std::runtime_error,
380 "The source and destination bases are not compatible. They do not have the same basic cell topology");
386 ordinal_type numCells = cellOrientations.extent(0);
387 ordinal_type dim = srcBasis->getBaseCellTopology().getDimension();
388 ordinal_type srcBasisCardinality = srcBasis->getCardinality();
389 ordinal_type fieldDimension = (srcBasis->getFunctionSpace() == Intrepid2::FUNCTION_SPACE_HCURL || srcBasis->getFunctionSpace() == Intrepid2::FUNCTION_SPACE_HDIV) ? dim : 1;
391 auto evaluationPoints = projStruct.getAllEvalPoints();
392 ordinal_type numPoints = evaluationPoints.extent(0);
394 using outViewType = Kokkos::DynRankView<typename srcBasisType::OutputValueType, DeviceType>;
395 outViewType srcAtEvalPoints, refBasisAtEvalPoints, basisAtEvalPoints;
396 if(fieldDimension == dim) {
397 srcAtEvalPoints = outViewType(
"srcAtEvalPoints", numCells, numPoints, dim);
398 refBasisAtEvalPoints = outViewType(
"refBasisAtEvalPoints", srcBasisCardinality, numPoints, dim);
399 basisAtEvalPoints = outViewType(
"basisAtEvalPoints", numCells, srcBasisCardinality, numPoints, dim);
401 srcAtEvalPoints = outViewType(
"srcAtEvalPoints", numCells, numPoints);
402 refBasisAtEvalPoints = outViewType(
"refBasisAtEvalPoints", srcBasisCardinality, numPoints);
403 basisAtEvalPoints = outViewType(
"basisAtEvalPoints", numCells, srcBasisCardinality, numPoints);
406 srcBasis->getValues(refBasisAtEvalPoints,evaluationPoints);
410 refBasisAtEvalPoints,
414 Kokkos::parallel_for(Kokkos::RangePolicy<typename DeviceType::execution_space>(0,numCells),
415 KOKKOS_LAMBDA (
const int &ic) {
416 for(
int j=0; j<numPoints; ++j) {
417 for(
int k=0; k<srcBasisCardinality; ++k) {
418 for(
int d=0; d<fieldDimension; ++d)
419 srcAtEvalPoints.access(ic,j,d) += srcCoeffs(ic,k)*basisAtEvalPoints.access(ic,k,j,d);
423 ExecSpaceType().fence();
// Label for this system; solveDevice passes it to Kokkos::parallel_for as the kernel name.
446 std::string systemName_;
// When true, the solvers read the matrix of cell 0 only and reuse it for every cell's right-hand side.
447 bool matrixIndependentOfCell_;
// Constructs the system description.
//   systemName: label stored in systemName_.
//   matrixIndependentOfCell: flag stored in matrixIndependentOfCell_.
456 ElemSystem (std::string systemName,
bool matrixIndependentOfCell) :
457 systemName_(systemName), matrixIndependentOfCell_(matrixIndependentOfCell){};
486 template<
typename ViewType1,
typename ViewType2,
typename ViewType3,
typename ViewType4>
487 void solve(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 tau,
488 ViewType3 w,
const ViewType4 elemDof, ordinal_type n, ordinal_type m=0) {
489 #ifdef HAVE_INTREPID2_KOKKOSKERNELS
490 solveDevice(basisCoeffs, elemMat, elemRhs, tau,
493 solveHost(basisCoeffs, elemMat, elemRhs, tau,
501 #ifdef HAVE_INTREPID2_KOKKOSKERNELS
502 template<
typename ViewType1,
typename ViewType2,
typename ViewType3,
typename ViewType4>
503 void solveDevice(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 taul,
504 ViewType3 work,
const ViewType4 elemDof, ordinal_type n, ordinal_type m) {
505 using HostDeviceType = Kokkos::Device<Kokkos::DefaultHostExecutionSpace,Kokkos::HostSpace>;
507 ordinal_type numCells = basisCoeffs.extent(0);
509 if(matrixIndependentOfCell_) {
510 auto A0 = Kokkos::subview(elemMat, 0, Kokkos::ALL(), Kokkos::ALL());
511 auto tau0 = Kokkos::subview(taul, 0, Kokkos::ALL());
513 Kokkos::DynRankView<typename ViewType2::value_type, HostDeviceType> A0_host(
"A0_host", A0.extent(0),A0.extent(1));
514 auto A0_device = Kokkos::create_mirror_view(
typename DeviceType::memory_space(), A0_host);
515 Kokkos::deep_copy(A0_device, A0);
516 Kokkos::deep_copy(A0_host, A0_device);
518 for(ordinal_type i=n; i<n+m; ++i)
519 for(ordinal_type j=0; j<n; ++j)
520 A0_host(i,j) = A0_host(j,i);
522 Kokkos::DynRankView<typename ViewType2::value_type, HostDeviceType> tau0_host(
"A0_host", tau0.extent(0));
523 auto tau0_device = Kokkos::create_mirror_view(
typename DeviceType::memory_space(), tau0_host);
524 auto w0_host = Kokkos::create_mirror_view(Kokkos::subview(work, 0, Kokkos::ALL()));
527 #if KOKKOS_VERSION >= 40599
528 KokkosBatched::Impl::SerialQR_Internal::invoke
530 KokkosBatched::SerialQR_Internal::invoke
532 (A0_host.extent(0), A0_host.extent(1),
533 A0_host.data(), A0_host.stride_0(), A0_host.stride_1(),
534 tau0_host.data(), tau0_host.stride_0(), w0_host.data());
536 Kokkos::deep_copy(A0_device, A0_host);
537 Kokkos::deep_copy(A0, A0_device);
538 Kokkos::deep_copy(tau0_device, tau0_host);
539 Kokkos::deep_copy(tau0, tau0_device);
541 Kokkos::parallel_for (systemName_,
542 Kokkos::RangePolicy<ExecSpaceType, int> (0, numCells),
543 KOKKOS_LAMBDA (
const size_t ic) {
544 auto w = Kokkos::subview(work, ic, Kokkos::ALL());
546 auto b = Kokkos::subview(elemRhs, ic, Kokkos::ALL());
549 KokkosBatched::SerialApplyQ_RightForwardInternal::invoke(
550 1, A0.extent(0), A0.extent(1),
551 A0.data(), A0.stride_0(), A0.stride_1(),
552 tau0.data(), tau0.stride_0(),
553 b.data(), 1, b.stride_0(),
557 #if KOKKOS_VERSION >= 40599
558 KokkosBatched::SerialTrsv<KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, KokkosBatched::Diag::NonUnit, KokkosBatched::Algo::Trsv::Unblocked>::invoke(1.0, A0, b);
560 KokkosBatched::SerialTrsvInternalUpper<KokkosBatched::Algo::Trsv::Unblocked>::invoke(
false,
563 A0.data(), A0.stride_0(), A0.stride_1(),
564 b.data(), b.stride_0());
568 for(ordinal_type i=0; i<n; ++i){
569 basisCoeffs(ic,elemDof(i)) = b(i);
575 Kokkos::parallel_for (systemName_,
576 Kokkos::RangePolicy<ExecSpaceType, int> (0, numCells),
577 KOKKOS_LAMBDA (
const size_t ic) {
579 auto A = Kokkos::subview(elemMat, ic, Kokkos::ALL(), Kokkos::ALL());
580 auto tau = Kokkos::subview(taul, ic, Kokkos::ALL());
581 auto w = Kokkos::subview(work, ic, Kokkos::ALL());
583 for(ordinal_type i=n; i<n+m; ++i)
584 for(ordinal_type j=0; j<n; ++j)
588 #if KOKKOS_VERSION >= 40599
589 KokkosBatched::Impl::SerialQR_Internal::invoke
591 KokkosBatched::SerialQR_Internal::invoke
593 (A.extent(0), A.extent(1),
594 A.data(), A.stride_0(), A.stride_1(), tau.data(), tau.stride_0(), w.data());
596 auto b = Kokkos::subview(elemRhs, ic, Kokkos::ALL());
599 KokkosBatched::SerialApplyQ_RightForwardInternal::invoke(
600 1, A.extent(0), A.extent(1),
601 A.data(), A.stride_0(), A.stride_1(),
602 tau.data(), tau.stride_0(),
603 b.data(), 1, b.stride_0(),
607 #if KOKKOS_VERSION >= 40599
608 KokkosBatched::SerialTrsv<KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, KokkosBatched::Diag::NonUnit, KokkosBatched::Algo::Trsv::Unblocked>::invoke(1.0, A, b);
610 KokkosBatched::SerialTrsvInternalUpper<KokkosBatched::Algo::Trsv::Unblocked>::invoke(
false,
613 A.data(), A.stride_0(), A.stride_1(),
614 b.data(), b.stride_0());
618 for(ordinal_type i=0; i<n; ++i){
619 basisCoeffs(ic,elemDof(i)) = b(i);
629 template<
typename ViewType1,
typename ViewType2,
typename ViewType3,
typename ViewType4>
630 void solveHost(ViewType1 basisCoeffs, ViewType2 elemMat, ViewType2 elemRhs, ViewType2 ,
631 ViewType3,
const ViewType4 elemDof, ordinal_type n, ordinal_type m) {
632 using value_type =
typename ViewType2::value_type;
633 using device_type = DeviceType;
634 using host_exec_space = Kokkos::DefaultHostExecutionSpace;
635 using host_memory_space = Kokkos::HostSpace;
636 using host_device_type = Kokkos::Device<host_exec_space,host_memory_space>;
637 using vector_host_type = Kokkos::View<value_type*,host_device_type>;
638 using scratch_host_type = Kokkos::View<value_type*,host_exec_space::scratch_memory_space>;
639 using matrix_host_type = Kokkos::View<value_type**,Kokkos::LayoutLeft,host_device_type>;
640 using do_not_init_tag = Kokkos::ViewAllocateWithoutInitializing;
641 using host_team_policy_type = Kokkos::TeamPolicy<host_exec_space>;
642 using host_range_policy_type = Kokkos::RangePolicy<host_exec_space>;
648 const ordinal_type numCells = basisCoeffs.extent(0);
649 const ordinal_type numRows = m+n, numCols = n;
652 Teuchos::LAPACK<ordinal_type,value_type> lapack;
655 Kokkos::View<ordinal_type*,host_device_type> elemDof_host(do_not_init_tag(
"elemDof_host"), elemDof.extent(0));
657 auto elemDof_device = Kokkos::create_mirror_view(
typename device_type::memory_space(), elemDof_host);
658 Kokkos::deep_copy(elemDof_device, elemDof); Kokkos::fence();
659 Kokkos::deep_copy(elemDof_host, elemDof_device);
663 auto elemRhs_host = Kokkos::create_mirror_view_and_copy(host_memory_space(), elemRhs);
664 auto elemMat_host = Kokkos::create_mirror_view_and_copy(host_memory_space(), elemMat);
667 auto basisCoeffs_host = Kokkos::create_mirror_view_and_copy(host_memory_space(), basisCoeffs);
669 if (matrixIndependentOfCell_) {
671 matrix_host_type A(do_not_init_tag(
"A"), numRows, numRows);
673 for (ordinal_type j=0;j<numRows;++j)
674 for (ordinal_type i=0;i<numRows;++i)
675 A(i, j) = elemMat_host(0, i, j);
677 for (ordinal_type j=0;j<numCols;++j)
678 for (ordinal_type i=numCols;i<numRows;++i)
682 ordinal_type lwork(-1);
684 ordinal_type info(0);
687 numRows, numRows, numCells,
688 nullptr, std::max(1,numRows),
689 nullptr, std::max(1,numRows),
695 matrix_host_type C(do_not_init_tag(
"C"), numRows, numCells);
697 host_range_policy_type policy(0, numCells);
700 (
"ProjectionTools::solveHost::matrixIndependentOfCell::pack",
701 policy, [=](
const ordinal_type & ic) {
702 for (ordinal_type i=0;i<numRows;++i)
703 C(i,ic) = elemRhs_host(ic, i);
708 vector_host_type work(do_not_init_tag(
"work"), lwork);
709 ordinal_type info(0);
711 numRows, numRows, numCells,
712 A.data(), std::max(1,numRows),
713 C.data(), std::max(1,numRows),
716 INTREPID2_TEST_FOR_EXCEPTION
717 (info != 0, std::runtime_error,
"GELS return non-zero info code");
721 (
"ProjectionTools::solveHost::matrixIndependentOfCell::unpack",
722 policy, [=](
const ordinal_type & ic) {
723 for (ordinal_type i=0;i<numCols;++i)
724 basisCoeffs_host(ic,elemDof_host(i)) = C(i,ic);
728 const ordinal_type level(0);
729 host_team_policy_type policy(numCells, 1, 1);
732 ordinal_type lwork(-1);
734 ordinal_type info(0);
738 nullptr, std::max(1,numRows),
739 nullptr, std::max(1,numRows),
745 const ordinal_type per_team_extent = numRows*numRows + numRows + lwork;
746 const ordinal_type per_team_scratch = scratch_host_type::shmem_size(per_team_extent);
747 policy.set_scratch_size(level, Kokkos::PerTeam(per_team_scratch));
751 (
"ProjectionTools::solveHost::matrixDependentOfCell",
752 policy, [=](
const typename host_team_policy_type::member_type& member) {
753 const ordinal_type ic = member.league_rank();
755 scratch_host_type scratch(member.team_scratch(level), per_team_extent);
756 value_type * sptr = scratch.data();
759 matrix_host_type A(sptr, numRows, numRows); sptr += A.span();
760 for (ordinal_type j=0;j<numRows;++j)
761 for (ordinal_type i=0;i<numRows;++i)
762 A(i, j) = elemMat_host(ic, i, j);
764 for (ordinal_type j=0;j<numCols;++j)
765 for (ordinal_type i=numCols;i<numRows;++i)
768 vector_host_type c(sptr, numRows); sptr += c.span();
769 for (ordinal_type i=0;i<numRows;++i)
770 c(i) = elemRhs_host(ic, i);
772 vector_host_type work(sptr, lwork); sptr += work.span();
773 ordinal_type info(0);
776 A.data(), std::max(1,numRows),
777 c.data(), std::max(1,numRows),
780 INTREPID2_TEST_FOR_EXCEPTION
781 (info != 0, std::runtime_error,
"GELS return non-zero info code");
784 for (ordinal_type i=0;i<numCols;++i)
785 basisCoeffs_host(ic,elemDof_host(i)) = c(i);
788 Kokkos::deep_copy(basisCoeffs, basisCoeffs_host);
Header file for the Intrepid2::Basis_HDIV_TET_I1_FEM class.
Header file for the Intrepid2::Basis_HDIV_HEX_I1_FEM class.
Header file for the Intrepid2::Basis_HDIV_WEDGE_I1_FEM class.
Header file for the Intrepid2::Util class and other utility functions.
Header file for the Intrepid2::Basis_HDIV_QUAD_I1_FEM class.
Header file for the Intrepid2::Basis_HDIV_TRI_I1_FEM class.
Contains definitions of custom data types in Intrepid2.
Header file for the Intrepid2::ProjectionStruct.
void createL2ProjectionStruct(const BasisPtrType cellBasis, const ordinal_type targetCubDegree)
Initialize the ProjectionStruct for L2 projections.
Header file for the Intrepid2::Basis_HCURL_QUAD_I1_FEM class.
Header file for the Intrepid2::Basis_HCURL_WEDGE_I1_FEM class.
Header file for the Intrepid2::Basis_HCURL_TET_I1_FEM class.
Header file for the Intrepid2::Basis_HCURL_HEX_I1_FEM class.
Header file for the Intrepid2::Basis_HCURL_TRI_I1_FEM class.
A helper class to compute the evaluation points and weights needed for performing projections...
Stateless class that acts as a factory for a family of nodal bases (hypercube topologies only at this...
Header file for the abstract base class Intrepid2::Basis.