14 #include "Intrepid2_OrientationTools.hpp"
15 #include "Intrepid2_LagrangianInterpolation.hpp"
16 #ifdef PANZER_HAVE_EPETRA_STACK
17 #include "Epetra_MpiComm.h"
24 template <
class Scalar,
31 using crs_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
32 using row_ptr_type =
typename crs_matrix::local_graph_device_type::row_map_type::non_const_type;
33 using col_idx_type =
typename crs_matrix::local_graph_device_type::entries_type::non_const_type;
34 using vals_type =
typename crs_matrix::local_matrix_device_type::values_type;
36 #if KOKKOS_VERSION >= 40799
37 using ATS = KokkosKernels::ArithTraits<Scalar>;
39 using ATS = Kokkos::ArithTraits<Scalar>;
41 using impl_SC =
typename ATS::val_type;
42 #if KOKKOS_VERSION >= 40799
43 using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
45 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
48 auto lclA = A->getLocalMatrixDevice();
50 auto rowptr = row_ptr_type(
"rowptr", lclA.numRows() + 1);
53 "removeSmallEntries::rowptr1",
54 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
55 KOKKOS_LAMBDA(
const LocalOrdinal rlid) {
56 auto row = lclA.row(rlid);
57 for (LocalOrdinal k = 0; k < row.length; ++k) {
58 if (impl_ATS::magnitude(row.value(k)) > tol) {
59 rowptr(rlid + 1) += 1;
64 Kokkos::parallel_scan(
65 "removeSmallEntries::rowptr2",
66 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
67 KOKKOS_LAMBDA(
const LocalOrdinal rlid, LocalOrdinal& partial_nnz,
bool is_final) {
68 partial_nnz += rowptr(rlid + 1);
70 rowptr(rlid + 1) = partial_nnz;
74 auto idx = col_idx_type(
"idx", nnz);
75 auto vals = vals_type(
"vals", nnz);
78 "removeSmallEntries::indicesValues",
79 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
80 KOKKOS_LAMBDA(
const LocalOrdinal rlid) {
81 auto row = lclA.row(rlid);
82 auto I = rowptr(rlid);
83 for (LocalOrdinal k = 0; k < row.length; ++k) {
84 if (impl_ATS::magnitude(row.value(k)) > tol) {
85 idx(I) = row.colidx(k);
86 vals(I) = row.value(k);
93 auto newA =
Teuchos::rcp(
new crs_matrix(A->getRowMap(), A->getColMap(), rowptr, idx,
vals));
94 newA->fillComplete(A->getDomainMap(),
101 const std::string &domain_basis_name,
const std::string &range_basis_name,
102 Intrepid2::EOperator op,
size_t worksetSize,
103 const bool matrixFree)
107 using Teuchos::rcp_dynamic_cast;
109 using Scalar = double;
112 #ifdef PANZER_HAVE_EPETRA_STACK
119 #ifdef PANZER_HAVE_EPETRA_STACK
124 if (tblof != Teuchos::null) {
125 blockedDOFMngr = tblof->getGlobalIndexer();
126 #ifdef PANZER_HAVE_EPETRA_STACK
127 }
else if (eblof != Teuchos::null) {
128 blockedDOFMngr = eblof->getGlobalIndexer();
135 std::vector<RCP<UGI> > fieldDOFMngrs = blockedDOFMngr->getFieldDOFManagers();
136 int domainFieldNum = blockedDOFMngr->getFieldNum(domain_basis_name);
137 int rangeFieldNum = blockedDOFMngr->getFieldNum(range_basis_name);
138 int domainBlockIndex = blockedDOFMngr->getFieldBlock(domainFieldNum);
139 int rangeBlockIndex = blockedDOFMngr->getFieldBlock(rangeFieldNum);
145 return buildInterpolation(conn, domain_ugi, range_ugi, domain_basis_name, range_basis_name, op, worksetSize,
147 tblof != Teuchos::null,
155 const std::string& domain_basis_name,
156 const std::string& range_basis_name,
157 Intrepid2::EOperator op,
159 const bool force_vectorial,
160 const bool useTpetra,
161 const bool matrixFree)
165 using Teuchos::rcp_dynamic_cast;
167 using Scalar = double;
170 #if KOKKOS_VERSION >= 40799
171 using KAT = KokkosKernels::ArithTraits<Scalar>;
173 using KAT = Kokkos::ArithTraits<Scalar>;
177 using DeviceSpace = PHX::Device;
178 using HostSpace = Kokkos::HostSpace;
179 using ots = Intrepid2::OrientationTools<DeviceSpace>;
180 using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
181 using DynRankDeviceView = Kokkos::DynRankView<double, DeviceSpace>;
183 using tp_graph = Tpetra::CrsGraph<LocalOrdinal, GlobalOrdinal>;
184 using tp_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal>;
185 using tp_map = Tpetra::Map<LocalOrdinal, GlobalOrdinal>;
186 #ifdef PANZER_HAVE_EPETRA_STACK
195 return Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getRangeMap()),
196 Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getDomainMap()),
201 auto domain_fieldPattern = domain_ugi->
getFieldPattern(domain_basis_name);
203 auto range_fieldPattern = range_ugi->
getFieldPattern(range_basis_name);
204 auto range_basis = rcp_dynamic_cast<
const panzer::Intrepid2FieldPattern>(range_fieldPattern,
true)->getIntrepidBasis();
207 const size_t domainCardinality = domain_basis->getCardinality();
208 const size_t rangeCardinality = range_basis->getCardinality();
210 const int dim = range_basis->getBaseCellTopology().getDimension();
212 if (op == Intrepid2::OPERATOR_VALUE) {
223 typename tp_matrix::local_matrix_device_type lcl_tp_interp_matrix;
224 #ifdef PANZER_HAVE_EPETRA_STACK
232 auto rangeElementLIDs_d = range_ugi->
getLIDs();
233 auto domainElementLIDs_d = domain_ugi->
getLIDs();
236 size_t maxNumElementsPerBlock = 0;
237 LocalOrdinal minLocalIndex = 0;
238 LocalOrdinal maxLocalIndex = 0;
241 std::vector<GlobalOrdinal> gids;
243 tp_rowmap =
rcp(
new tp_map(OT::invalid(), gids.data(),
static_cast<LocalOrdinal
>(gids.size()), OT::zero(), range_ugi->
getComm()));
244 tp_rangemap = tp_rowmap;
246 tp_domainmap =
rcp(
new tp_map(OT::invalid(), gids.data(),
static_cast<LocalOrdinal
>(gids.size()), OT::zero(), domain_ugi->
getComm()));
248 tp_colmap =
rcp(
new tp_map(OT::invalid(), gids.data(),
static_cast<LocalOrdinal
>(gids.size()), OT::zero(), domain_ugi->
getComm()));
250 minLocalIndex = tp_rowmap->getMinLocalIndex();
251 maxLocalIndex = tp_rowmap->getMaxLocalIndex();
255 using dv = Kokkos::DualView<size_t*, typename tp_graph::device_type>;
256 dv numEntriesPerRow(
"numEntriesPerRow", tp_rowmap->getLocalNumElements());
258 auto numEntriesPerRow_d = numEntriesPerRow.view_device();
261 std::vector<std::string> elementBlockIds;
263 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
266 std::vector<int> elementIds = range_ugi->
getElementBlock(elementBlockIds[blockIter]);
269 Kokkos::deep_copy(elementIds_d, elementIds_h);
270 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
272 Kokkos::parallel_for(
"MiniEM_Interpolation::numEntriesPerRow",
273 Kokkos::RangePolicy<size_t, typename tp_matrix::node_type::execution_space>(0, elementIds.size()),
274 KOKKOS_LAMBDA(
const size_t elemIter) {
275 auto elemId = elementIds_d(elemIter);
278 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
281 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
282 const LocalOrdinal range_row = rangeLIDs_d(rangeIter);
283 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
285 Kokkos::atomic_add(&numEntriesPerRow_d(range_row), domainCardinality);
289 numEntriesPerRow.template modify<typename dv::t_dev>();
290 numEntriesPerRow.template sync<typename dv::t_host>();
294 auto tp_interp_graph =
rcp(
new tp_graph(tp_rowmap, tp_colmap, numEntriesPerRow));
299 Kokkos::deep_copy(rangeElementLIDs_h, rangeElementLIDs_d);
300 Kokkos::deep_copy(domainElementLIDs_h, domainElementLIDs_d);
303 std::vector<std::string> elementBlockIds;
305 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
308 std::vector<int> elementIds = range_ugi->
getElementBlock(elementBlockIds[blockIter]);
309 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
310 for(std::size_t elemIter = 0; elemIter < elementIds.size(); ++elemIter) {
311 auto elemId = elementIds[elemIter];
314 auto rangeLIDs_h = Kokkos::subview(rangeElementLIDs_h, elemId, Kokkos::ALL());
315 auto domainLIDs_h = Kokkos::subview(domainElementLIDs_h, elemId, Kokkos::ALL());
318 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
319 const LocalOrdinal range_row = rangeLIDs_h(rangeIter);
320 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
323 tp_interp_graph->insertLocalIndices(range_row, domainLIDs_av);
331 pl->
set(
"Optimize Storage",
true);
332 tp_interp_graph->fillComplete(tp_domainmap, tp_rangemap, pl);
334 tp_interp_matrix =
rcp(
new tp_matrix(tp_interp_graph));
335 lcl_tp_interp_matrix = tp_interp_matrix->getLocalMatrixDevice();
337 #ifdef PANZER_HAVE_EPETRA_STACK
342 std::vector<GlobalOrdinal> gids;
344 ep_rowmap =
rcp(
new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
345 ep_rangemap = ep_rowmap;
347 ep_domainmap =
rcp(
new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
349 ep_colmap =
rcp(
new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
353 std::vector<std::string> elementBlockIds;
355 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
358 std::vector<int> elementIds = range_ugi->
getElementBlock(elementBlockIds[blockIter]);
359 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
364 size_t nnzPerRowEstimate = 25*domainCardinality;
366 ep_interp_matrix =
rcp(
new ep_matrix(
Copy, *ep_rowmap, *ep_colmap, static_cast<int>(nnzPerRowEstimate),
true));
370 Thyra::EPETRA_OP_APPLY_APPLY,
371 Thyra::EPETRA_OP_ADJOINT_SUPPORTED,
372 Thyra::create_VectorSpace(ep_rangemap),
373 Thyra::create_VectorSpace(ep_domainmap));
380 if (maxNumElementsPerBlock > 0)
381 numCells =
static_cast<int>(std::min(maxNumElementsPerBlock, worksetSize));
383 numCells =
static_cast<int>(worksetSize);
385 DynRankDeviceView range_dofCoords_d(
"range_dofCoords_d", rangeCardinality, dim);
386 DynRankDeviceView basisCoeffsLIOriented_d(
"basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
389 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
390 DynRankDeviceView valuesAtDofCoordsOriented_d;
392 if (!force_vectorial) {
394 auto temp = domain_basis->allocateOutputView(static_cast<int>(rangeCardinality), op);
399 if (temp.rank() == 3) {
400 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
401 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
403 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
404 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
407 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", domainCardinality, rangeCardinality, dim);
408 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, domainCardinality, rangeCardinality, dim);
411 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
417 range_basis->getDofCoords(range_dofCoords_d);
420 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
423 std::vector<std::string> elementBlockIds;
427 std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
431 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
435 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
436 std::cout <<
"rangeOffsets_d" << std::endl;
437 for (
int i = 0; i < rangeCardinality; i++)
438 std::cout << rangeOffsets_d(i) <<
" ";
439 std::cout << std::endl;
440 std::cout <<
"domainOffsets_d" << std::endl;
441 for (
int i = 0; i < domainCardinality; i++)
442 std::cout << domainOffsets_d(i) <<
" ";
443 std::cout << std::endl;
449 std::vector<int> elementIds = range_ugi->
getElementBlock(elementBlockIds[blockIter]);
452 Kokkos::deep_copy(elementIds_d, elementIds_h);
456 typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace> elemOrts_d (
"elemOrts_d", elementIds_d.extent(0));
459 auto blockOrientations = orientations[elementBlockIds[blockIter]];
461 Kokkos::deep_copy(elemOrts_d, elemOrts_h);
465 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
468 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
469 Teuchos::as<int>(elemIter));
472 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
473 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
475 auto worksetRange = Kokkos::make_pair(0, endCellRange);
476 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
477 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
481 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
482 valuesAtDofCoordsNonOriented_d,
487 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++)
490 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
491 Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
495 #ifdef PANZER_HAVE_EPETRA_STACK
502 for (
int cellNo = 0; cellNo < endCellRange; cellNo++) {
503 auto elemId = elementIds_d(elemIter+cellNo);
506 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
507 auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
510 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
511 const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
512 const bool isOwned = ep_rowmap->MyLID(range_row);
515 LocalOrdinal rowNNZ = 0;
516 for(
size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
517 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
518 if (KAT::magnitude(val) > entryFilterTol) {
519 indices_d(rowNNZ) = domainLIDs_d(domainOffsets_d(domainIter));
520 values_d(rowNNZ) = val;
525 int ret = ep_interp_matrix->ReplaceMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
527 ret = ep_interp_matrix->InsertMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
537 Kokkos::parallel_for(
538 "MiniEM_Interpolation::worksetLoop",
539 Kokkos::RangePolicy<int, typename tp_matrix::node_type::execution_space>(0, endCellRange),
540 KOKKOS_LAMBDA(
const int cellNo) {
541 auto elemId = elementIds_d(elemIter+cellNo);
544 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
545 auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
547 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
548 std::cout <<
"\n" << elemOrts_d(elemIter+cellNo).to_string() << std::endl;
549 std::cout <<
"rangeLIDs" << std::endl;
550 for (
int i = 0; i < rangeCardinality; i++)
551 std::cout << rangeLIDs_d(i) <<
" ";
552 std::cout << std::endl <<
"domainLIDs" << std::endl;
553 for (
int i = 0; i < domainCardinality; i++)
554 std::cout << domainLIDs_d(i) <<
" ";
555 std::cout << std::endl;
558 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
559 const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
560 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
563 for(
size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
564 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
565 if (KAT::magnitude(val) > entryFilterTol) {
567 #if defined(PANZER_INTERPOLATION_DEBUG_OUTPUT) || defined(PANZER_DEBUG)
570 auto row = lcl_tp_interp_matrix.rowConst(range_row);
571 for(LocalOrdinal kk = 0; kk<row.length; ++kk)
572 if (row.colidx(kk) == domainLIDs_d(domainOffsets_d(domainIter)))
573 if (!(KAT::magnitude(row.value(kk)-val) < entryFilterTol || KAT::magnitude(row.value(kk)) < entryFilterTol)) {
574 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
575 std::cout <<
"Replacing (" << range_row <<
"," << row.colidx(kk) <<
") = " << row.value(kk) <<
" with " << val << std::endl;
578 Kokkos::abort(
"MiniEM interpolation worksetLoop failed!");
583 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
584 std::cout <<
"Setting (" << range_row <<
"," << domainLIDs_d(domainOffsets_d(domainIter)) <<
") = " << val << std::endl;
586 lcl_tp_interp_matrix.replaceValues(range_row, &(domainLIDs_d(domainOffsets_d(domainIter))), 1, &val,
false,
true);
597 tp_interp_matrix->fillComplete(tp_domainmap, tp_rangemap);
599 if (op != Intrepid2::OPERATOR_VALUE) {
604 thyra_interp = Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_rangemap),
605 Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_domainmap),
608 #ifdef PANZER_HAVE_EPETRA_STACK
610 ep_interp_matrix->FillComplete(*ep_domainmap, *ep_rangemap);
616 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
621 const std::string& _domain_basis_name,
622 const std::string& _range_basis_name,
623 Intrepid2::EOperator _op,
624 size_t _worksetSize) :
626 domain_basis_name(_domain_basis_name),
627 range_basis_name(_range_basis_name),
629 worksetSize(_worksetSize),
630 domain_ugi(_domain_ugi),
631 range_ugi(_range_ugi)
636 using Teuchos::rcp_dynamic_cast;
641 typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal> tp_map;
650 std::vector<GlobalOrdinal> gids;
652 tp_rowmap =
rcp(
new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(),
range_ugi->getComm()));
653 tp_rangemap = tp_rowmap;
655 tp_domainmap =
rcp(
new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(),
domain_ugi->getComm()));
657 tp_colmap =
rcp(
new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(),
domain_ugi->getComm()));
671 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
675 colmapMV_ =
rcp(
new Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(columnMap_, numVectors));
679 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
683 size_t maxNumElementsPerBlock = 0;
685 if (maxNumElementsPerBlock > 0)
686 numCells = std::min(maxNumElementsPerBlock, worksetSize);
688 numCells = worksetSize;
690 LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
693 auto rangeLIDs_d = range_ugi->getLIDs();
695 auto owner_d = owner_d_;
698 std::vector<std::string> elementBlockIds;
699 range_ugi->getElementBlockIds(elementBlockIds);
700 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
703 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
706 Kokkos::deep_copy(elementIds_d, elementIds_h);
707 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
708 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
709 Kokkos::parallel_for(
"miniEM::MatrixFreeInterpolationOp::cellLoop",
710 range_type(elemIter, std::min(elemIter+numCells,
711 elementIds_d.extent(0))),
712 KOKKOS_LAMBDA(
const LocalOrdinal cellNo2) {
713 auto elemId = elementIds_d(cellNo2);
716 for(
size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
717 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeIter);
718 if (range_row < lclTargetSize)
719 owner_d(range_row) = elemId;
726 std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
729 using HostSpace = Kokkos::HostSpace;
732 for (
auto const& orientation : orientations) {
733 auto blockOrientations = orientation.second;
734 orientations_[orientation.first] =
typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace>(
"elemOrts_d", blockOrientations.size());
736 Kokkos::deep_copy(orientations_[orientation.first], elemOrts_h);
740 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
743 apply (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
744 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
750 applyNonTransposed(X, Y, alpha, beta);
753 applyTransposed(X, Y, alpha, beta);
759 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
763 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
769 using Teuchos::rcp_dynamic_cast;
770 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
772 using ots = Intrepid2::OrientationTools<DeviceSpace>;
773 using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
774 using DynRankDeviceView = Kokkos::DynRankView<Scalar,DeviceSpace>;
775 using view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
776 using const_view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
781 const Scalar ZERO = TST::zero();
784 colmapMV_->doImport(X, *import_,Tpetra::INSERT);
786 const_view_t lclX = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadOnly);
787 view_t lclY = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
788 size_t numVectors = lclY.extent(1);
789 LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
792 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
794 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
795 auto range_basis = rcp_dynamic_cast<
const panzer::Intrepid2FieldPattern>(range_fieldPattern,
true)->getIntrepidBasis();
798 const size_t domainCardinality = domain_basis->getCardinality();
799 const size_t rangeCardinality = range_basis->getCardinality();
801 size_t maxNumElementsPerBlock = 0;
803 const int dim = range_basis->getBaseCellTopology().getDimension();
805 if (op == Intrepid2::OPERATOR_VALUE) {
812 if (maxNumElementsPerBlock > 0)
813 numCells = std::min(maxNumElementsPerBlock, worksetSize);
815 numCells = worksetSize;
816 DynRankDeviceView range_dofCoords_d(
"range_dofCoords_d", rangeCardinality, dim);
817 DynRankDeviceView basisCoeffsLIOriented_d(
"basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
820 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
821 DynRankDeviceView valuesAtDofCoordsOriented_d;
822 DynRankDeviceView reducedValuesAtDofCoordsOriented_d;
826 auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
831 if (temp.rank() == 3) {
832 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
833 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
834 reducedValuesAtDofCoordsOriented_d = DynRankDeviceView(
"reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), temp.extent(2), numVectors);
836 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
837 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
838 reducedValuesAtDofCoordsOriented_d = DynRankDeviceView(
"reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), numVectors);
842 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
845 auto rangeLIDs_d = range_ugi->getLIDs();
846 auto domainLIDs_d = domain_ugi->getLIDs();
849 range_basis->getDofCoords(range_dofCoords_d);
852 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
855 std::vector<std::string> elementBlockIds;
856 range_ugi->getElementBlockIds(elementBlockIds);
857 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
859 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
860 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
865 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
868 Kokkos::deep_copy(elementIds_d, elementIds_h);
872 auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
874 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
877 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
878 Teuchos::as<int>(elemIter));
881 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
882 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
884 auto worksetRange = Kokkos::make_pair(0, endCellRange);
885 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
886 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
890 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
891 valuesAtDofCoordsNonOriented_d,
895 Kokkos::deep_copy(reducedValuesAtDofCoordsOriented_d, 0.0);
897 if (reducedValuesAtDofCoordsOriented_d.rank() == 4) {
898 Kokkos::parallel_for(
"miniEM:MatrixFreeInterpolationOp:cellLoop1",
899 range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
900 KOKKOS_LAMBDA(
const LocalOrdinal cellNo) {
901 LocalOrdinal cellNo2 = elemIter+cellNo;
902 LocalOrdinal elemId = elementIds_d(cellNo2);
903 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
904 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
905 for(
size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
906 for(
size_t d=0; d<valuesAtDofCoordsOriented_d.extent(3); d++) {
907 Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter, d);
908 for (
size_t j = 0; j<numVectors; ++j)
909 reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, d, j) += val * lclX(J, j);
915 for (
size_t j = 0; j<numVectors; ++j)
916 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
917 Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), j),
922 Kokkos::parallel_for(
"miniEM:MatrixFreeInterpolationOp:cellLoop1",
923 range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
924 KOKKOS_LAMBDA(
const LocalOrdinal cellNo) {
925 LocalOrdinal cellNo2 = elemIter+cellNo;
926 LocalOrdinal elemId = elementIds_d(cellNo2);
927 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
928 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
929 for(
size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
930 Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter);
931 for (
size_t j = 0; j<numVectors; ++j)
932 reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, j) += val * lclX(J, j);
937 for (
size_t j = 0; j<numVectors; ++j)
938 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
939 Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), j),
945 auto owner_d = owner_d_;
947 Kokkos::parallel_for(
"miniEM::MatrixFreeInterpolationOp::cellLoop2",
948 range_type(elemIter, std::min(elemIter+numCells,
949 elementIds_d.extent(0))),
950 KOKKOS_LAMBDA(
const LocalOrdinal cellNo2) {
951 LocalOrdinal cellNo = cellNo2-elemIter;
952 LocalOrdinal elemId = elementIds_d(cellNo2);
955 for(
size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
956 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
959 if ((range_row < lclTargetSize) && (owner_d(range_row) == elemId)) {
961 for (
size_t j = 0; j<numVectors; ++j) {
962 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, j);
963 lclY(range_row,j) = beta*lclY(range_row,j) + alpha*val;
973 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
976 applyTransposed (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
977 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
983 using Teuchos::rcp_dynamic_cast;
984 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
986 typedef Intrepid2::OrientationTools<DeviceSpace> ots;
987 typedef Intrepid2::LagrangianInterpolation<DeviceSpace> li;
988 typedef Kokkos::DynRankView<Scalar,DeviceSpace> DynRankDeviceView;
990 const Scalar ZERO = TST::zero();
991 using view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
992 using const_view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
996 const_view_t lclX = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
997 colmapMV_->putScalar(ZERO);
998 view_t lclYtemp = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadWrite);
1001 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
1003 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
1004 auto range_basis = rcp_dynamic_cast<
const panzer::Intrepid2FieldPattern>(range_fieldPattern,
true)->getIntrepidBasis();
1007 const size_t domainCardinality = domain_basis->getCardinality();
1008 const size_t rangeCardinality = range_basis->getCardinality();
1010 size_t maxNumElementsPerBlock = 0;
1012 const int dim = range_basis->getBaseCellTopology().getDimension();
1014 if (op == Intrepid2::OPERATOR_VALUE) {
1021 if (maxNumElementsPerBlock > 0)
1022 numCells = std::min(maxNumElementsPerBlock, worksetSize);
1024 numCells = worksetSize;
1025 DynRankDeviceView range_dofCoords_d(
"range_dofCoords_d", rangeCardinality, dim);
1026 DynRankDeviceView basisCoeffsLIOriented_d(
"basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
1029 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
1030 DynRankDeviceView valuesAtDofCoordsOriented_d;
1034 auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
1039 if (temp.rank() == 3) {
1040 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
1041 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
1043 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
1044 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
1048 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
1051 auto rangeLIDs_d = range_ugi->getLIDs();
1052 auto domainLIDs_d = domain_ugi->getLIDs();
1056 range_basis->getDofCoords(range_dofCoords_d);
1059 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
1062 std::vector<std::string> elementBlockIds;
1063 range_ugi->getElementBlockIds(elementBlockIds);
1064 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
1066 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1067 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1072 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
1075 Kokkos::deep_copy(elementIds_d, elementIds_h);
1079 auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
1081 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
1084 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
1085 Teuchos::as<int>(elemIter));
1088 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
1089 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
1091 auto worksetRange = Kokkos::make_pair(0, endCellRange);
1092 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
1093 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
1097 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
1098 valuesAtDofCoordsNonOriented_d,
1100 domain_basis.get());
1104 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++)
1107 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
1108 Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
1109 range_basis.get(), elemOrtsWorkset_d);
1112 auto owner_d = owner_d_;
1115 Kokkos::parallel_for(
"miniEM::MatrixFreeInterpolationOp::cellLoop",
1116 range_type(elemIter, std::min(elemIter+numCells,
1117 elementIds_d.extent(0))),
1118 KOKKOS_LAMBDA(
const LocalOrdinal cellNo2) {
1119 LocalOrdinal cellNo = cellNo2-elemIter;
1120 LocalOrdinal elemId = elementIds_d(cellNo2);
1123 for(
size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
1124 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
1127 if ((range_row < (LocalOrdinal) lclX.extent(0)) && (owner_d(range_row) == elemId)) {
1129 for(
size_t domainIter = 0; domainIter < domainLIDs_d.extent(1); domainIter++) {
1130 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
1131 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
1132 for (
size_t j = 0; j<lclYtemp.extent(1); ++j)
1133 Kokkos::atomic_add(&lclYtemp(J,j), alpha*val*lclX(range_row,j));
1146 Y.doExport(*colmapMV_, *import_, Tpetra::ADD_ASSIGN);
Teuchos::RCP< panzer::DOFManager > range_ugi
void getElementBlockIds(std::vector< std::string > &elementBlockIds) const
MatrixFreeInterpolationOp(const Teuchos::RCP< const panzer::ConnManager > &conn, const Teuchos::RCP< panzer::DOFManager > &_domain_ugi, const Teuchos::RCP< panzer::DOFManager > &_range_ugi, const std::string &_domain_basis_name, const std::string &_range_basis_name, Intrepid2::EOperator _op=Intrepid2::OPERATOR_VALUE, size_t _worksetSize=1000)
static magnitudeType eps()
const PHX::View< const int * > getGIDFieldOffsetsKokkos(const std::string &blockID, int fieldNum) const
const Kokkos::View< const panzer::LocalOrdinal **, Kokkos::LayoutRight, PHX::Device > getLIDs() const
void getOwnedIndices(std::vector< panzer::GlobalOrdinal > &indices) const
Get the set of indices owned by this processor.
void applyNonTransposed(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
void buildIntrepidOrientations(const std::vector< std::string > &eBlockNames, const panzer::ConnManager &connMgrInput, std::map< std::string, std::vector< Intrepid2::Orientation >> &orientations)
Builds the element orientations for the specified element blocks.
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > domainMap_
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, typename Teuchos::ScalarTraits< Scalar >::magnitudeType tol)
static RCP< Time > getNewTimer(const std::string &name)
void applyTransposed(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Teuchos::RCP< Thyra::LinearOpBase< double > > buildInterpolation(const Teuchos::RCP< const panzer::LinearObjFactory< panzer::Traits >> &linObjFactory, const std::string &domain_basis_name, const std::string &range_basis_name, Intrepid2::EOperator op, size_t worksetSize, const bool matrixFree)
Kokkos::View< typename Sacado::ScalarType< ScalarT >::type **, Kokkos::LayoutRight, PHX::Device > vals
void precomputeOwnersAndOrientations(const Teuchos::RCP< const panzer::ConnManager > &conn)
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > rangeMap_
Teuchos::RCP< Teuchos::Comm< int > > getComm() const
void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > columnMap_
Teuchos::RCP< const Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > > import_
void allocateColumnMapVector(size_t numVectors)
void getOwnedAndGhostedIndices(std::vector< panzer::GlobalOrdinal > &indices) const
Get the set of owned and ghosted indices for this processor.
#define TEUCHOS_ASSERT(assertion_test)
Teuchos::RCP< panzer::DOFManager > domain_ugi
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
const std::vector< panzer::LocalOrdinal > & getElementBlock(const std::string &blockId) const
Get the owned element block.
Teuchos::RCP< const FieldPattern > getFieldPattern(const std::string &name) const
Find a field pattern stored for a particular block and field number. This will retrive the pattern ad...