14 #include "Intrepid2_OrientationTools.hpp"
15 #include "Intrepid2_LagrangianInterpolation.hpp"
16 #ifdef PANZER_HAVE_EPETRA_STACK
17 #include "Epetra_MpiComm.h"
24 template <
class Scalar,
31 using crs_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
32 using row_ptr_type =
typename crs_matrix::local_graph_device_type::row_map_type::non_const_type;
33 using col_idx_type =
typename crs_matrix::local_graph_device_type::entries_type::non_const_type;
34 using vals_type =
typename crs_matrix::local_matrix_device_type::values_type;
36 using ATS = Kokkos::ArithTraits<Scalar>;
37 using impl_SC =
typename ATS::val_type;
38 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
40 auto lclA = A->getLocalMatrixDevice();
42 auto rowptr = row_ptr_type(
"rowptr", lclA.numRows() + 1);
45 "removeSmallEntries::rowptr1",
46 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
47 KOKKOS_LAMBDA(
const LocalOrdinal rlid) {
48 auto row = lclA.row(rlid);
49 for (LocalOrdinal k = 0; k < row.length; ++k) {
50 if (impl_ATS::magnitude(row.value(k)) > tol) {
51 rowptr(rlid + 1) += 1;
56 Kokkos::parallel_scan(
57 "removeSmallEntries::rowptr2",
58 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
59 KOKKOS_LAMBDA(
const LocalOrdinal rlid, LocalOrdinal& partial_nnz,
bool is_final) {
60 partial_nnz += rowptr(rlid + 1);
62 rowptr(rlid + 1) = partial_nnz;
66 auto idx = col_idx_type(
"idx", nnz);
67 auto vals = vals_type(
"vals", nnz);
70 "removeSmallEntries::indicesValues",
71 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
72 KOKKOS_LAMBDA(
const LocalOrdinal rlid) {
73 auto row = lclA.row(rlid);
74 auto I = rowptr(rlid);
75 for (LocalOrdinal k = 0; k < row.length; ++k) {
76 if (impl_ATS::magnitude(row.value(k)) > tol) {
77 idx(I) = row.colidx(k);
78 vals(I) = row.value(k);
85 auto newA =
Teuchos::rcp(
new crs_matrix(A->getRowMap(), A->getColMap(), rowptr, idx,
vals));
86 newA->fillComplete(A->getDomainMap(),
93 const std::string &domain_basis_name,
const std::string &range_basis_name,
94 Intrepid2::EOperator op,
size_t worksetSize,
95 const bool matrixFree)
99 using Teuchos::rcp_dynamic_cast;
101 using Scalar = double;
104 #ifdef PANZER_HAVE_EPETRA_STACK
111 #ifdef PANZER_HAVE_EPETRA_STACK
116 if (tblof != Teuchos::null) {
117 blockedDOFMngr = tblof->getGlobalIndexer();
118 #ifdef PANZER_HAVE_EPETRA_STACK
119 }
else if (eblof != Teuchos::null) {
120 blockedDOFMngr = eblof->getGlobalIndexer();
127 std::vector<RCP<UGI> > fieldDOFMngrs = blockedDOFMngr->getFieldDOFManagers();
128 int domainFieldNum = blockedDOFMngr->getFieldNum(domain_basis_name);
129 int rangeFieldNum = blockedDOFMngr->getFieldNum(range_basis_name);
130 int domainBlockIndex = blockedDOFMngr->getFieldBlock(domainFieldNum);
131 int rangeBlockIndex = blockedDOFMngr->getFieldBlock(rangeFieldNum);
137 return buildInterpolation(conn, domain_ugi, range_ugi, domain_basis_name, range_basis_name, op, worksetSize,
139 tblof != Teuchos::null,
147 const std::string& domain_basis_name,
148 const std::string& range_basis_name,
149 Intrepid2::EOperator op,
151 const bool force_vectorial,
152 const bool useTpetra,
153 const bool matrixFree)
157 using Teuchos::rcp_dynamic_cast;
159 using Scalar = double;
162 using KAT = Kokkos::ArithTraits<Scalar>;
165 using DeviceSpace = PHX::Device;
166 using HostSpace = Kokkos::HostSpace;
167 using ots = Intrepid2::OrientationTools<DeviceSpace>;
168 using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
169 using DynRankDeviceView = Kokkos::DynRankView<double, DeviceSpace>;
171 using tp_graph = Tpetra::CrsGraph<LocalOrdinal, GlobalOrdinal>;
172 using tp_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal>;
173 using tp_map = Tpetra::Map<LocalOrdinal, GlobalOrdinal>;
174 #ifdef PANZER_HAVE_EPETRA_STACK
184 return Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getRangeMap()),
185 Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getDomainMap()),
190 auto domain_fieldPattern = domain_ugi->
getFieldPattern(domain_basis_name);
192 auto range_fieldPattern = range_ugi->
getFieldPattern(range_basis_name);
193 auto range_basis = rcp_dynamic_cast<
const panzer::Intrepid2FieldPattern>(range_fieldPattern,
true)->getIntrepidBasis();
196 const size_t domainCardinality = domain_basis->getCardinality();
197 const size_t rangeCardinality = range_basis->getCardinality();
199 const int dim = range_basis->getBaseCellTopology().getDimension();
201 if (op == Intrepid2::OPERATOR_VALUE) {
212 typename tp_matrix::local_matrix_device_type lcl_tp_interp_matrix;
213 #ifdef PANZER_HAVE_EPETRA_STACK
221 auto rangeElementLIDs_d = range_ugi->
getLIDs();
222 auto domainElementLIDs_d = domain_ugi->
getLIDs();
225 size_t maxNumElementsPerBlock = 0;
226 LocalOrdinal minLocalIndex = 0;
227 LocalOrdinal maxLocalIndex = 0;
230 std::vector<GlobalOrdinal> gids;
232 tp_rowmap =
rcp(
new tp_map(OT::invalid(), gids.data(),
static_cast<LocalOrdinal
>(gids.size()), OT::zero(), range_ugi->
getComm()));
233 tp_rangemap = tp_rowmap;
235 tp_domainmap =
rcp(
new tp_map(OT::invalid(), gids.data(),
static_cast<LocalOrdinal
>(gids.size()), OT::zero(), domain_ugi->
getComm()));
237 tp_colmap =
rcp(
new tp_map(OT::invalid(), gids.data(),
static_cast<LocalOrdinal
>(gids.size()), OT::zero(), domain_ugi->
getComm()));
239 minLocalIndex = tp_rowmap->getMinLocalIndex();
240 maxLocalIndex = tp_rowmap->getMaxLocalIndex();
244 using dv = Kokkos::DualView<size_t*, typename tp_graph::device_type>;
245 dv numEntriesPerRow(
"numEntriesPerRow", tp_rowmap->getLocalNumElements());
247 auto numEntriesPerRow_d = numEntriesPerRow.view_device();
250 std::vector<std::string> elementBlockIds;
252 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
255 std::vector<int> elementIds = range_ugi->
getElementBlock(elementBlockIds[blockIter]);
258 Kokkos::deep_copy(elementIds_d, elementIds_h);
259 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
261 Kokkos::parallel_for(
"MiniEM_Interpolation::numEntriesPerRow",
262 Kokkos::RangePolicy<size_t, typename tp_matrix::node_type::execution_space>(0, elementIds.size()),
263 KOKKOS_LAMBDA(
const size_t elemIter) {
264 auto elemId = elementIds_d(elemIter);
267 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
270 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
271 const LocalOrdinal range_row = rangeLIDs_d(rangeIter);
272 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
274 Kokkos::atomic_add(&numEntriesPerRow_d(range_row), domainCardinality);
278 numEntriesPerRow.template modify<typename dv::t_dev>();
279 numEntriesPerRow.template sync<typename dv::t_host>();
283 auto tp_interp_graph =
rcp(
new tp_graph(tp_rowmap, tp_colmap, numEntriesPerRow));
288 Kokkos::deep_copy(rangeElementLIDs_h, rangeElementLIDs_d);
289 Kokkos::deep_copy(domainElementLIDs_h, domainElementLIDs_d);
292 std::vector<std::string> elementBlockIds;
294 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
297 std::vector<int> elementIds = range_ugi->
getElementBlock(elementBlockIds[blockIter]);
298 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
299 for(std::size_t elemIter = 0; elemIter < elementIds.size(); ++elemIter) {
300 auto elemId = elementIds[elemIter];
303 auto rangeLIDs_h = Kokkos::subview(rangeElementLIDs_h, elemId, Kokkos::ALL());
304 auto domainLIDs_h = Kokkos::subview(domainElementLIDs_h, elemId, Kokkos::ALL());
307 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
308 const LocalOrdinal range_row = rangeLIDs_h(rangeIter);
309 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
312 tp_interp_graph->insertLocalIndices(range_row, domainLIDs_av);
320 pl->
set(
"Optimize Storage",
true);
321 tp_interp_graph->fillComplete(tp_domainmap, tp_rangemap, pl);
323 tp_interp_matrix =
rcp(
new tp_matrix(tp_interp_graph));
324 lcl_tp_interp_matrix = tp_interp_matrix->getLocalMatrixDevice();
326 #ifdef PANZER_HAVE_EPETRA_STACK
331 std::vector<GlobalOrdinal> gids;
333 ep_rowmap =
rcp(
new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
334 ep_rangemap = ep_rowmap;
336 ep_domainmap =
rcp(
new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
338 ep_colmap =
rcp(
new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
342 std::vector<std::string> elementBlockIds;
344 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
347 std::vector<int> elementIds = range_ugi->
getElementBlock(elementBlockIds[blockIter]);
348 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
353 size_t nnzPerRowEstimate = 25*domainCardinality;
355 ep_interp_matrix =
rcp(
new ep_matrix(
Copy, *ep_rowmap, *ep_colmap, static_cast<int>(nnzPerRowEstimate),
true));
359 Thyra::EPETRA_OP_APPLY_APPLY,
360 Thyra::EPETRA_OP_ADJOINT_SUPPORTED,
361 Thyra::create_VectorSpace(ep_rangemap),
362 Thyra::create_VectorSpace(ep_domainmap));
369 if (maxNumElementsPerBlock > 0)
370 numCells =
static_cast<int>(std::min(maxNumElementsPerBlock, worksetSize));
372 numCells =
static_cast<int>(worksetSize);
374 DynRankDeviceView range_dofCoords_d(
"range_dofCoords_d", rangeCardinality, dim);
375 DynRankDeviceView basisCoeffsLIOriented_d(
"basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
378 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
379 DynRankDeviceView valuesAtDofCoordsOriented_d;
381 if (!force_vectorial) {
383 auto temp = domain_basis->allocateOutputView(static_cast<int>(rangeCardinality), op);
388 if (temp.rank() == 3) {
389 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
390 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
392 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
393 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
396 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", domainCardinality, rangeCardinality, dim);
397 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, domainCardinality, rangeCardinality, dim);
400 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
406 range_basis->getDofCoords(range_dofCoords_d);
409 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
412 std::vector<std::string> elementBlockIds;
416 std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
420 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
424 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
425 std::cout <<
"rangeOffsets_d" << std::endl;
426 for (
int i = 0; i < rangeCardinality; i++)
427 std::cout << rangeOffsets_d(i) <<
" ";
428 std::cout << std::endl;
429 std::cout <<
"domainOffsets_d" << std::endl;
430 for (
int i = 0; i < domainCardinality; i++)
431 std::cout << domainOffsets_d(i) <<
" ";
432 std::cout << std::endl;
438 std::vector<int> elementIds = range_ugi->
getElementBlock(elementBlockIds[blockIter]);
441 Kokkos::deep_copy(elementIds_d, elementIds_h);
445 typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace> elemOrts_d (
"elemOrts_d", elementIds_d.extent(0));
448 auto blockOrientations = orientations[elementBlockIds[blockIter]];
450 Kokkos::deep_copy(elemOrts_d, elemOrts_h);
454 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
457 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
458 Teuchos::as<int>(elemIter));
461 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
462 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
464 auto worksetRange = Kokkos::make_pair(0, endCellRange);
465 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
466 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
470 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
471 valuesAtDofCoordsNonOriented_d,
476 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++)
479 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
480 Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
484 #ifdef PANZER_HAVE_EPETRA_STACK
491 for (
int cellNo = 0; cellNo < endCellRange; cellNo++) {
492 auto elemId = elementIds_d(elemIter+cellNo);
495 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
496 auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
499 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
500 const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
501 const bool isOwned = ep_rowmap->MyLID(range_row);
504 LocalOrdinal rowNNZ = 0;
505 for(
size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
506 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
507 if (KAT::magnitude(val) > entryFilterTol) {
508 indices_d(rowNNZ) = domainLIDs_d(domainOffsets_d(domainIter));
509 values_d(rowNNZ) = val;
514 int ret = ep_interp_matrix->ReplaceMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
516 ret = ep_interp_matrix->InsertMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
526 Kokkos::parallel_for(
527 "MiniEM_Interpolation::worksetLoop",
528 Kokkos::RangePolicy<int, typename tp_matrix::node_type::execution_space>(0, endCellRange),
529 KOKKOS_LAMBDA(
const int cellNo) {
530 auto elemId = elementIds_d(elemIter+cellNo);
533 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
534 auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
536 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
537 std::cout <<
"\n" << elemOrts_d(elemIter+cellNo).to_string() << std::endl;
538 std::cout <<
"rangeLIDs" << std::endl;
539 for (
int i = 0; i < rangeCardinality; i++)
540 std::cout << rangeLIDs_d(i) <<
" ";
541 std::cout << std::endl <<
"domainLIDs" << std::endl;
542 for (
int i = 0; i < domainCardinality; i++)
543 std::cout << domainLIDs_d(i) <<
" ";
544 std::cout << std::endl;
547 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
548 const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
549 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
552 for(
size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
553 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
554 if (KAT::magnitude(val) > entryFilterTol) {
556 #if defined(PANZER_INTERPOLATION_DEBUG_OUTPUT) || defined(PANZER_DEBUG)
559 auto row = lcl_tp_interp_matrix.rowConst(range_row);
560 for(LocalOrdinal kk = 0; kk<row.length; ++kk)
561 if (row.colidx(kk) == domainLIDs_d(domainOffsets_d(domainIter)))
562 if (!(KAT::magnitude(row.value(kk)-val) < entryFilterTol || KAT::magnitude(row.value(kk)) < entryFilterTol)) {
563 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
564 std::cout <<
"Replacing (" << range_row <<
"," << row.colidx(kk) <<
") = " << row.value(kk) <<
" with " << val << std::endl;
572 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
573 std::cout <<
"Setting (" << range_row <<
"," << domainLIDs_d(domainOffsets_d(domainIter)) <<
") = " << val << std::endl;
575 lcl_tp_interp_matrix.replaceValues(range_row, &(domainLIDs_d(domainOffsets_d(domainIter))), 1, &val,
false,
true);
586 tp_interp_matrix->fillComplete(tp_domainmap, tp_rangemap);
588 if (op != Intrepid2::OPERATOR_VALUE) {
593 thyra_interp = Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_rangemap),
594 Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_domainmap),
597 #ifdef PANZER_HAVE_EPETRA_STACK
599 ep_interp_matrix->FillComplete(*ep_domainmap, *ep_rangemap);
605 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
610 const std::string& _domain_basis_name,
611 const std::string& _range_basis_name,
612 Intrepid2::EOperator _op,
613 size_t _worksetSize) :
615 domain_basis_name(_domain_basis_name),
616 range_basis_name(_range_basis_name),
618 worksetSize(_worksetSize),
619 domain_ugi(_domain_ugi),
620 range_ugi(_range_ugi)
625 using Teuchos::rcp_dynamic_cast;
630 typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal> tp_map;
639 std::vector<GlobalOrdinal> gids;
641 tp_rowmap =
rcp(
new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(),
range_ugi->getComm()));
642 tp_rangemap = tp_rowmap;
644 tp_domainmap =
rcp(
new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(),
domain_ugi->getComm()));
646 tp_colmap =
rcp(
new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(),
domain_ugi->getComm()));
660 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
664 colmapMV_ =
rcp(
new Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(columnMap_, numVectors));
668 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
672 size_t maxNumElementsPerBlock = 0;
674 if (maxNumElementsPerBlock > 0)
675 numCells = std::min(maxNumElementsPerBlock, worksetSize);
677 numCells = worksetSize;
679 LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
682 auto rangeLIDs_d = range_ugi->getLIDs();
684 auto owner_d = owner_d_;
687 std::vector<std::string> elementBlockIds;
688 range_ugi->getElementBlockIds(elementBlockIds);
689 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
692 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
695 Kokkos::deep_copy(elementIds_d, elementIds_h);
696 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
697 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
698 Kokkos::parallel_for(
"miniEM::MatrixFreeInterpolationOp::cellLoop",
699 range_type(elemIter, std::min(elemIter+numCells,
700 elementIds_d.extent(0))),
701 KOKKOS_LAMBDA(
const LocalOrdinal cellNo2) {
702 auto elemId = elementIds_d(cellNo2);
705 for(
size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
706 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeIter);
707 if (range_row < lclTargetSize)
708 owner_d(range_row) = elemId;
715 std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
718 using HostSpace = Kokkos::HostSpace;
721 for (
auto const& orientation : orientations) {
722 auto blockOrientations = orientation.second;
723 orientations_[orientation.first] =
typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace>(
"elemOrts_d", blockOrientations.size());
725 Kokkos::deep_copy(orientations_[orientation.first], elemOrts_h);
729 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
732 apply (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
733 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
739 applyNonTransposed(X, Y, alpha, beta);
742 applyTransposed(X, Y, alpha, beta);
748 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
752 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
758 using Teuchos::rcp_dynamic_cast;
759 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
761 using ots = Intrepid2::OrientationTools<DeviceSpace>;
762 using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
763 using DynRankDeviceView = Kokkos::DynRankView<Scalar,DeviceSpace>;
764 using view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
765 using const_view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
770 const Scalar ZERO = TST::zero();
773 colmapMV_->doImport(X, *import_,Tpetra::INSERT);
775 const_view_t lclX = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadOnly);
776 view_t lclY = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
777 size_t numVectors = lclY.extent(1);
778 LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
781 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
783 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
784 auto range_basis = rcp_dynamic_cast<
const panzer::Intrepid2FieldPattern>(range_fieldPattern,
true)->getIntrepidBasis();
787 const size_t domainCardinality = domain_basis->getCardinality();
788 const size_t rangeCardinality = range_basis->getCardinality();
790 size_t maxNumElementsPerBlock = 0;
792 const int dim = range_basis->getBaseCellTopology().getDimension();
794 if (op == Intrepid2::OPERATOR_VALUE) {
801 if (maxNumElementsPerBlock > 0)
802 numCells = std::min(maxNumElementsPerBlock, worksetSize);
804 numCells = worksetSize;
805 DynRankDeviceView range_dofCoords_d(
"range_dofCoords_d", rangeCardinality, dim);
806 DynRankDeviceView basisCoeffsLIOriented_d(
"basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
809 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
810 DynRankDeviceView valuesAtDofCoordsOriented_d;
811 DynRankDeviceView reducedValuesAtDofCoordsOriented_d;
815 auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
820 if (temp.rank() == 3) {
821 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
822 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
823 reducedValuesAtDofCoordsOriented_d = DynRankDeviceView(
"reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), temp.extent(2), numVectors);
825 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
826 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
827 reducedValuesAtDofCoordsOriented_d = DynRankDeviceView(
"reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), numVectors);
831 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
834 auto rangeLIDs_d = range_ugi->getLIDs();
835 auto domainLIDs_d = domain_ugi->getLIDs();
838 range_basis->getDofCoords(range_dofCoords_d);
841 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
844 std::vector<std::string> elementBlockIds;
845 range_ugi->getElementBlockIds(elementBlockIds);
846 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
848 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
849 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
854 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
857 Kokkos::deep_copy(elementIds_d, elementIds_h);
861 auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
863 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
866 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
867 Teuchos::as<int>(elemIter));
870 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
871 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
873 auto worksetRange = Kokkos::make_pair(0, endCellRange);
874 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
875 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
879 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
880 valuesAtDofCoordsNonOriented_d,
884 Kokkos::deep_copy(reducedValuesAtDofCoordsOriented_d, 0.0);
886 if (reducedValuesAtDofCoordsOriented_d.rank() == 4) {
887 Kokkos::parallel_for(
"miniEM:MatrixFreeInterpolationOp:cellLoop1",
888 range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
889 KOKKOS_LAMBDA(
const LocalOrdinal cellNo) {
890 LocalOrdinal cellNo2 = elemIter+cellNo;
891 LocalOrdinal elemId = elementIds_d(cellNo2);
892 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
893 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
894 for(
size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
895 for(
size_t d=0; d<valuesAtDofCoordsOriented_d.extent(3); d++) {
896 Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter, d);
897 for (
size_t j = 0; j<numVectors; ++j)
898 reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, d, j) += val * lclX(J, j);
904 for (
size_t j = 0; j<numVectors; ++j)
905 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
906 Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), j),
911 Kokkos::parallel_for(
"miniEM:MatrixFreeInterpolationOp:cellLoop1",
912 range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
913 KOKKOS_LAMBDA(
const LocalOrdinal cellNo) {
914 LocalOrdinal cellNo2 = elemIter+cellNo;
915 LocalOrdinal elemId = elementIds_d(cellNo2);
916 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
917 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
918 for(
size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
919 Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter);
920 for (
size_t j = 0; j<numVectors; ++j)
921 reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, j) += val * lclX(J, j);
926 for (
size_t j = 0; j<numVectors; ++j)
927 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
928 Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), j),
934 auto owner_d = owner_d_;
936 Kokkos::parallel_for(
"miniEM::MatrixFreeInterpolationOp::cellLoop2",
937 range_type(elemIter, std::min(elemIter+numCells,
938 elementIds_d.extent(0))),
939 KOKKOS_LAMBDA(
const LocalOrdinal cellNo2) {
940 LocalOrdinal cellNo = cellNo2-elemIter;
941 LocalOrdinal elemId = elementIds_d(cellNo2);
944 for(
size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
945 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
948 if ((range_row < lclTargetSize) && (owner_d(range_row) == elemId)) {
950 for (
size_t j = 0; j<numVectors; ++j) {
951 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, j);
952 lclY(range_row,j) = beta*lclY(range_row,j) + alpha*val;
962 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
965 applyTransposed (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
966 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
972 using Teuchos::rcp_dynamic_cast;
973 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
975 typedef Intrepid2::OrientationTools<DeviceSpace> ots;
976 typedef Intrepid2::LagrangianInterpolation<DeviceSpace> li;
977 typedef Kokkos::DynRankView<Scalar,DeviceSpace> DynRankDeviceView;
979 const Scalar ZERO = TST::zero();
980 using view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
981 using const_view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
985 const_view_t lclX = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
986 colmapMV_->putScalar(ZERO);
987 view_t lclYtemp = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadWrite);
990 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
992 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
993 auto range_basis = rcp_dynamic_cast<
const panzer::Intrepid2FieldPattern>(range_fieldPattern,
true)->getIntrepidBasis();
996 const size_t domainCardinality = domain_basis->getCardinality();
997 const size_t rangeCardinality = range_basis->getCardinality();
999 size_t maxNumElementsPerBlock = 0;
1001 const int dim = range_basis->getBaseCellTopology().getDimension();
1003 if (op == Intrepid2::OPERATOR_VALUE) {
1010 if (maxNumElementsPerBlock > 0)
1011 numCells = std::min(maxNumElementsPerBlock, worksetSize);
1013 numCells = worksetSize;
1014 DynRankDeviceView range_dofCoords_d(
"range_dofCoords_d", rangeCardinality, dim);
1015 DynRankDeviceView basisCoeffsLIOriented_d(
"basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
1018 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
1019 DynRankDeviceView valuesAtDofCoordsOriented_d;
1023 auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
1028 if (temp.rank() == 3) {
1029 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
1030 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
1032 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
1033 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
1037 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
1040 auto rangeLIDs_d = range_ugi->getLIDs();
1041 auto domainLIDs_d = domain_ugi->getLIDs();
1045 range_basis->getDofCoords(range_dofCoords_d);
1048 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
1051 std::vector<std::string> elementBlockIds;
1052 range_ugi->getElementBlockIds(elementBlockIds);
1053 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
1055 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1056 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1061 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
1064 Kokkos::deep_copy(elementIds_d, elementIds_h);
1068 auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
1070 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
1073 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
1074 Teuchos::as<int>(elemIter));
1077 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
1078 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
1080 auto worksetRange = Kokkos::make_pair(0, endCellRange);
1081 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
1082 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
1086 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
1087 valuesAtDofCoordsNonOriented_d,
1089 domain_basis.get());
1093 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++)
1096 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
1097 Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
1098 range_basis.get(), elemOrtsWorkset_d);
1101 auto owner_d = owner_d_;
1104 Kokkos::parallel_for(
"miniEM::MatrixFreeInterpolationOp::cellLoop",
1105 range_type(elemIter, std::min(elemIter+numCells,
1106 elementIds_d.extent(0))),
1107 KOKKOS_LAMBDA(
const LocalOrdinal cellNo2) {
1108 LocalOrdinal cellNo = cellNo2-elemIter;
1109 LocalOrdinal elemId = elementIds_d(cellNo2);
1112 for(
size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
1113 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
1116 if ((range_row < (LocalOrdinal) lclX.extent(0)) && (owner_d(range_row) == elemId)) {
1118 for(
size_t domainIter = 0; domainIter < domainLIDs_d.extent(1); domainIter++) {
1119 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
1120 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
1121 for (
size_t j = 0; j<lclYtemp.extent(1); ++j)
1122 Kokkos::atomic_add(&lclYtemp(J,j), alpha*val*lclX(range_row,j));
1135 Y.doExport(*colmapMV_, *import_, Tpetra::ADD_ASSIGN);
Teuchos::RCP< panzer::DOFManager > range_ugi
void getElementBlockIds(std::vector< std::string > &elementBlockIds) const
MatrixFreeInterpolationOp(const Teuchos::RCP< const panzer::ConnManager > &conn, const Teuchos::RCP< panzer::DOFManager > &_domain_ugi, const Teuchos::RCP< panzer::DOFManager > &_range_ugi, const std::string &_domain_basis_name, const std::string &_range_basis_name, Intrepid2::EOperator _op=Intrepid2::OPERATOR_VALUE, size_t _worksetSize=1000)
static magnitudeType eps()
const PHX::View< const int * > getGIDFieldOffsetsKokkos(const std::string &blockID, int fieldNum) const
const Kokkos::View< const panzer::LocalOrdinal **, Kokkos::LayoutRight, PHX::Device > getLIDs() const
void getOwnedIndices(std::vector< panzer::GlobalOrdinal > &indices) const
Get the set of indices owned by this processor.
void applyNonTransposed(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
void buildIntrepidOrientations(const std::vector< std::string > &eBlockNames, const panzer::ConnManager &connMgrInput, std::map< std::string, std::vector< Intrepid2::Orientation >> &orientations)
Builds the element orientations for the specified element blocks.
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > domainMap_
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, typename Teuchos::ScalarTraits< Scalar >::magnitudeType tol)
static RCP< Time > getNewTimer(const std::string &name)
void applyTransposed(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Teuchos::RCP< Thyra::LinearOpBase< double > > buildInterpolation(const Teuchos::RCP< const panzer::LinearObjFactory< panzer::Traits >> &linObjFactory, const std::string &domain_basis_name, const std::string &range_basis_name, Intrepid2::EOperator op, size_t worksetSize, const bool matrixFree)
Kokkos::View< typename Sacado::ScalarType< ScalarT >::type **, Kokkos::LayoutRight, PHX::Device > vals
void precomputeOwnersAndOrientations(const Teuchos::RCP< const panzer::ConnManager > &conn)
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > rangeMap_
Teuchos::RCP< Teuchos::Comm< int > > getComm() const
void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > columnMap_
Teuchos::RCP< const Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > > import_
void allocateColumnMapVector(size_t numVectors)
void getOwnedAndGhostedIndices(std::vector< panzer::GlobalOrdinal > &indices) const
Get the set of owned and ghosted indices for this processor.
#define TEUCHOS_ASSERT(assertion_test)
Teuchos::RCP< panzer::DOFManager > domain_ugi
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
const std::vector< panzer::LocalOrdinal > & getElementBlock(const std::string &blockId) const
Get the owned element block.
Teuchos::RCP< const FieldPattern > getFieldPattern(const std::string &name) const
Find a field pattern stored for a particular block and field number. This will retrive the pattern ad...