11 #ifndef PANZER_SCATTER_RESIDUAL_BLOCKEDEPETRA_IMPL_HPP
12 #define PANZER_SCATTER_RESIDUAL_BLOCKEDEPETRA_IMPL_HPP
15 #include "Teuchos_Assert.hpp"
17 #include "Phalanx_DataLayout.hpp"
28 #include "Thyra_ProductVectorBase.hpp"
29 #include "Thyra_BlockedLinearOpBase.hpp"
30 #include "Thyra_TpetraVector.hpp"
31 #include "Thyra_TpetraLinearOp.hpp"
32 #include "Tpetra_CrsMatrix.hpp"
33 #include "KokkosSparse_CrsMatrix.hpp"
35 #include "Phalanx_DataLayout_MDALayout.hpp"
37 #include "Teuchos_FancyOStream.hpp"
39 template <
typename EvalT,
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
44 std::string scatterName = p.
get<std::string>(
"Scatter Name");
49 const std::vector<std::string>& names =
56 for (std::size_t eq = 0; eq < names.size(); ++eq) {
60 this->addDependentField(field.fieldTag());
64 this->addEvaluatedField(*scatterHolder);
66 this->setName(scatterName+
" Scatter Residual");
73 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
77 : globalIndexer_(indexer)
78 , globalDataKey_(
"Residual Scatter Container")
80 std::string scatterName = p.
get<std::string>(
"Scatter Name");
85 const std::vector<std::string>& names =
95 scatterFields_.resize(names.size());
96 for (std::size_t eq = 0; eq < names.size(); ++eq) {
100 this->addDependentField(scatterFields_[eq]);
104 this->addEvaluatedField(*scatterHolder_);
106 if (p.
isType<std::string>(
"Global Data Key"))
107 globalDataKey_ = p.
get<std::string>(
"Global Data Key");
109 this->setName(scatterName+
" Scatter Residual");
113 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
118 const Workset & workset_0 = (*d.worksets_)[0];
119 const std::string blockId = this->wda(workset_0).
block_id;
121 fieldIds_.resize(scatterFields_.size());
122 fieldOffsets_.resize(scatterFields_.size());
123 fieldGlobalIndexers_.resize(scatterFields_.size());
124 productVectorBlockIndex_.resize(scatterFields_.size());
125 int maxElementBlockGIDCount = -1;
126 for(std::size_t fd=0; fd < scatterFields_.size(); ++fd) {
127 const std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
128 const int globalFieldNum = globalIndexer_->getFieldNum(fieldName);
129 productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
130 fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
131 fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName);
133 const std::vector<int>&
offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]);
134 fieldOffsets_[fd] = PHX::View<int*>(
"ScatterResidual_BlockedTpetra(Residual):fieldOffsets",offsets.size());
135 auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
136 for (std::size_t i=0; i < offsets.size(); ++i)
137 hostOffsets(i) = offsets[i];
138 Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
140 maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount);
146 worksetLIDs_ = PHX::View<LO**>(
"ScatterResidual_BlockedTpetra(Residual):worksetLIDs",
147 scatterFields_[0].extent(0),
148 maxElementBlockGIDCount);
152 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
157 using Teuchos::rcp_dynamic_cast;
160 blockedContainer_ = rcp_dynamic_cast<
const ContainerType>(d.gedc->getDataObject(globalDataKey_));
162 if(blockedContainer_==Teuchos::null) {
169 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
174 using Teuchos::rcp_dynamic_cast;
178 const auto& localCellIds = this->wda(workset).cell_local_ids_k;
182 int currentWorksetLIDSubBlock = -1;
183 for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
185 if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) {
186 fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_);
187 currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex];
190 auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),
true))->getTpetraVector());
191 const auto& kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
194 const auto& fieldOffsets = fieldOffsets_[fieldIndex];
195 const auto& worksetLIDs = worksetLIDs_;
196 const auto& fieldValues = scatterFields_[fieldIndex].get_static_view();
198 Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (
const int& cell) {
199 for(
int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
200 const int lid = worksetLIDs(cell,fieldOffsets(basis));
201 Kokkos::atomic_add(&kokkosResidual(lid,0), fieldValues(cell,basis));
211 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
215 : globalIndexer_(indexer)
216 , globalDataKey_(
"Residual Scatter Container")
218 std::string scatterName = p.
get<std::string>(
"Scatter Name");
223 const std::vector<std::string>& names =
233 scatterFields_.resize(names.size());
234 for (std::size_t eq = 0; eq < names.size(); ++eq) {
238 this->addDependentField(scatterFields_[eq]);
242 this->addEvaluatedField(*scatterHolder_);
244 if (p.
isType<std::string>(
"Global Data Key"))
245 globalDataKey_ = p.
get<std::string>(
"Global Data Key");
247 this->setName(scatterName+
" Scatter Residual (Jacobian)");
251 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
256 const Workset & workset_0 = (*d.worksets_)[0];
257 const std::string blockId = this->wda(workset_0).
block_id;
259 fieldIds_.resize(scatterFields_.size());
260 fieldOffsets_.resize(scatterFields_.size());
261 productVectorBlockIndex_.resize(scatterFields_.size());
262 for (std::size_t fd=0; fd < scatterFields_.size(); ++fd) {
263 const std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
264 const int globalFieldNum = globalIndexer_->getFieldNum(fieldName);
265 productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
266 const auto& fieldGlobalIndexer = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
267 fieldIds_[fd] = fieldGlobalIndexer->getFieldNum(fieldName);
269 const std::vector<int>&
offsets = globalIndexer_->getGIDFieldOffsets(blockId,globalFieldNum);
270 fieldOffsets_[fd] = PHX::View<int*>(
"ScatterResidual_BlockedTpetra(Jacobian):fieldOffsets",offsets.size());
271 auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
272 for (std::size_t i=0; i < offsets.size(); ++i)
273 hostOffsets(i) = offsets[i];
274 Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
280 int elementBlockGIDCount = 0;
281 for (
const auto& blockDOFMgr : globalIndexer_->getFieldDOFManagers())
282 elementBlockGIDCount += blockDOFMgr->getElementBlockGIDCount(blockId);
285 "ScatterResidual_BlockedTpetra(Jacobian):worksetLIDs_",
286 scatterFields_[0].extent(0), elementBlockGIDCount );
289 const auto& blockGlobalIndexers = globalIndexer_->getFieldDOFManagers();
290 const int numBlocks =
static_cast<int>(globalIndexer_->getFieldDOFManagers().size());
291 blockOffsets_ = PHX::View<LO*>(
"ScatterResidual_BlockedTpetra(Jacobian):blockOffsets_",
293 const auto hostBlockOffsets = Kokkos::create_mirror_view(blockOffsets_);
294 for (
int blk=0;blk<numBlocks;blk++) {
295 int blockOffset = globalIndexer_->getBlockGIDOffset(blockId,blk);
296 hostBlockOffsets(blk) = blockOffset;
298 hostBlockOffsets(numBlocks) = hostBlockOffsets(numBlocks-1) + blockGlobalIndexers[blockGlobalIndexers.size()-1]->getElementBlockGIDCount(blockId);
299 Kokkos::deep_copy(blockOffsets_,hostBlockOffsets);
303 int max_blockDerivativeSize = 0;
304 for (
int blk=0;blk<numBlocks;blk++) {
305 const int blockDerivativeSize = hostBlockOffsets(blk+1) - hostBlockOffsets(blk);
306 if ( blockDerivativeSize > max_blockDerivativeSize )
307 max_blockDerivativeSize = blockDerivativeSize;
310 "ScatterResidual_BlockedTpetra(Jacobian):workset_vals_",
311 scatterFields_[0].extent(0), max_blockDerivativeSize );
315 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
320 using Teuchos::rcp_dynamic_cast;
323 blockedContainer_ = rcp_dynamic_cast<
const ContainerType>(d.gedc->getDataObject(globalDataKey_));
325 if(blockedContainer_==Teuchos::null) {
332 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
337 using Teuchos::rcp_dynamic_cast;
342 const auto& localCellIds = this->wda(workset).cell_local_ids_k;
344 const int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
354 using LocalMatrixType = KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>,
size_t>;
355 typename PHX::View<LocalMatrixType**>::HostMirror
356 hostJacTpetraBlocks(
"panzer::ScatterResidual_BlockTpetra<Jacobian>::hostJacTpetraBlocks", numFieldBlocks,numFieldBlocks);
358 PHX::View<int**> blockExistsInJac = PHX::View<int**>(
"blockExistsInJac_",numFieldBlocks,numFieldBlocks);
359 auto hostBlockExistsInJac = Kokkos::create_mirror_view(blockExistsInJac);
361 for (
int row=0; row < numFieldBlocks; ++row) {
362 for (
int col=0; col < numFieldBlocks; ++col) {
363 const auto thyraTpetraOperator = rcp_dynamic_cast<Thyra::TpetraLinearOp<double,LO,GO,NodeT>>(Jac->getNonconstBlock(row,col),
false);
364 if (
nonnull(thyraTpetraOperator)) {
380 const auto tpetraCrsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),
true);
381 const auto managedMatrix = tpetraCrsMatrix->getLocalMatrixDevice();
382 const auto managedGraph = managedMatrix.graph;
385 using StaticCrsGraphType =
typename LocalMatrixType::StaticCrsGraphType;
386 StaticCrsGraphType unmanagedGraph;
387 unmanagedGraph.entries =
typename StaticCrsGraphType::entries_type(managedGraph.entries.data(),managedGraph.entries.extent(0));
388 unmanagedGraph.row_map =
typename StaticCrsGraphType::row_map_type(managedGraph.row_map.data(),managedGraph.row_map.extent(0));
389 unmanagedGraph.row_block_offsets =
typename StaticCrsGraphType::row_block_type(managedGraph.row_block_offsets.data(),managedGraph.row_block_offsets.extent(0));
391 typename LocalMatrixType::values_type unmanagedValues(managedMatrix.values.data(),managedMatrix.values.extent(0));
392 LocalMatrixType unmanagedMatrix(managedMatrix.values.label(), managedMatrix.numCols(), unmanagedValues, unmanagedGraph);
393 new (&hostJacTpetraBlocks(row,col)) LocalMatrixType(unmanagedMatrix);
396 hostBlockExistsInJac(row,col) = 1;
399 hostBlockExistsInJac(row,col) = 0;
403 typename PHX::View<LocalMatrixType**>
404 jacTpetraBlocks(
"panzer::ScatterResidual_BlockedTpetra<Jacobian>::jacTpetraBlocks",numFieldBlocks,numFieldBlocks);
405 Kokkos::deep_copy(jacTpetraBlocks,hostJacTpetraBlocks);
406 Kokkos::deep_copy(blockExistsInJac,hostBlockExistsInJac);
413 const auto& globalIndexers = globalIndexer_->getFieldDOFManagers();
414 auto blockOffsets_h = Kokkos::create_mirror_view(blockOffsets_);
415 Kokkos::deep_copy(blockOffsets_h, blockOffsets_);
416 for (
size_t block=0; block < globalIndexers.size(); ++block) {
417 const auto subviewOfBlockLIDs = Kokkos::subview(worksetLIDs_,Kokkos::ALL(), std::make_pair(blockOffsets_h(block),blockOffsets_h(block+1)));
418 globalIndexers[block]->getElementLIDs(localCellIds,subviewOfBlockLIDs);
422 for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
424 const int blockRowIndex = productVectorBlockIndex_[fieldIndex];
425 typename Tpetra::Vector<double,LO,GO,PHX::Device>::dual_view_type::t_dev kokkosResidual;
427 auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(blockRowIndex),
true))->getTpetraVector());
428 kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
432 const PHX::View<const int*> fieldOffsets = fieldOffsets_[fieldIndex];
433 const PHX::View<const ScalarT**> fieldValues = scatterFields_[fieldIndex].get_static_view();
434 const PHX::View<const LO*> blockOffsets = blockOffsets_;
439 Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (
const int& cell) {
440 for(
int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
442 typename FieldType::array_type::reference_type tmpFieldVal = fieldValues(cell,basis);
443 const int rowLID = worksetLIDs(cell,fieldOffsets(basis));
446 Kokkos::atomic_add(&kokkosResidual(rowLID,0), tmpFieldVal.val());
448 for (
int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
449 if (blockExistsInJac(blockRowIndex,blockColIndex)) {
450 const int start = blockOffsets(blockColIndex);
451 const int stop = blockOffsets(blockColIndex+1);
452 const int sensSize = stop-start;
454 for (
int i=0; i < sensSize; ++i)
455 workset_vals(cell,i) = tmpFieldVal.fastAccessDx(start+i);
457 jacTpetraBlocks(blockRowIndex,blockColIndex).sumIntoValues(rowLID, &worksetLIDs(cell,start), sensSize, &workset_vals(cell,0),
true,
true);
466 for (
int row=0; row < numFieldBlocks; ++row) {
467 for (
int col=0; col < numFieldBlocks; ++col) {
468 if (hostBlockExistsInJac(row,col)) {
469 hostJacTpetraBlocks(row,col).~CrsMatrix();
480 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
484 : globalIndexer_(indexer)
485 , globalDataKey_(
"Residual Scatter Container")
487 std::string scatterName = p.
get<std::string>(
"Scatter Name");
492 const std::vector<std::string>& names =
502 scatterFields_.resize(names.size());
503 for (std::size_t eq = 0; eq < names.size(); ++eq) {
507 this->addDependentField(scatterFields_[eq]);
511 this->addEvaluatedField(*scatterHolder_);
513 if (p.
isType<std::string>(
"Global Data Key"))
514 globalDataKey_ = p.
get<std::string>(
"Global Data Key");
516 this->setName(scatterName+
" Scatter Residual (Tangent)");
520 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
525 const Workset & workset_0 = (*d.worksets_)[0];
526 const std::string blockId = this->wda(workset_0).
block_id;
528 fieldIds_.resize(scatterFields_.size());
529 fieldOffsets_.resize(scatterFields_.size());
530 fieldGlobalIndexers_.resize(scatterFields_.size());
531 productVectorBlockIndex_.resize(scatterFields_.size());
532 int maxElementBlockGIDCount = -1;
533 for(std::size_t fd=0; fd < scatterFields_.size(); ++fd) {
534 const std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
535 const int globalFieldNum = globalIndexer_->getFieldNum(fieldName);
536 productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
537 fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
538 fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName);
540 const std::vector<int>&
offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]);
541 fieldOffsets_[fd] = PHX::View<int*>(
"ScatterResidual_BlockedTpetra(Tangent):fieldOffsets",offsets.size());
542 auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
543 for (std::size_t i=0; i < offsets.size(); ++i)
544 hostOffsets(i) = offsets[i];
545 Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
547 maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount);
553 worksetLIDs_ = PHX::View<LO**>(
"ScatterResidual_BlockedTpetra(Tangent):worksetLIDs",
554 scatterFields_[0].extent(0),
555 maxElementBlockGIDCount);
559 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
564 using Teuchos::rcp_dynamic_cast;
568 std::vector<std::string> activeParameters =
571 const int numBlocks =
static_cast<int>(globalIndexer_->getFieldDOFManagers().size());
573 dfdpFieldsVoV_.initialize(
"ScatterResidual_Tpetra<Tangent>::dfdpFieldsVoV_",activeParameters.size(),numBlocks);
575 for(std::size_t i=0;i<activeParameters.size();i++) {
578 rcp_dynamic_cast<ProductVectorBase<double>>(paramBlockedContainer->get_f(),
true);
579 for(
int j=0;j<numBlocks;j++) {
580 auto& tpetraBlock = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(productVector->getNonconstVectorBlock(j),
true))->getTpetraVector());
581 const auto& dfdp_view = tpetraBlock.getLocalViewDevice(Tpetra::Access::ReadWrite);
582 dfdpFieldsVoV_.addView(dfdp_view,i,j);
586 dfdpFieldsVoV_.syncHostToDevice();
589 blockedContainer_ = rcp_dynamic_cast<
const ContainerType>(d.gedc->getDataObject(globalDataKey_));
591 if(blockedContainer_==Teuchos::null) {
598 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
603 using Teuchos::rcp_dynamic_cast;
607 const auto& localCellIds = this->wda(workset).cell_local_ids_k;
611 int currentWorksetLIDSubBlock = -1;
612 for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
614 if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) {
615 fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_);
616 currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex];
619 auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),
true))->getTpetraVector());
620 const auto& kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
623 const auto& fieldOffsets = fieldOffsets_[fieldIndex];
624 const auto& worksetLIDs = worksetLIDs_;
625 const auto& fieldValues = scatterFields_[fieldIndex].get_static_view();
626 const auto& tangentFieldsDevice = dfdpFieldsVoV_.getViewDevice();
627 const auto& kokkosTangents = Kokkos::subview(tangentFieldsDevice,Kokkos::ALL(),productVectorBlockIndex_[fieldIndex]);
628 const double num_params = Kokkos::dimension_scalar(fieldValues)-1;
630 Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (
const int& cell) {
631 for(
int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
632 const int lid = worksetLIDs(cell,fieldOffsets(basis));
633 Kokkos::atomic_add(&kokkosResidual(lid,0), fieldValues(cell,basis).val());
634 for(
int i_param=0; i_param<
num_params; i_param++)
635 kokkosTangents(i_param)(lid,0) += fieldValues(cell,basis).fastAccessDx(i_param);
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
void postRegistrationSetup(typename TRAITS::SetupData, PHX::FieldManager< TRAITS > &)
ScatterResidual_BlockedTpetra(const Teuchos::RCP< const BlockedDOFManager > &)
FieldType
The type of discretization to use for a field pattern.
Teuchos::RCP< LinearObjContainer > getGhostedLOC() const
PHX::View< const int * > offsets
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void evaluateFields(typename TRAITS::EvalData)
virtual void preEvaluate(typename Traits::PreEvalData d)=0
bool nonnull(const boost::shared_ptr< T > &p)
std::string block_id
DEPRECATED - use: getElementBlock()
PHX::MDField< ScalarT, panzer::Cell, panzer::BASIS > field
A field to which we'll contribute, or in which we'll store, the result of computing this integral...
bool isType(const std::string &name) const