11 #ifndef PANZER_SCATTER_RESIDUAL_BLOCKEDEPETRA_IMPL_HPP
12 #define PANZER_SCATTER_RESIDUAL_BLOCKEDEPETRA_IMPL_HPP
15 #include "Teuchos_Assert.hpp"
17 #include "Phalanx_DataLayout.hpp"
27 #include "Thyra_ProductVectorBase.hpp"
28 #include "Thyra_BlockedLinearOpBase.hpp"
29 #include "Thyra_TpetraVector.hpp"
30 #include "Thyra_TpetraLinearOp.hpp"
31 #include "Tpetra_CrsMatrix.hpp"
32 #include "KokkosSparse_CrsMatrix.hpp"
34 #include "Phalanx_DataLayout_MDALayout.hpp"
36 #include "Teuchos_FancyOStream.hpp"
38 template <
typename EvalT,
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
43 std::string scatterName = p.
get<std::string>(
"Scatter Name");
48 const std::vector<std::string>& names =
55 for (std::size_t eq = 0; eq < names.size(); ++eq) {
59 this->addDependentField(field.fieldTag());
63 this->addEvaluatedField(*scatterHolder);
65 this->setName(scatterName+
" Scatter Residual");
72 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
76 : globalIndexer_(indexer)
77 , globalDataKey_(
"Residual Scatter Container")
79 std::string scatterName = p.
get<std::string>(
"Scatter Name");
84 const std::vector<std::string>& names =
94 scatterFields_.resize(names.size());
95 for (std::size_t eq = 0; eq < names.size(); ++eq) {
99 this->addDependentField(scatterFields_[eq]);
103 this->addEvaluatedField(*scatterHolder_);
105 if (p.
isType<std::string>(
"Global Data Key"))
106 globalDataKey_ = p.
get<std::string>(
"Global Data Key");
108 this->setName(scatterName+
" Scatter Residual");
112 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
117 const Workset & workset_0 = (*d.worksets_)[0];
118 const std::string blockId = this->wda(workset_0).
block_id;
120 fieldIds_.resize(scatterFields_.size());
121 fieldOffsets_.resize(scatterFields_.size());
122 fieldGlobalIndexers_.resize(scatterFields_.size());
123 productVectorBlockIndex_.resize(scatterFields_.size());
124 int maxElementBlockGIDCount = -1;
125 for(std::size_t fd=0; fd < scatterFields_.size(); ++fd) {
126 const std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
127 const int globalFieldNum = globalIndexer_->getFieldNum(fieldName);
128 productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
129 fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
130 fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName);
132 const std::vector<int>&
offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]);
133 fieldOffsets_[fd] = PHX::View<int*>(
"ScatterResidual_BlockedTpetra(Residual):fieldOffsets",offsets.size());
134 auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
135 for (std::size_t i=0; i < offsets.size(); ++i)
136 hostOffsets(i) = offsets[i];
137 Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
139 maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount);
145 worksetLIDs_ = PHX::View<LO**>(
"ScatterResidual_BlockedTpetra(Residual):worksetLIDs",
146 scatterFields_[0].extent(0),
147 maxElementBlockGIDCount);
151 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
156 using Teuchos::rcp_dynamic_cast;
159 blockedContainer_ = rcp_dynamic_cast<
const ContainerType>(d.gedc->getDataObject(globalDataKey_));
161 if(blockedContainer_==Teuchos::null) {
168 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
173 using Teuchos::rcp_dynamic_cast;
177 const auto& localCellIds = this->wda(workset).cell_local_ids_k;
181 int currentWorksetLIDSubBlock = -1;
182 for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
184 if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) {
185 fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_);
186 currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex];
189 auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),
true))->getTpetraVector());
190 const auto& kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
193 const auto& fieldOffsets = fieldOffsets_[fieldIndex];
194 const auto& worksetLIDs = worksetLIDs_;
195 const auto& fieldValues = scatterFields_[fieldIndex].get_static_view();
197 Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (
const int& cell) {
198 for(
int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
199 const int lid = worksetLIDs(cell,fieldOffsets(basis));
200 Kokkos::atomic_add(&kokkosResidual(lid,0), fieldValues(cell,basis));
210 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
214 : globalIndexer_(indexer)
215 , globalDataKey_(
"Residual Scatter Container")
217 std::string scatterName = p.
get<std::string>(
"Scatter Name");
222 const std::vector<std::string>& names =
232 scatterFields_.resize(names.size());
233 for (std::size_t eq = 0; eq < names.size(); ++eq) {
237 this->addDependentField(scatterFields_[eq]);
241 this->addEvaluatedField(*scatterHolder_);
243 if (p.
isType<std::string>(
"Global Data Key"))
244 globalDataKey_ = p.
get<std::string>(
"Global Data Key");
246 this->setName(scatterName+
" Scatter Residual (Jacobian)");
250 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
255 const Workset & workset_0 = (*d.worksets_)[0];
256 const std::string blockId = this->wda(workset_0).
block_id;
258 fieldIds_.resize(scatterFields_.size());
259 fieldOffsets_.resize(scatterFields_.size());
260 productVectorBlockIndex_.resize(scatterFields_.size());
261 for (std::size_t fd=0; fd < scatterFields_.size(); ++fd) {
262 const std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
263 const int globalFieldNum = globalIndexer_->getFieldNum(fieldName);
264 productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
265 const auto& fieldGlobalIndexer = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
266 fieldIds_[fd] = fieldGlobalIndexer->getFieldNum(fieldName);
268 const std::vector<int>&
offsets = globalIndexer_->getGIDFieldOffsets(blockId,globalFieldNum);
269 fieldOffsets_[fd] = PHX::View<int*>(
"ScatterResidual_BlockedTpetra(Jacobian):fieldOffsets",offsets.size());
270 auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
271 for (std::size_t i=0; i < offsets.size(); ++i)
272 hostOffsets(i) = offsets[i];
273 Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
279 int elementBlockGIDCount = 0;
280 for (
const auto& blockDOFMgr : globalIndexer_->getFieldDOFManagers())
281 elementBlockGIDCount += blockDOFMgr->getElementBlockGIDCount(blockId);
284 "ScatterResidual_BlockedTpetra(Jacobian):worksetLIDs_",
285 scatterFields_[0].extent(0), elementBlockGIDCount );
288 const auto& blockGlobalIndexers = globalIndexer_->getFieldDOFManagers();
289 const int numBlocks =
static_cast<int>(globalIndexer_->getFieldDOFManagers().size());
290 blockOffsets_ = PHX::View<LO*>(
"ScatterResidual_BlockedTpetra(Jacobian):blockOffsets_",
292 const auto hostBlockOffsets = Kokkos::create_mirror_view(blockOffsets_);
293 for (
int blk=0;blk<numBlocks;blk++) {
294 int blockOffset = globalIndexer_->getBlockGIDOffset(blockId,blk);
295 hostBlockOffsets(blk) = blockOffset;
297 hostBlockOffsets(numBlocks) = hostBlockOffsets(numBlocks-1) + blockGlobalIndexers[blockGlobalIndexers.size()-1]->getElementBlockGIDCount(blockId);
298 Kokkos::deep_copy(blockOffsets_,hostBlockOffsets);
302 int max_blockDerivativeSize = 0;
303 for (
int blk=0;blk<numBlocks;blk++) {
304 const int blockDerivativeSize = hostBlockOffsets(blk+1) - hostBlockOffsets(blk);
305 if ( blockDerivativeSize > max_blockDerivativeSize )
306 max_blockDerivativeSize = blockDerivativeSize;
309 "ScatterResidual_BlockedTpetra(Jacobian):workset_vals_",
310 scatterFields_[0].extent(0), max_blockDerivativeSize );
314 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
319 using Teuchos::rcp_dynamic_cast;
322 blockedContainer_ = rcp_dynamic_cast<
const ContainerType>(d.gedc->getDataObject(globalDataKey_));
324 if(blockedContainer_==Teuchos::null) {
331 template <
typename TRAITS,
typename LO,
typename GO,
typename NodeT>
336 using Teuchos::rcp_dynamic_cast;
341 const auto& localCellIds = this->wda(workset).cell_local_ids_k;
343 const int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
353 using LocalMatrixType = KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>,
size_t>;
354 typename PHX::View<LocalMatrixType**>::HostMirror
355 hostJacTpetraBlocks(
"panzer::ScatterResidual_BlockTpetra<Jacobian>::hostJacTpetraBlocks", numFieldBlocks,numFieldBlocks);
357 PHX::View<int**> blockExistsInJac = PHX::View<int**>(
"blockExistsInJac_",numFieldBlocks,numFieldBlocks);
358 auto hostBlockExistsInJac = Kokkos::create_mirror_view(blockExistsInJac);
360 for (
int row=0; row < numFieldBlocks; ++row) {
361 for (
int col=0; col < numFieldBlocks; ++col) {
362 const auto thyraTpetraOperator = rcp_dynamic_cast<Thyra::TpetraLinearOp<double,LO,GO,NodeT>>(Jac->getNonconstBlock(row,col),
false);
363 if (
nonnull(thyraTpetraOperator)) {
379 const auto tpetraCrsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),
true);
380 const auto managedMatrix = tpetraCrsMatrix->getLocalMatrixDevice();
381 const auto managedGraph = managedMatrix.graph;
384 using StaticCrsGraphType =
typename LocalMatrixType::StaticCrsGraphType;
385 StaticCrsGraphType unmanagedGraph;
386 unmanagedGraph.entries =
typename StaticCrsGraphType::entries_type(managedGraph.entries.data(),managedGraph.entries.extent(0));
387 unmanagedGraph.row_map =
typename StaticCrsGraphType::row_map_type(managedGraph.row_map.data(),managedGraph.row_map.extent(0));
388 unmanagedGraph.row_block_offsets =
typename StaticCrsGraphType::row_block_type(managedGraph.row_block_offsets.data(),managedGraph.row_block_offsets.extent(0));
390 typename LocalMatrixType::values_type unmanagedValues(managedMatrix.values.data(),managedMatrix.values.extent(0));
391 LocalMatrixType unmanagedMatrix(managedMatrix.values.label(), managedMatrix.numCols(), unmanagedValues, unmanagedGraph);
392 new (&hostJacTpetraBlocks(row,col)) LocalMatrixType(unmanagedMatrix);
395 hostBlockExistsInJac(row,col) = 1;
398 hostBlockExistsInJac(row,col) = 0;
402 typename PHX::View<LocalMatrixType**>
403 jacTpetraBlocks(
"panzer::ScatterResidual_BlockedTpetra<Jacobian>::jacTpetraBlocks",numFieldBlocks,numFieldBlocks);
404 Kokkos::deep_copy(jacTpetraBlocks,hostJacTpetraBlocks);
405 Kokkos::deep_copy(blockExistsInJac,hostBlockExistsInJac);
412 const auto& globalIndexers = globalIndexer_->getFieldDOFManagers();
413 auto blockOffsets_h = Kokkos::create_mirror_view(blockOffsets_);
414 Kokkos::deep_copy(blockOffsets_h, blockOffsets_);
415 for (
size_t block=0; block < globalIndexers.size(); ++block) {
416 const auto subviewOfBlockLIDs = Kokkos::subview(worksetLIDs_,Kokkos::ALL(), std::make_pair(blockOffsets_h(block),blockOffsets_h(block+1)));
417 globalIndexers[block]->getElementLIDs(localCellIds,subviewOfBlockLIDs);
421 for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
423 const int blockRowIndex = productVectorBlockIndex_[fieldIndex];
424 typename Tpetra::Vector<double,LO,GO,PHX::Device>::dual_view_type::t_dev kokkosResidual;
426 auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(blockRowIndex),
true))->getTpetraVector());
427 kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
431 const PHX::View<const int*> fieldOffsets = fieldOffsets_[fieldIndex];
432 const PHX::View<const ScalarT**> fieldValues = scatterFields_[fieldIndex].get_static_view();
433 const PHX::View<const LO*> blockOffsets = blockOffsets_;
438 Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (
const int& cell) {
439 for(
int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
441 typename FieldType::array_type::reference_type tmpFieldVal = fieldValues(cell,basis);
442 const int rowLID = worksetLIDs(cell,fieldOffsets(basis));
445 Kokkos::atomic_add(&kokkosResidual(rowLID,0), tmpFieldVal.val());
447 for (
int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
448 if (blockExistsInJac(blockRowIndex,blockColIndex)) {
449 const int start = blockOffsets(blockColIndex);
450 const int stop = blockOffsets(blockColIndex+1);
451 const int sensSize = stop-start;
453 for (
int i=0; i < sensSize; ++i)
454 workset_vals(cell,i) = tmpFieldVal.fastAccessDx(start+i);
456 jacTpetraBlocks(blockRowIndex,blockColIndex).sumIntoValues(rowLID, &worksetLIDs(cell,start), sensSize, &workset_vals(cell,0),
true,
true);
465 for (
int row=0; row < numFieldBlocks; ++row) {
466 for (
int col=0; col < numFieldBlocks; ++col) {
467 if (hostBlockExistsInJac(row,col)) {
468 hostJacTpetraBlocks(row,col).~CrsMatrix();
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
void postRegistrationSetup(typename TRAITS::SetupData, PHX::FieldManager< TRAITS > &)
ScatterResidual_BlockedTpetra(const Teuchos::RCP< const BlockedDOFManager > &)
FieldType
The type of discretization to use for a field pattern.
Teuchos::RCP< LinearObjContainer > getGhostedLOC() const
PHX::View< const int * > offsets
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void evaluateFields(typename TRAITS::EvalData)
virtual void preEvaluate(typename Traits::PreEvalData d)=0
bool nonnull(const boost::shared_ptr< T > &p)
std::string block_id
DEPRECATED - use: getElementBlock()
PHX::MDField< ScalarT, panzer::Cell, panzer::BASIS > field
A field to which we'll contribute, or in which we'll store, the result of computing this integral...
bool isType(const std::string &name) const