Panzer  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Panzer: A partial differential equation assembly
4 // engine for strongly coupled complex multiphysics systems
5 //
6 // Copyright 2011 NTESS and the Panzer contributors.
7 // SPDX-License-Identifier: BSD-3-Clause
8 // *****************************************************************************
9 // @HEADER
10 
11 #ifndef PANZER_SCATTER_DIRICHLET_RESIDUAL_BLOCEDTPETRA_IMPL_HPP
12 #define PANZER_SCATTER_DIRICHLET_RESIDUAL_BLOCEDTPETRA_IMPL_HPP
13 
14 #include "Teuchos_RCP.hpp"
15 #include "Teuchos_Assert.hpp"
16 
17 #include "Phalanx_DataLayout.hpp"
18 
19 // #include "Epetra_Map.h"
20 // #include "Epetra_Vector.h"
21 // #include "Epetra_CrsMatrix.h"
22 
23 #include "Panzer_GlobalIndexer.hpp"
25 #include "Panzer_PureBasis.hpp"
29 
30 #include "Phalanx_DataLayout_MDALayout.hpp"
31 
32 #include "Thyra_SpmdVectorBase.hpp"
33 #include "Thyra_ProductVectorBase.hpp"
34 #include "Thyra_BlockedLinearOpBase.hpp"
35 // #include "Thyra_get_Epetra_Operator.hpp"
36 
37 #include "Teuchos_FancyOStream.hpp"
38 
39 #include <unordered_map>
40 
41 template <typename EvalT,typename TRAITS,typename LO,typename GO,typename NodeT>
44  const Teuchos::ParameterList& p)
45 {
46  std::string scatterName = p.get<std::string>("Scatter Name");
47  Teuchos::RCP<PHX::FieldTag> scatterHolder =
49 
50  // get names to be evaluated
51  const std::vector<std::string>& names =
52  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
53 
55  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional;
56 
57  // build the vector of fields that this is dependent on
58  for (std::size_t eq = 0; eq < names.size(); ++eq) {
60 
61  // tell the field manager that we depend on this field
62  this->addDependentField(scatterField.fieldTag());
63  }
64 
65  // this is what this evaluator provides
66  this->addEvaluatedField(*scatterHolder);
67 
68  this->setName(scatterName+" Scatter Residual");
69 }
70 
71 // **********************************************************************
72 // Specialization: Residual
73 // **********************************************************************
74 
75 
76 template <typename TRAITS,typename LO,typename GO,typename NodeT>
79  const Teuchos::ParameterList& p)
80  : globalIndexer_(indexer)
81  , globalDataKey_("Residual Scatter Container")
82 {
83  std::string scatterName = p.get<std::string>("Scatter Name");
84  scatterHolder_ =
86 
87  // get names to be evaluated
88  const std::vector<std::string>& names =
89  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
90 
91  // grab map from evaluated names to field names
92  fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >("Dependent Map");
93 
94  // determine if we are scattering an initial condition
95  scatterIC_ = p.isParameter("Scatter Initial Condition") ? p.get<bool>("Scatter Initial Condition") : false;
96 
97  Teuchos::RCP<PHX::DataLayout> dl = (!scatterIC_) ?
98  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional :
99  p.get< Teuchos::RCP<const panzer::PureBasis> >("Basis")->functional;
100  if (!scatterIC_) {
101  side_subcell_dim_ = p.get<int>("Side Subcell Dimension");
102  local_side_id_ = p.get<int>("Local Side ID");
103  }
104 
105  // build the vector of fields that this is dependent on
106  scatterFields_.resize(names.size());
107  for (std::size_t eq = 0; eq < names.size(); ++eq) {
108  scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
109 
110  // tell the field manager that we depend on this field
111  this->addDependentField(scatterFields_[eq]);
112  }
113 
114  checkApplyBC_ = p.isParameter("Check Apply BC") ? p.get<bool>("Check Apply BC") : false;
115  applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture
116  if (checkApplyBC_) {
117  for (std::size_t eq = 0; eq < names.size(); ++eq) {
118  applyBC_[eq] = PHX::MDField<const bool,Cell,NODE>(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl);
119  this->addDependentField(applyBC_[eq]);
120  }
121  }
122 
123  // this is what this evaluator provides
124  this->addEvaluatedField(*scatterHolder_);
125 
126  if (p.isType<std::string>("Global Data Key"))
127  globalDataKey_ = p.get<std::string>("Global Data Key");
128 
129  this->setName(scatterName+" Scatter Dirichlet Residual");
130 }
131 
132 // **********************************************************************
133 template <typename TRAITS,typename LO,typename GO,typename NodeT>
135 postRegistrationSetup(typename TRAITS::SetupData d,
136  PHX::FieldManager<TRAITS>& /* fm */)
137 {
138  const Workset & workset_0 = (*d.worksets_)[0];
139  const std::string blockId = this->wda(workset_0).block_id;
140 
141  fieldIds_.resize(scatterFields_.size());
142  fieldOffsets_.resize(scatterFields_.size());
143  basisIndexForMDFieldOffsets_.resize(scatterFields_.size());
144  fieldGlobalIndexers_.resize(scatterFields_.size());
145  productVectorBlockIndex_.resize(scatterFields_.size());
146  int maxElementBlockGIDCount = -1;
147  for(std::size_t fd=0;fd<scatterFields_.size();++fd) {
148  // get field ID from DOF manager
149  std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
150 
151  const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager
152  productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
153  fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
154  fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName); // Field number in the sub-global-indexer
155 
156  // Offsets and basisIndex depend on whether scattering IC or Dirichlet BC
157  if (!scatterIC_) {
158  const auto& offsetPair = fieldGlobalIndexers_[fd]->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_);
159  {
160  const auto& offsets = offsetPair.first;
161  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Residual):fieldOffsets",offsets.size());
162  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
163  for (std::size_t i=0; i < offsets.size(); ++i)
164  hostOffsets(i) = offsets[i];
165  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
166  }
167  {
168  const auto& basisIndex = offsetPair.second;
169  basisIndexForMDFieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Residual):basisIndexForMDFieldOffsets",basisIndex.size());
170  auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]);
171  for (std::size_t i=0; i < basisIndex.size(); ++i)
172  hostBasisIndex(i) = basisIndex[i];
173  Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex);
174  }
175  }
176  else {
177  // For ICs, only need offsets, not basisIndex
178  const std::vector<int>& offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]);
179  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Residual):fieldOffsets",offsets.size());
180  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
181  for (std::size_t i=0; i < offsets.size(); ++i)
182  hostOffsets(i) = offsets[i];
183  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
184  }
185 
186  maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount);
187  }
188 
189  // We will use one workset lid view for all fields, but has to be
190  // sized big enough to hold the largest elementBlockGIDCount in the
191  // ProductVector.
192  worksetLIDs_ = PHX::View<LO**>("ScatterResidual_BlockedTpetra(Residual):worksetLIDs",
193  scatterFields_[0].extent(0),
194  maxElementBlockGIDCount);
195 }
196 
197 // **********************************************************************
198 template <typename TRAITS,typename LO,typename GO,typename NodeT>
200 preEvaluate(typename TRAITS::PreEvalData d)
201 {
202  // extract dirichlet counter from container
203  Teuchos::RCP<const ContainerType> blockContainer
204  = Teuchos::rcp_dynamic_cast<ContainerType>(d.gedc->getDataObject("Dirichlet Counter"),true);
205 
206  dirichletCounter_ = Teuchos::rcp_dynamic_cast<Thyra::ProductVectorBase<double> >(blockContainer->get_f(),true);
207  TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_));
208 
209  // extract linear object container
210  blockedContainer_ = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject(globalDataKey_),true);
211  TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_));
212 }
213 
214 // **********************************************************************
215 template <typename TRAITS,typename LO,typename GO,typename NodeT>
217 evaluateFields(typename TRAITS::EvalData workset)
218 {
219  using Teuchos::RCP;
220  using Teuchos::rcp_dynamic_cast;
221  using Thyra::VectorBase;
223 
224  const auto& localCellIds = this->wda(workset).cell_local_ids_k;
225 
226  RCP<ProductVectorBase<double> > thyraScatterTarget = (!scatterIC_) ?
227  rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_f(),true) :
228  rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_x(),true);
229 
230  // Loop over scattered fields
231  int currentWorksetLIDSubBlock = -1;
232  for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
233  // workset LIDs only change for different sub blocks
234  if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) {
235  fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_);
236  currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex];
237  }
238 
239  // Get Scatter target block
240  auto& tpetraScatterTarget = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraScatterTarget->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector());
241  const auto& kokkosScatterTarget = tpetraScatterTarget.getLocalViewDevice(Tpetra::Access::ReadWrite);
242 
243  // Get dirichlet counter block
244  auto& tpetraDirichletCounter = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(dirichletCounter_->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector());
245  const auto& kokkosDirichletCounter = tpetraDirichletCounter.getLocalViewDevice(Tpetra::Access::ReadWrite);
246 
247  // Class data fields for lambda capture
248  const auto fieldOffsets = fieldOffsets_[fieldIndex];
249  const auto basisIndices = basisIndexForMDFieldOffsets_[fieldIndex];
250  const auto worksetLIDs = worksetLIDs_;
251  const auto fieldValues = scatterFields_[fieldIndex].get_static_view();
252  const auto applyBC = applyBC_[fieldIndex].get_static_view();
253  const bool checkApplyBC = checkApplyBC_;
254 
255  if (!scatterIC_) {
256 
257  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
258  for (int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
259  const int lid = worksetLIDs(cell,fieldOffsets(basis));
260  if (lid < 0) // not on this processor!
261  continue;
262  const int basisIndex = basisIndices(basis);
263 
264  // Possible warp divergence for hierarchic
265  if (checkApplyBC)
266  if (!applyBC(cell,basisIndex))
267  continue;
268 
269  kokkosScatterTarget(lid,0) = fieldValues(cell,basisIndex);
270  kokkosDirichletCounter(lid,0) = 1.0;
271  }
272  });
273 
274  } else {
275 
276  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
277  for (int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
278  const int lid = worksetLIDs(cell,fieldOffsets(basis));
279  if (lid < 0) // not on this processor!
280  continue;
281  kokkosScatterTarget(lid,0) = fieldValues(cell,basis);
282  kokkosDirichletCounter(lid,0) = 1.0;
283  }
284  });
285 
286  }
287  }
288 
289 }
290 
291 // **********************************************************************
292 // Specialization: Jacobian
293 // **********************************************************************
294 
295 template <typename TRAITS,typename LO,typename GO,typename NodeT>
298  const Teuchos::ParameterList& p)
299  : globalIndexer_(indexer)
300  , globalDataKey_("Residual Scatter Container")
301 {
302  std::string scatterName = p.get<std::string>("Scatter Name");
303  scatterHolder_ =
305 
306  // get names to be evaluated
307  const std::vector<std::string>& names =
308  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
309 
310  // grab map from evaluated names to field names
311  fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >("Dependent Map");
312 
314  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional;
315 
316  side_subcell_dim_ = p.get<int>("Side Subcell Dimension");
317  local_side_id_ = p.get<int>("Local Side ID");
318 
319  // build the vector of fields that this is dependent on
320  scatterFields_.resize(names.size());
321  for (std::size_t eq = 0; eq < names.size(); ++eq) {
322  scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
323 
324  // tell the field manager that we depend on this field
325  this->addDependentField(scatterFields_[eq]);
326  }
327 
328  checkApplyBC_ = p.get<bool>("Check Apply BC");
329  applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture
330  if (checkApplyBC_) {
331  for (std::size_t eq = 0; eq < names.size(); ++eq) {
332  applyBC_[eq] = PHX::MDField<const bool,Cell,NODE>(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl);
333  this->addDependentField(applyBC_[eq]);
334  }
335  }
336 
337  // this is what this evaluator provides
338  this->addEvaluatedField(*scatterHolder_);
339 
340  if (p.isType<std::string>("Global Data Key"))
341  globalDataKey_ = p.get<std::string>("Global Data Key");
342 
343  this->setName(scatterName+" Scatter Dirichlet Residual (Jacobian)");
344 }
345 
346 // **********************************************************************
347 template <typename TRAITS,typename LO,typename GO,typename NodeT>
349 postRegistrationSetup(typename TRAITS::SetupData d,
350  PHX::FieldManager<TRAITS>& /* fm */)
351 {
352  const Workset & workset_0 = (*d.worksets_)[0];
353  const std::string blockId = this->wda(workset_0).block_id;
354 
355  fieldIds_.resize(scatterFields_.size());
356  fieldOffsets_.resize(scatterFields_.size());
357  basisIndexForMDFieldOffsets_.resize(scatterFields_.size());
358  productVectorBlockIndex_.resize(scatterFields_.size());
359  for(std::size_t fd=0;fd<scatterFields_.size();++fd) {
360 
361  std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
362  const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager
363  productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
364  const auto& fieldGlobalIndexer = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
365  fieldIds_[fd] = fieldGlobalIndexer->getFieldNum(fieldName); // Field number in the sub-global-indexer
366 
367  const auto& offsetPair = fieldGlobalIndexer->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_);
368  {
369  const auto& offsets = offsetPair.first;
370  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Jacobian):fieldOffsets",offsets.size());
371  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
372  for (std::size_t i=0; i < offsets.size(); ++i)
373  hostOffsets(i) = offsets[i];
374  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
375  }
376  {
377  const auto& basisIndex = offsetPair.second;
378  basisIndexForMDFieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Jacobian):basisIndexForMDFieldOffsets",basisIndex.size());
379  auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]);
380  for (std::size_t i=0; i < basisIndex.size(); ++i)
381  hostBasisIndex(i) = basisIndex[i];
382  Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex);
383  }
384  }
385 
386  // This is sized differently than the Residual implementation since
387  // we need the LIDs for all sub-blocks, not just the single
388  // sub-block for the field residual scatter.
389  int elementBlockGIDCount = 0;
390  for (const auto& blockDOFMgr : globalIndexer_->getFieldDOFManagers())
391  elementBlockGIDCount += blockDOFMgr->getElementBlockGIDCount(blockId);
392 
393  worksetLIDs_ = PHX::View<LO**>("ScatterDirichletResidual_BlockedTpetra(Jacobian):worksetLIDs",
394  scatterFields_[0].extent(0),
395  elementBlockGIDCount);
396 
397  // Compute the block offsets
398  const auto& blockGlobalIndexers = globalIndexer_->getFieldDOFManagers();
399  const int numBlocks = static_cast<int>(globalIndexer_->getFieldDOFManagers().size());
400  blockOffsets_ = PHX::View<LO*>("ScatterDirichletResidual_BlockedTpetra(Jacobian):blockOffsets_",
401  numBlocks+1); // Number of fields, plus a sentinel
402  const auto hostBlockOffsets = Kokkos::create_mirror_view(blockOffsets_);
403  for (int blk=0;blk<numBlocks;blk++) {
404  int blockOffset = globalIndexer_->getBlockGIDOffset(blockId,blk);
405  hostBlockOffsets(blk) = blockOffset;
406  }
407  hostBlockOffsets(numBlocks) = hostBlockOffsets(numBlocks-1) + blockGlobalIndexers[blockGlobalIndexers.size()-1]->getElementBlockGIDCount(blockId);
408  Kokkos::deep_copy(blockOffsets_,hostBlockOffsets);
409 
410  // Make sure the that hard coded derivative dimension in the
411  // evaluate call is large enough to hold all derivatives for each
412  // sub block load
413  for (int blk=0;blk<numBlocks;blk++) {
414  const int blockDerivativeSize = hostBlockOffsets(blk+1) - hostBlockOffsets(blk);
415  TEUCHOS_TEST_FOR_EXCEPTION(blockDerivativeSize > maxDerivativeArraySize_, std::runtime_error,
416  "ERROR: the derivative dimension for sub block "
417  << blk << "with a value of " << blockDerivativeSize
418  << "is larger than the size allocated for cLIDs and vals "
419  << "in the evaluate call! You must manually increase the "
420  << "size and recompile!");
421  }
422 }
423 
424 // **********************************************************************
425 template <typename TRAITS,typename LO,typename GO,typename NodeT>
427 preEvaluate(typename TRAITS::PreEvalData d)
428 {
429  // extract dirichlet counter from container
430  Teuchos::RCP<const ContainerType> blockContainer
431  = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject("Dirichlet Counter"),true);
432 
433  dirichletCounter_ = Teuchos::rcp_dynamic_cast<Thyra::ProductVectorBase<double> >(blockContainer->get_f(),true);
434  TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_));
435 
436  // extract linear object container
437  blockedContainer_ = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject(globalDataKey_),true);
438  TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_));
439 }
440 
441 // **********************************************************************
442 template <typename TRAITS,typename LO,typename GO,typename NodeT>
444 evaluateFields(typename TRAITS::EvalData workset)
445 {
446  using Teuchos::RCP;
447  using Teuchos::rcp_dynamic_cast;
448  using Thyra::VectorBase;
451 
452  const auto& localCellIds = this->wda(workset).cell_local_ids_k;
453 
454  const int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
455  const RCP<const ContainerType> blockedContainer = blockedContainer_;
456  const RCP<ProductVectorBase<double>> thyraBlockResidual = rcp_dynamic_cast<ProductVectorBase<double>>(blockedContainer_->get_f());
457  const bool haveResidual = Teuchos::nonnull(thyraBlockResidual);
458  const RCP<BlockedLinearOpBase<double>> Jac = rcp_dynamic_cast<BlockedLinearOpBase<double>>(blockedContainer_->get_A(),true);
459 
460  // Get the local data for the sub-block crs matrices. First allocate
461  // on host and then deep_copy to device. The sub-blocks are
462  // unmanaged since they are allocated and ref counted separately on
463  // host.
464  using LocalMatrixType = KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>, size_t>;
465  typename PHX::View<LocalMatrixType**>::HostMirror
466  hostJacTpetraBlocks("panzer::ScatterResidual_BlockTpetra<Jacobian>::hostJacTpetraBlocks", numFieldBlocks,numFieldBlocks);
467 
468  PHX::View<int**> blockExistsInJac = PHX::View<int**>("blockExistsInJac_",numFieldBlocks,numFieldBlocks);
469  auto hostBlockExistsInJac = Kokkos::create_mirror_view(blockExistsInJac);
470 
471  for (int row=0; row < numFieldBlocks; ++row) {
472  for (int col=0; col < numFieldBlocks; ++col) {
473  const auto thyraTpetraOperator = rcp_dynamic_cast<Thyra::TpetraLinearOp<double,LO,GO,NodeT>>(Jac->getNonconstBlock(row,col),false);
474  if (nonnull(thyraTpetraOperator)) {
475 
476  // HACK to enforce views in the CrsGraph to be
477  // Unmanaged. Passing in the MemoryTrait<Unmanaged> doesn't
478  // work as the CrsGraph in the CrsMatrix ignores the
479  // MemoryTrait. Need to use the runtime constructor by passing
480  // in points to ensure Unmanaged. See:
481  // https://github.com/kokkos/kokkos/issues/1581
482 
483  // These two lines are the original code we can revert to when #1581 is fixed.
484  // const auto crsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),true);
485  // new (&hostJacTpetraBlocks(row,col)) KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>> (crsMatrix->getLocalMatrix());
486 
487  // Instead do this
488  {
489  // Grab the local managed matrix and graph
490  const auto tpetraCrsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),true);
491  const auto managedMatrix = tpetraCrsMatrix->getLocalMatrixDevice();
492  const auto managedGraph = managedMatrix.graph;
493 
494  // Create runtime unmanaged versions
495  using StaticCrsGraphType = typename LocalMatrixType::StaticCrsGraphType;
496  StaticCrsGraphType unmanagedGraph;
497  unmanagedGraph.entries = typename StaticCrsGraphType::entries_type(managedGraph.entries.data(),managedGraph.entries.extent(0));
498  unmanagedGraph.row_map = typename StaticCrsGraphType::row_map_type(managedGraph.row_map.data(),managedGraph.row_map.extent(0));
499  unmanagedGraph.row_block_offsets = typename StaticCrsGraphType::row_block_type(managedGraph.row_block_offsets.data(),managedGraph.row_block_offsets.extent(0));
500 
501  typename LocalMatrixType::values_type unmanagedValues(managedMatrix.values.data(),managedMatrix.values.extent(0));
502  LocalMatrixType unmanagedMatrix(managedMatrix.values.label(), managedMatrix.numCols(), unmanagedValues, unmanagedGraph);
503  new (&hostJacTpetraBlocks(row,col)) LocalMatrixType(unmanagedMatrix);
504  }
505 
506  hostBlockExistsInJac(row,col) = 1;
507  }
508  else {
509  hostBlockExistsInJac(row,col) = 0;
510  }
511  }
512  }
513  typename PHX::View<LocalMatrixType**>
514  jacTpetraBlocks("panzer::ScatterResidual_BlockedTpetra<Jacobian>::jacTpetraBlocks",numFieldBlocks,numFieldBlocks);
515  Kokkos::deep_copy(jacTpetraBlocks,hostJacTpetraBlocks);
516  Kokkos::deep_copy(blockExistsInJac,hostBlockExistsInJac);
517 
518  // worksetLIDs is larger for Jacobian than Residual fill. Need the
519  // entire set of field offsets for derivative indexing no matter
520  // which block row you are scattering. The residual only needs the
521  // lids for the sub-block that it is scattering to. The subviews
522  // below are to offset the LID blocks correctly.
523  const auto& globalIndexers = globalIndexer_->getFieldDOFManagers();
524  auto blockOffsets_h = Kokkos::create_mirror_view(blockOffsets_);
525  Kokkos::deep_copy(blockOffsets_h, blockOffsets_);
526  for (size_t block=0; block < globalIndexers.size(); ++block) {
527  const auto subviewOfBlockLIDs = Kokkos::subview(worksetLIDs_,Kokkos::ALL(), std::make_pair(blockOffsets_h(block),blockOffsets_h(block+1)));
528  globalIndexers[block]->getElementLIDs(localCellIds,subviewOfBlockLIDs);
529  }
530 
531  // Loop over scattered fields
532  for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
533 
534  const int blockRowIndex = productVectorBlockIndex_[fieldIndex];
535  typename Tpetra::Vector<double,LO,GO,PHX::Device>::dual_view_type::t_dev kokkosResidual;
536  if (haveResidual) {
537  auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(blockRowIndex),true))->getTpetraVector());
538  kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
539  }
540 
541  // Get dirichlet counter block
542  auto& tpetraDirichletCounter = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(dirichletCounter_->getNonconstVectorBlock(blockRowIndex),true))->getTpetraVector());
543  const auto& kokkosDirichletCounter = tpetraDirichletCounter.getLocalViewDevice(Tpetra::Access::ReadWrite);
544 
545  // Class data fields for lambda capture
546  const PHX::View<const int*> fieldOffsets = fieldOffsets_[fieldIndex];
547  const auto& basisIndices = basisIndexForMDFieldOffsets_[fieldIndex];
548  const PHX::View<const LO**> worksetLIDs = worksetLIDs_;
549  const PHX::View<const ScalarT**> fieldValues = scatterFields_[fieldIndex].get_static_view();
550  const PHX::View<const LO*> blockOffsets = blockOffsets_;
551  const auto& applyBC = applyBC_[fieldIndex].get_static_view();
552  const bool checkApplyBC = checkApplyBC_;
553 
554  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
555  LO cLIDs[maxDerivativeArraySize_];
556  typename Sacado::ScalarType<ScalarT>::type vals[maxDerivativeArraySize_];
557 
558  for(int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
559  const int block_row_offset = blockOffsets(blockRowIndex);
560  const int rowLID = worksetLIDs(cell,block_row_offset+fieldOffsets(basis));
561 
562  if (rowLID < 0) // not on this processor!
563  continue;
564 
565  const int basisIndex = basisIndices(basis);
566 
567  // Possible warp divergence for hierarchic
568  if (checkApplyBC)
569  if (!applyBC(cell,basisIndex))
570  continue;
571 
573  typename FieldType::array_type::reference_type tmpFieldVal = fieldValues(cell,basisIndex);
574 
575  if (haveResidual)
576  kokkosResidual(rowLID,0) = tmpFieldVal.val();
577 
578  kokkosDirichletCounter(rowLID,0) = 1.0;
579 
580  // Zero out entire matrix row
581  for (int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
582  if (blockExistsInJac(blockRowIndex,blockColIndex)) {
583  const auto& rowEntries = jacTpetraBlocks(blockRowIndex,blockColIndex).row(rowLID);
584  for (int i=0; i < rowEntries.length; ++i)
585  rowEntries.value(i) = Traits::RealType(0.0);
586  }
587  }
588 
589  // Set values
590  for (int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
591  if (blockExistsInJac(blockRowIndex,blockColIndex)) {
592  const int start = blockOffsets(blockColIndex);
593  const int stop = blockOffsets(blockColIndex+1);
594  const int sensSize = stop-start;
595  // Views may be padded. Use contiguous arrays here
596  for (int i=0; i < sensSize; ++i) {
597  cLIDs[i] = worksetLIDs(cell,start+i);
598  vals[i] = tmpFieldVal.fastAccessDx(start+i);
599  }
600  jacTpetraBlocks(blockRowIndex,blockColIndex).replaceValues(rowLID,cLIDs,sensSize,vals,true,true);
601  }
602  }
603  }
604  });
605 
606  }
607 
608  // Placement delete on view of matrices
609  for (int row=0; row < numFieldBlocks; ++row) {
610  for (int col=0; col < numFieldBlocks; ++col) {
611  if (hostBlockExistsInJac(row,col)) {
612  hostJacTpetraBlocks(row,col).~CrsMatrix();
613  }
614  }
615  }
616 
617 }
618 
619 // **********************************************************************
620 
621 // **********************************************************************
622 // Specialization: Tangent
623 // **********************************************************************
624 
625 
626 template <typename TRAITS,typename LO,typename GO,typename NodeT>
629  const Teuchos::ParameterList& p)
630  : globalIndexer_(indexer)
631  , globalDataKey_("Residual Scatter Container")
632 {
633  std::string scatterName = p.get<std::string>("Scatter Name");
634  scatterHolder_ =
636 
637  // get names to be evaluated
638  const std::vector<std::string>& names =
639  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
640 
641  // grab map from evaluated names to field names
642  fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >("Dependent Map");
643 
644  // determine if we are scattering an initial condition
645  scatterIC_ = p.isParameter("Scatter Initial Condition") ? p.get<bool>("Scatter Initial Condition") : false;
646 
647  Teuchos::RCP<PHX::DataLayout> dl = (!scatterIC_) ?
648  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional :
649  p.get< Teuchos::RCP<const panzer::PureBasis> >("Basis")->functional;
650  if (!scatterIC_) {
651  side_subcell_dim_ = p.get<int>("Side Subcell Dimension");
652  local_side_id_ = p.get<int>("Local Side ID");
653  }
654 
655  // build the vector of fields that this is dependent on
656  scatterFields_.resize(names.size());
657  for (std::size_t eq = 0; eq < names.size(); ++eq) {
658  scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
659 
660  // tell the field manager that we depend on this field
661  this->addDependentField(scatterFields_[eq]);
662  }
663 
664  checkApplyBC_ = p.isParameter("Check Apply BC") ? p.get<bool>("Check Apply BC") : false;
665  applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture
666  if (checkApplyBC_) {
667  for (std::size_t eq = 0; eq < names.size(); ++eq) {
668  applyBC_[eq] = PHX::MDField<const bool,Cell,NODE>(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl);
669  this->addDependentField(applyBC_[eq]);
670  }
671  }
672 
673  // this is what this evaluator provides
674  this->addEvaluatedField(*scatterHolder_);
675 
676  if (p.isType<std::string>("Global Data Key"))
677  globalDataKey_ = p.get<std::string>("Global Data Key");
678 
679  this->setName(scatterName+" Scatter Dirichlet Residual");
680 }
681 
682 // **********************************************************************
683 template <typename TRAITS,typename LO,typename GO,typename NodeT>
685 postRegistrationSetup(typename TRAITS::SetupData d,
686  PHX::FieldManager<TRAITS>& /* fm */)
687 {
688  const Workset & workset_0 = (*d.worksets_)[0];
689  const std::string blockId = this->wda(workset_0).block_id;
690 
691  fieldIds_.resize(scatterFields_.size());
692  fieldOffsets_.resize(scatterFields_.size());
693  basisIndexForMDFieldOffsets_.resize(scatterFields_.size());
694  fieldGlobalIndexers_.resize(scatterFields_.size());
695  productVectorBlockIndex_.resize(scatterFields_.size());
696  int maxElementBlockGIDCount = -1;
697  for(std::size_t fd=0;fd<scatterFields_.size();++fd) {
698  // get field ID from DOF manager
699  std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
700 
701  const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager
702  productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
703  fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
704  fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName); // Field number in the sub-global-indexer
705 
706  // Offsets and basisIndex depend on whether scattering IC or Dirichlet BC
707  if (!scatterIC_) {
708  const auto& offsetPair = fieldGlobalIndexers_[fd]->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_);
709  {
710  const auto& offsets = offsetPair.first;
711  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Tangent):fieldOffsets",offsets.size());
712  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
713  for (std::size_t i=0; i < offsets.size(); ++i)
714  hostOffsets(i) = offsets[i];
715  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
716  }
717  {
718  const auto& basisIndex = offsetPair.second;
719  basisIndexForMDFieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Tangent):basisIndexForMDFieldOffsets",basisIndex.size());
720  auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]);
721  for (std::size_t i=0; i < basisIndex.size(); ++i)
722  hostBasisIndex(i) = basisIndex[i];
723  Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex);
724  }
725  }
726  else {
727  // For ICs, only need offsets, not basisIndex
728  const std::vector<int>& offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]);
729  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Tangent):fieldOffsets",offsets.size());
730  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
731  for (std::size_t i=0; i < offsets.size(); ++i)
732  hostOffsets(i) = offsets[i];
733  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
734  }
735 
736  maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount);
737  }
738 
739  // We will use one workset lid view for all fields, but has to be
740  // sized big enough to hold the largest elementBlockGIDCount in the
741  // ProductVector.
742  worksetLIDs_ = PHX::View<LO**>("ScatterResidual_BlockedTpetra(Tangent):worksetLIDs",
743  scatterFields_[0].extent(0),
744  maxElementBlockGIDCount);
745 }
746 
747 // **********************************************************************
748 template <typename TRAITS,typename LO,typename GO,typename NodeT>
750 preEvaluate(typename TRAITS::PreEvalData d)
751 {
752 
753  // this is the list of parameters and their names that this scatter has to account for
754  std::vector<std::string> activeParameters =
755  Teuchos::rcp_dynamic_cast<ParameterList_GlobalEvaluationData>(d.gedc->getDataObject("PARAMETER_NAMES"))->getActiveParameters();
756 
757  const int numBlocks = static_cast<int>(globalIndexer_->getFieldDOFManagers().size());
758 
759  dfdpFieldsVoV_.initialize("ScatterResidual_Tpetra<Tangent>::dfdpFieldsVoV_",activeParameters.size(),numBlocks);
760 
761  for(std::size_t i=0;i<activeParameters.size();i++) {
762  Teuchos::RCP<ContainerType> paramBlockedContainer = Teuchos::rcp_dynamic_cast<ContainerType>(d.gedc->getDataObject(activeParameters[i]),true);
764  Teuchos::rcp_dynamic_cast<Thyra::ProductVectorBase<double>>(paramBlockedContainer->get_f(),true);
765  for(int j=0;j<numBlocks;j++) {
766  auto& tpetraBlock = *((Teuchos::rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(productVector->getNonconstVectorBlock(j),true))->getTpetraVector());
767  const auto& dfdp_view = tpetraBlock.getLocalViewDevice(Tpetra::Access::ReadWrite);
768  dfdpFieldsVoV_.addView(dfdp_view,i,j);
769  }
770  }
771 
772  dfdpFieldsVoV_.syncHostToDevice();
773 
774  // extract dirichlet counter from container
775  Teuchos::RCP<const ContainerType> blockContainer
776  = Teuchos::rcp_dynamic_cast<ContainerType>(d.gedc->getDataObject("Dirichlet Counter"),true);
777 
778  dirichletCounter_ = Teuchos::rcp_dynamic_cast<Thyra::ProductVectorBase<double> >(blockContainer->get_f(),true);
779  TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_));
780 
781  // extract linear object container
782  blockedContainer_ = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject(globalDataKey_),true);
783  TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_));
784 }
785 
786 // **********************************************************************
787 template <typename TRAITS,typename LO,typename GO,typename NodeT>
789 evaluateFields(typename TRAITS::EvalData workset)
790 {
791  using Teuchos::RCP;
792  using Teuchos::rcp_dynamic_cast;
793  using Thyra::VectorBase;
795 
796  const auto& localCellIds = this->wda(workset).cell_local_ids_k;
797 
798  RCP<ProductVectorBase<double> > thyraScatterTarget = (!scatterIC_) ?
799  rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_f(),true) :
800  rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_x(),true);
801 
802  // Loop over scattered fields
803  int currentWorksetLIDSubBlock = -1;
804  for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
805  // workset LIDs only change for different sub blocks
806  if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) {
807  fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_);
808  currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex];
809  }
810 
811  // Get Scatter target block
812  auto& tpetraScatterTarget = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraScatterTarget->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector());
813  const auto& kokkosScatterTarget = tpetraScatterTarget.getLocalViewDevice(Tpetra::Access::ReadWrite);
814 
815  // Get dirichlet counter block
816  auto& tpetraDirichletCounter = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(dirichletCounter_->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector());
817  const auto& kokkosDirichletCounter = tpetraDirichletCounter.getLocalViewDevice(Tpetra::Access::ReadWrite);
818 
819  // Class data fields for lambda capture
820  const auto fieldOffsets = fieldOffsets_[fieldIndex];
821  const auto basisIndices = basisIndexForMDFieldOffsets_[fieldIndex];
822  const auto worksetLIDs = worksetLIDs_;
823  const auto fieldValues = scatterFields_[fieldIndex].get_static_view();
824  const auto applyBC = applyBC_[fieldIndex].get_static_view();
825  const bool checkApplyBC = checkApplyBC_;
826  const auto& tangentFieldsDevice = dfdpFieldsVoV_.getViewDevice();
827  const auto& kokkosTangents = Kokkos::subview(tangentFieldsDevice,Kokkos::ALL(),productVectorBlockIndex_[fieldIndex]);
828  const double num_params = Kokkos::dimension_scalar(fieldValues)-1;
829 
830  if (!scatterIC_) {
831 
832  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
833  for (int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
834  const int lid = worksetLIDs(cell,fieldOffsets(basis));
835  if (lid < 0) // not on this processor!
836  continue;
837  const int basisIndex = basisIndices(basis);
838 
839  // Possible warp divergence for hierarchic
840  if (checkApplyBC)
841  if (!applyBC(cell,basisIndex))
842  continue;
843 
844  kokkosScatterTarget(lid,0) = fieldValues(cell,basisIndex).val();
845  for(int i_param=0; i_param<num_params; i_param++)
846  kokkosTangents(i_param)(lid,0) = fieldValues(cell,basisIndex).fastAccessDx(i_param);
847 
848  kokkosDirichletCounter(lid,0) = 1.0;
849  }
850  });
851 
852  } else {
853 
854  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
855  for (int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
856  const int lid = worksetLIDs(cell,fieldOffsets(basis));
857  if (lid < 0) // not on this processor!
858  continue;
859  kokkosScatterTarget(lid,0) = fieldValues(cell,basis).val();
860  for(int i_param=0; i_param<num_params; i_param++)
861  kokkosTangents(i_param)(lid,0) = fieldValues(cell,basis).fastAccessDx(i_param);
862  kokkosDirichletCounter(lid,0) = 1.0;
863  }
864  });
865 
866  }
867  }
868 
869 }
870 #endif
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
bool is_null(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP< const BlockedDOFManager > &)
FieldType
The type of discretization to use for a field pattern.
PHX::View< const int * > offsets
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Kokkos::View< typename Sacado::ScalarType< ScalarT >::type **, Kokkos::LayoutRight, PHX::Device > vals
virtual void preEvaluate(typename Traits::PreEvalData d)=0
bool nonnull(const boost::shared_ptr< T > &p)
std::string block_id
DEPRECATED - use: getElementBlock()
bool isType(const std::string &name) const
#define TEUCHOS_ASSERT(assertion_test)
void postRegistrationSetup(typename TRAITS::SetupData, PHX::FieldManager< TRAITS > &)