Panzer  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Panzer: A partial differential equation assembly
4 // engine for strongly coupled complex multiphysics systems
5 //
6 // Copyright 2011 NTESS and the Panzer contributors.
7 // SPDX-License-Identifier: BSD-3-Clause
8 // *****************************************************************************
9 // @HEADER
10 
11 #ifndef PANZER_SCATTER_DIRICHLET_RESIDUAL_BLOCEDTPETRA_IMPL_HPP
12 #define PANZER_SCATTER_DIRICHLET_RESIDUAL_BLOCEDTPETRA_IMPL_HPP
13 
14 #include "Teuchos_RCP.hpp"
15 #include "Teuchos_Assert.hpp"
16 
17 #include "Phalanx_DataLayout.hpp"
18 
19 // #include "Epetra_Map.h"
20 // #include "Epetra_Vector.h"
21 // #include "Epetra_CrsMatrix.h"
22 
23 #include "Panzer_GlobalIndexer.hpp"
25 #include "Panzer_PureBasis.hpp"
28 
29 #include "Phalanx_DataLayout_MDALayout.hpp"
30 
31 #include "Thyra_SpmdVectorBase.hpp"
32 #include "Thyra_ProductVectorBase.hpp"
33 #include "Thyra_BlockedLinearOpBase.hpp"
34 // #include "Thyra_get_Epetra_Operator.hpp"
35 
36 #include "Teuchos_FancyOStream.hpp"
37 
38 #include <unordered_map>
39 
40 template <typename EvalT,typename TRAITS,typename LO,typename GO,typename NodeT>
43  const Teuchos::ParameterList& p)
44 {
45  std::string scatterName = p.get<std::string>("Scatter Name");
46  Teuchos::RCP<PHX::FieldTag> scatterHolder =
48 
49  // get names to be evaluated
50  const std::vector<std::string>& names =
51  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
52 
54  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional;
55 
56  // build the vector of fields that this is dependent on
57  for (std::size_t eq = 0; eq < names.size(); ++eq) {
59 
60  // tell the field manager that we depend on this field
61  this->addDependentField(scatterField.fieldTag());
62  }
63 
64  // this is what this evaluator provides
65  this->addEvaluatedField(*scatterHolder);
66 
67  this->setName(scatterName+" Scatter Residual");
68 }
69 
70 // **********************************************************************
71 // Specialization: Residual
72 // **********************************************************************
73 
74 
75 template <typename TRAITS,typename LO,typename GO,typename NodeT>
78  const Teuchos::ParameterList& p)
79  : globalIndexer_(indexer)
80  , globalDataKey_("Residual Scatter Container")
81 {
82  std::string scatterName = p.get<std::string>("Scatter Name");
83  scatterHolder_ =
85 
86  // get names to be evaluated
87  const std::vector<std::string>& names =
88  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
89 
90  // grab map from evaluated names to field names
91  fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >("Dependent Map");
92 
93  // determine if we are scattering an initial condition
94  scatterIC_ = p.isParameter("Scatter Initial Condition") ? p.get<bool>("Scatter Initial Condition") : false;
95 
96  Teuchos::RCP<PHX::DataLayout> dl = (!scatterIC_) ?
97  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional :
98  p.get< Teuchos::RCP<const panzer::PureBasis> >("Basis")->functional;
99  if (!scatterIC_) {
100  side_subcell_dim_ = p.get<int>("Side Subcell Dimension");
101  local_side_id_ = p.get<int>("Local Side ID");
102  }
103 
104  // build the vector of fields that this is dependent on
105  scatterFields_.resize(names.size());
106  for (std::size_t eq = 0; eq < names.size(); ++eq) {
107  scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
108 
109  // tell the field manager that we depend on this field
110  this->addDependentField(scatterFields_[eq]);
111  }
112 
113  checkApplyBC_ = p.isParameter("Check Apply BC") ? p.get<bool>("Check Apply BC") : false;
114  applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture
115  if (checkApplyBC_) {
116  for (std::size_t eq = 0; eq < names.size(); ++eq) {
117  applyBC_[eq] = PHX::MDField<const bool,Cell,NODE>(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl);
118  this->addDependentField(applyBC_[eq]);
119  }
120  }
121 
122  // this is what this evaluator provides
123  this->addEvaluatedField(*scatterHolder_);
124 
125  if (p.isType<std::string>("Global Data Key"))
126  globalDataKey_ = p.get<std::string>("Global Data Key");
127 
128  this->setName(scatterName+" Scatter Residual");
129 }
130 
131 // **********************************************************************
132 template <typename TRAITS,typename LO,typename GO,typename NodeT>
134 postRegistrationSetup(typename TRAITS::SetupData d,
135  PHX::FieldManager<TRAITS>& /* fm */)
136 {
137  const Workset & workset_0 = (*d.worksets_)[0];
138  const std::string blockId = this->wda(workset_0).block_id;
139 
140  fieldIds_.resize(scatterFields_.size());
141  fieldOffsets_.resize(scatterFields_.size());
142  basisIndexForMDFieldOffsets_.resize(scatterFields_.size());
143  fieldGlobalIndexers_.resize(scatterFields_.size());
144  productVectorBlockIndex_.resize(scatterFields_.size());
145  int maxElementBlockGIDCount = -1;
146  for(std::size_t fd=0;fd<scatterFields_.size();++fd) {
147  // get field ID from DOF manager
148  std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
149 
150  const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager
151  productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
152  fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
153  fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName); // Field number in the sub-global-indexer
154 
155  // Offsets and basisIndex depend on whether scattering IC or Dirichlet BC
156  if (!scatterIC_) {
157  const auto& offsetPair = fieldGlobalIndexers_[fd]->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_);
158  {
159  const auto& offsets = offsetPair.first;
160  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Residual):fieldOffsets",offsets.size());
161  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
162  for (std::size_t i=0; i < offsets.size(); ++i)
163  hostOffsets(i) = offsets[i];
164  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
165  }
166  {
167  const auto& basisIndex = offsetPair.second;
168  basisIndexForMDFieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Residual):basisIndexForMDFieldOffsets",basisIndex.size());
169  auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]);
170  for (std::size_t i=0; i < basisIndex.size(); ++i)
171  hostBasisIndex(i) = basisIndex[i];
172  Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex);
173  }
174  }
175  else {
176  // For ICs, only need offsets, not basisIndex
177  const std::vector<int>& offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]);
178  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Residual):fieldOffsets",offsets.size());
179  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
180  for (std::size_t i=0; i < offsets.size(); ++i)
181  hostOffsets(i) = offsets[i];
182  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
183  }
184 
185  maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount);
186  }
187 
188  // We will use one workset lid view for all fields, but has to be
189  // sized big enough to hold the largest elementBlockGIDCount in the
190  // ProductVector.
191  worksetLIDs_ = PHX::View<LO**>("ScatterResidual_BlockedTpetra(Residual):worksetLIDs",
192  scatterFields_[0].extent(0),
193  maxElementBlockGIDCount);
194 }
195 
196 // **********************************************************************
197 template <typename TRAITS,typename LO,typename GO,typename NodeT>
199 preEvaluate(typename TRAITS::PreEvalData d)
200 {
201  // extract dirichlet counter from container
202  Teuchos::RCP<const ContainerType> blockContainer
203  = Teuchos::rcp_dynamic_cast<ContainerType>(d.gedc->getDataObject("Dirichlet Counter"),true);
204 
205  dirichletCounter_ = Teuchos::rcp_dynamic_cast<Thyra::ProductVectorBase<double> >(blockContainer->get_f(),true);
206  TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_));
207 
208  // extract linear object container
209  blockedContainer_ = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject(globalDataKey_),true);
210  TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_));
211 }
212 
213 // **********************************************************************
214 template <typename TRAITS,typename LO,typename GO,typename NodeT>
216 evaluateFields(typename TRAITS::EvalData workset)
217 {
218  using Teuchos::RCP;
219  using Teuchos::rcp_dynamic_cast;
220  using Thyra::VectorBase;
222 
223  const auto& localCellIds = this->wda(workset).cell_local_ids_k;
224 
225  RCP<ProductVectorBase<double> > thyraScatterTarget = (!scatterIC_) ?
226  rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_f(),true) :
227  rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_x(),true);
228 
229  // Loop over scattered fields
230  int currentWorksetLIDSubBlock = -1;
231  for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
232  // workset LIDs only change for different sub blocks
233  if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) {
234  fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_);
235  currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex];
236  }
237 
238  // Get Scatter target block
239  auto& tpetraScatterTarget = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraScatterTarget->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector());
240  const auto& kokkosScatterTarget = tpetraScatterTarget.getLocalViewDevice(Tpetra::Access::ReadWrite);
241 
242  // Get dirichlet counter block
243  auto& tpetraDirichletCounter = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(dirichletCounter_->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector());
244  const auto& kokkosDirichletCounter = tpetraDirichletCounter.getLocalViewDevice(Tpetra::Access::ReadWrite);
245 
246  // Class data fields for lambda capture
247  const auto fieldOffsets = fieldOffsets_[fieldIndex];
248  const auto basisIndices = basisIndexForMDFieldOffsets_[fieldIndex];
249  const auto worksetLIDs = worksetLIDs_;
250  const auto fieldValues = scatterFields_[fieldIndex].get_static_view();
251  const auto applyBC = applyBC_[fieldIndex].get_static_view();
252  const bool checkApplyBC = checkApplyBC_;
253 
254  if (!scatterIC_) {
255 
256  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
257  for (int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
258  const int lid = worksetLIDs(cell,fieldOffsets(basis));
259  if (lid < 0) // not on this processor!
260  continue;
261  const int basisIndex = basisIndices(basis);
262 
263  // Possible warp divergence for hierarchic
264  if (checkApplyBC)
265  if (!applyBC(cell,basisIndex))
266  continue;
267 
268  kokkosScatterTarget(lid,0) = fieldValues(cell,basisIndex);
269  kokkosDirichletCounter(lid,0) = 1.0;
270  }
271  });
272 
273  } else {
274 
275  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
276  for (int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
277  const int lid = worksetLIDs(cell,fieldOffsets(basis));
278  if (lid < 0) // not on this processor!
279  continue;
280  kokkosScatterTarget(lid,0) = fieldValues(cell,basis);
281  kokkosDirichletCounter(lid,0) = 1.0;
282  }
283  });
284 
285  }
286  }
287 
288 }
289 
290 // **********************************************************************
291 // Specialization: Jacobian
292 // **********************************************************************
293 
294 template <typename TRAITS,typename LO,typename GO,typename NodeT>
297  const Teuchos::ParameterList& p)
298  : globalIndexer_(indexer)
299  , globalDataKey_("Residual Scatter Container")
300 {
301  std::string scatterName = p.get<std::string>("Scatter Name");
302  scatterHolder_ =
304 
305  // get names to be evaluated
306  const std::vector<std::string>& names =
307  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
308 
309  // grab map from evaluated names to field names
310  fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >("Dependent Map");
311 
313  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional;
314 
315  side_subcell_dim_ = p.get<int>("Side Subcell Dimension");
316  local_side_id_ = p.get<int>("Local Side ID");
317 
318  // build the vector of fields that this is dependent on
319  scatterFields_.resize(names.size());
320  for (std::size_t eq = 0; eq < names.size(); ++eq) {
321  scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
322 
323  // tell the field manager that we depend on this field
324  this->addDependentField(scatterFields_[eq]);
325  }
326 
327  checkApplyBC_ = p.get<bool>("Check Apply BC");
328  applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture
329  if (checkApplyBC_) {
330  for (std::size_t eq = 0; eq < names.size(); ++eq) {
331  applyBC_[eq] = PHX::MDField<const bool,Cell,NODE>(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl);
332  this->addDependentField(applyBC_[eq]);
333  }
334  }
335 
336  // this is what this evaluator provides
337  this->addEvaluatedField(*scatterHolder_);
338 
339  if (p.isType<std::string>("Global Data Key"))
340  globalDataKey_ = p.get<std::string>("Global Data Key");
341 
342  this->setName(scatterName+" Scatter Residual (Jacobian)");
343 }
344 
345 // **********************************************************************
346 template <typename TRAITS,typename LO,typename GO,typename NodeT>
348 postRegistrationSetup(typename TRAITS::SetupData d,
349  PHX::FieldManager<TRAITS>& /* fm */)
350 {
351  const Workset & workset_0 = (*d.worksets_)[0];
352  const std::string blockId = this->wda(workset_0).block_id;
353 
354  fieldIds_.resize(scatterFields_.size());
355  fieldOffsets_.resize(scatterFields_.size());
356  basisIndexForMDFieldOffsets_.resize(scatterFields_.size());
357  productVectorBlockIndex_.resize(scatterFields_.size());
358  for(std::size_t fd=0;fd<scatterFields_.size();++fd) {
359 
360  std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
361  const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager
362  productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
363  const auto& fieldGlobalIndexer = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
364  fieldIds_[fd] = fieldGlobalIndexer->getFieldNum(fieldName); // Field number in the sub-global-indexer
365 
366  const auto& offsetPair = fieldGlobalIndexer->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_);
367  {
368  const auto& offsets = offsetPair.first;
369  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Jacobian):fieldOffsets",offsets.size());
370  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
371  for (std::size_t i=0; i < offsets.size(); ++i)
372  hostOffsets(i) = offsets[i];
373  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
374  }
375  {
376  const auto& basisIndex = offsetPair.second;
377  basisIndexForMDFieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Jacobian):basisIndexForMDFieldOffsets",basisIndex.size());
378  auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]);
379  for (std::size_t i=0; i < basisIndex.size(); ++i)
380  hostBasisIndex(i) = basisIndex[i];
381  Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex);
382  }
383  }
384 
385  // This is sized differently than the Residual implementation since
386  // we need the LIDs for all sub-blocks, not just the single
387  // sub-block for the field residual scatter.
388  int elementBlockGIDCount = 0;
389  for (const auto& blockDOFMgr : globalIndexer_->getFieldDOFManagers())
390  elementBlockGIDCount += blockDOFMgr->getElementBlockGIDCount(blockId);
391 
392  worksetLIDs_ = PHX::View<LO**>("ScatterDirichletResidual_BlockedTpetra(Jacobian):worksetLIDs",
393  scatterFields_[0].extent(0),
394  elementBlockGIDCount);
395 
396  // Compute the block offsets
397  const auto& blockGlobalIndexers = globalIndexer_->getFieldDOFManagers();
398  const int numBlocks = static_cast<int>(globalIndexer_->getFieldDOFManagers().size());
399  blockOffsets_ = PHX::View<LO*>("ScatterDirichletResidual_BlockedTpetra(Jacobian):blockOffsets_",
400  numBlocks+1); // Number of fields, plus a sentinel
401  const auto hostBlockOffsets = Kokkos::create_mirror_view(blockOffsets_);
402  for (int blk=0;blk<numBlocks;blk++) {
403  int blockOffset = globalIndexer_->getBlockGIDOffset(blockId,blk);
404  hostBlockOffsets(blk) = blockOffset;
405  }
406  hostBlockOffsets(numBlocks) = hostBlockOffsets(numBlocks-1) + blockGlobalIndexers[blockGlobalIndexers.size()-1]->getElementBlockGIDCount(blockId);
407  Kokkos::deep_copy(blockOffsets_,hostBlockOffsets);
408 
409  // Make sure the that hard coded derivative dimension in the
410  // evaluate call is large enough to hold all derivatives for each
411  // sub block load
412  for (int blk=0;blk<numBlocks;blk++) {
413  const int blockDerivativeSize = hostBlockOffsets(blk+1) - hostBlockOffsets(blk);
414  TEUCHOS_TEST_FOR_EXCEPTION(blockDerivativeSize > maxDerivativeArraySize_, std::runtime_error,
415  "ERROR: the derivative dimension for sub block "
416  << blk << "with a value of " << blockDerivativeSize
417  << "is larger than the size allocated for cLIDs and vals "
418  << "in the evaluate call! You must manually increase the "
419  << "size and recompile!");
420  }
421 }
422 
423 // **********************************************************************
424 template <typename TRAITS,typename LO,typename GO,typename NodeT>
426 preEvaluate(typename TRAITS::PreEvalData d)
427 {
428  // extract dirichlet counter from container
429  Teuchos::RCP<const ContainerType> blockContainer
430  = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject("Dirichlet Counter"),true);
431 
432  dirichletCounter_ = Teuchos::rcp_dynamic_cast<Thyra::ProductVectorBase<double> >(blockContainer->get_f(),true);
433  TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_));
434 
435  // extract linear object container
436  blockedContainer_ = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject(globalDataKey_),true);
437  TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_));
438 }
439 
440 // **********************************************************************
441 template <typename TRAITS,typename LO,typename GO,typename NodeT>
443 evaluateFields(typename TRAITS::EvalData workset)
444 {
445  using Teuchos::RCP;
446  using Teuchos::rcp_dynamic_cast;
447  using Thyra::VectorBase;
450 
451  const auto& localCellIds = this->wda(workset).cell_local_ids_k;
452 
453  const int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
454  const RCP<const ContainerType> blockedContainer = blockedContainer_;
455  const RCP<ProductVectorBase<double>> thyraBlockResidual = rcp_dynamic_cast<ProductVectorBase<double>>(blockedContainer_->get_f());
456  const bool haveResidual = Teuchos::nonnull(thyraBlockResidual);
457  const RCP<BlockedLinearOpBase<double>> Jac = rcp_dynamic_cast<BlockedLinearOpBase<double>>(blockedContainer_->get_A(),true);
458 
459  // Get the local data for the sub-block crs matrices. First allocate
460  // on host and then deep_copy to device. The sub-blocks are
461  // unmanaged since they are allocated and ref counted separately on
462  // host.
463  using LocalMatrixType = KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>, size_t>;
464  typename PHX::View<LocalMatrixType**>::HostMirror
465  hostJacTpetraBlocks("panzer::ScatterResidual_BlockTpetra<Jacobian>::hostJacTpetraBlocks", numFieldBlocks,numFieldBlocks);
466 
467  PHX::View<int**> blockExistsInJac = PHX::View<int**>("blockExistsInJac_",numFieldBlocks,numFieldBlocks);
468  auto hostBlockExistsInJac = Kokkos::create_mirror_view(blockExistsInJac);
469 
470  for (int row=0; row < numFieldBlocks; ++row) {
471  for (int col=0; col < numFieldBlocks; ++col) {
472  const auto thyraTpetraOperator = rcp_dynamic_cast<Thyra::TpetraLinearOp<double,LO,GO,NodeT>>(Jac->getNonconstBlock(row,col),false);
473  if (nonnull(thyraTpetraOperator)) {
474 
475  // HACK to enforce views in the CrsGraph to be
476  // Unmanaged. Passing in the MemoryTrait<Unmanaged> doesn't
477  // work as the CrsGraph in the CrsMatrix ignores the
478  // MemoryTrait. Need to use the runtime constructor by passing
479  // in points to ensure Unmanaged. See:
480  // https://github.com/kokkos/kokkos/issues/1581
481 
482  // These two lines are the original code we can revert to when #1581 is fixed.
483  // const auto crsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),true);
484  // new (&hostJacTpetraBlocks(row,col)) KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>> (crsMatrix->getLocalMatrix());
485 
486  // Instead do this
487  {
488  // Grab the local managed matrix and graph
489  const auto tpetraCrsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),true);
490  const auto managedMatrix = tpetraCrsMatrix->getLocalMatrixDevice();
491  const auto managedGraph = managedMatrix.graph;
492 
493  // Create runtime unmanaged versions
494  using StaticCrsGraphType = typename LocalMatrixType::StaticCrsGraphType;
495  StaticCrsGraphType unmanagedGraph;
496  unmanagedGraph.entries = typename StaticCrsGraphType::entries_type(managedGraph.entries.data(),managedGraph.entries.extent(0));
497  unmanagedGraph.row_map = typename StaticCrsGraphType::row_map_type(managedGraph.row_map.data(),managedGraph.row_map.extent(0));
498  unmanagedGraph.row_block_offsets = typename StaticCrsGraphType::row_block_type(managedGraph.row_block_offsets.data(),managedGraph.row_block_offsets.extent(0));
499 
500  typename LocalMatrixType::values_type unmanagedValues(managedMatrix.values.data(),managedMatrix.values.extent(0));
501  LocalMatrixType unmanagedMatrix(managedMatrix.values.label(), managedMatrix.numCols(), unmanagedValues, unmanagedGraph);
502  new (&hostJacTpetraBlocks(row,col)) LocalMatrixType(unmanagedMatrix);
503  }
504 
505  hostBlockExistsInJac(row,col) = 1;
506  }
507  else {
508  hostBlockExistsInJac(row,col) = 0;
509  }
510  }
511  }
512  typename PHX::View<LocalMatrixType**>
513  jacTpetraBlocks("panzer::ScatterResidual_BlockedTpetra<Jacobian>::jacTpetraBlocks",numFieldBlocks,numFieldBlocks);
514  Kokkos::deep_copy(jacTpetraBlocks,hostJacTpetraBlocks);
515  Kokkos::deep_copy(blockExistsInJac,hostBlockExistsInJac);
516 
517  // worksetLIDs is larger for Jacobian than Residual fill. Need the
518  // entire set of field offsets for derivative indexing no matter
519  // which block row you are scattering. The residual only needs the
520  // lids for the sub-block that it is scattering to. The subviews
521  // below are to offset the LID blocks correctly.
522  const auto& globalIndexers = globalIndexer_->getFieldDOFManagers();
523  auto blockOffsets_h = Kokkos::create_mirror_view(blockOffsets_);
524  Kokkos::deep_copy(blockOffsets_h, blockOffsets_);
525  for (size_t block=0; block < globalIndexers.size(); ++block) {
526  const auto subviewOfBlockLIDs = Kokkos::subview(worksetLIDs_,Kokkos::ALL(), std::make_pair(blockOffsets_h(block),blockOffsets_h(block+1)));
527  globalIndexers[block]->getElementLIDs(localCellIds,subviewOfBlockLIDs);
528  }
529 
530  // Loop over scattered fields
531  for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
532 
533  const int blockRowIndex = productVectorBlockIndex_[fieldIndex];
534  typename Tpetra::Vector<double,LO,GO,PHX::Device>::dual_view_type::t_dev kokkosResidual;
535  if (haveResidual) {
536  auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(blockRowIndex),true))->getTpetraVector());
537  kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
538  }
539 
540  // Get dirichlet counter block
541  auto& tpetraDirichletCounter = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(dirichletCounter_->getNonconstVectorBlock(blockRowIndex),true))->getTpetraVector());
542  const auto& kokkosDirichletCounter = tpetraDirichletCounter.getLocalViewDevice(Tpetra::Access::ReadWrite);
543 
544  // Class data fields for lambda capture
545  const PHX::View<const int*> fieldOffsets = fieldOffsets_[fieldIndex];
546  const auto& basisIndices = basisIndexForMDFieldOffsets_[fieldIndex];
547  const PHX::View<const LO**> worksetLIDs = worksetLIDs_;
548  const PHX::View<const ScalarT**> fieldValues = scatterFields_[fieldIndex].get_static_view();
549  const PHX::View<const LO*> blockOffsets = blockOffsets_;
550  const auto& applyBC = applyBC_[fieldIndex].get_static_view();
551  const bool checkApplyBC = checkApplyBC_;
552 
553  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
554  LO cLIDs[maxDerivativeArraySize_];
555  typename Sacado::ScalarType<ScalarT>::type vals[maxDerivativeArraySize_];
556 
557  for(int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
558  const int block_row_offset = blockOffsets(blockRowIndex);
559  const int rowLID = worksetLIDs(cell,block_row_offset+fieldOffsets(basis));
560 
561  if (rowLID < 0) // not on this processor!
562  continue;
563 
564  const int basisIndex = basisIndices(basis);
565 
566  // Possible warp divergence for hierarchic
567  if (checkApplyBC)
568  if (!applyBC(cell,basisIndex))
569  continue;
570 
572  typename FieldType::array_type::reference_type tmpFieldVal = fieldValues(cell,basisIndex);
573 
574  if (haveResidual)
575  kokkosResidual(rowLID,0) = tmpFieldVal.val();
576 
577  kokkosDirichletCounter(rowLID,0) = 1.0;
578 
579  // Zero out entire matrix row
580  for (int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
581  if (blockExistsInJac(blockRowIndex,blockColIndex)) {
582  const auto& rowEntries = jacTpetraBlocks(blockRowIndex,blockColIndex).row(rowLID);
583  for (int i=0; i < rowEntries.length; ++i)
584  rowEntries.value(i) = Traits::RealType(0.0);
585  }
586  }
587 
588  // Set values
589  for (int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
590  if (blockExistsInJac(blockRowIndex,blockColIndex)) {
591  const int start = blockOffsets(blockColIndex);
592  const int stop = blockOffsets(blockColIndex+1);
593  const int sensSize = stop-start;
594  // Views may be padded. Use contiguous arrays here
595  for (int i=0; i < sensSize; ++i) {
596  cLIDs[i] = worksetLIDs(cell,start+i);
597  vals[i] = tmpFieldVal.fastAccessDx(start+i);
598  }
599  jacTpetraBlocks(blockRowIndex,blockColIndex).replaceValues(rowLID,cLIDs,sensSize,vals,true,true);
600  }
601  }
602  }
603  });
604 
605  }
606 
607  // Placement delete on view of matrices
608  for (int row=0; row < numFieldBlocks; ++row) {
609  for (int col=0; col < numFieldBlocks; ++col) {
610  if (hostBlockExistsInJac(row,col)) {
611  hostJacTpetraBlocks(row,col).~CrsMatrix();
612  }
613  }
614  }
615 
616 }
617 
618 // **********************************************************************
619 
620 #endif
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
bool is_null(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP< const BlockedDOFManager > &)
FieldType
The type of discretization to use for a field pattern.
PHX::View< const int * > offsets
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Kokkos::View< typename Sacado::ScalarType< ScalarT >::type **, Kokkos::LayoutRight, PHX::Device > vals
virtual void preEvaluate(typename Traits::PreEvalData d)=0
bool nonnull(const boost::shared_ptr< T > &p)
std::string block_id
DEPRECATED - use: getElementBlock()
bool isType(const std::string &name) const
#define TEUCHOS_ASSERT(assertion_test)
void postRegistrationSetup(typename TRAITS::SetupData, PHX::FieldManager< TRAITS > &)