Panzer  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Panzer: A partial differential equation assembly
5 // engine for strongly coupled complex multiphysics systems
6 // Copyright (2011) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Roger P. Pawlowski (rppawlo@sandia.gov) and
39 // Eric C. Cyr (eccyr@sandia.gov)
40 // ***********************************************************************
41 // @HEADER
42 
43 #ifndef PANZER_SCATTER_DIRICHLET_RESIDUAL_BLOCEDEPETRA_IMPL_HPP
44 #define PANZER_SCATTER_DIRICHLET_RESIDUAL_BLOCEDEPETRA_IMPL_HPP
45 
46 #include "Teuchos_RCP.hpp"
47 #include "Teuchos_Assert.hpp"
48 
49 #include "Phalanx_DataLayout.hpp"
50 
51 #include "Epetra_Map.h"
52 #include "Epetra_Vector.h"
53 #include "Epetra_CrsMatrix.h"
54 
55 #include "Panzer_GlobalIndexer.hpp"
57 #include "Panzer_PureBasis.hpp"
60 
61 #include "Phalanx_DataLayout_MDALayout.hpp"
62 
63 #include "Thyra_SpmdVectorBase.hpp"
64 #include "Thyra_ProductVectorBase.hpp"
65 #include "Thyra_BlockedLinearOpBase.hpp"
66 #include "Thyra_get_Epetra_Operator.hpp"
67 
68 #include "Teuchos_FancyOStream.hpp"
69 
70 #include <unordered_map>
71 
72 template <typename EvalT,typename TRAITS,typename LO,typename GO,typename NodeT>
75  const Teuchos::ParameterList& p)
76 {
77  std::string scatterName = p.get<std::string>("Scatter Name");
78  Teuchos::RCP<PHX::FieldTag> scatterHolder =
79  Teuchos::rcp(new PHX::Tag<ScalarT>(scatterName,Teuchos::rcp(new PHX::MDALayout<Dummy>(0))));
80 
81  // get names to be evaluated
82  const std::vector<std::string>& names =
83  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
84 
86  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional;
87 
88  // build the vector of fields that this is dependent on
89  for (std::size_t eq = 0; eq < names.size(); ++eq) {
90  PHX::MDField<const ScalarT,Cell,NODE> scatterField = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
91 
92  // tell the field manager that we depend on this field
93  this->addDependentField(scatterField.fieldTag());
94  }
95 
96  // this is what this evaluator provides
97  this->addEvaluatedField(*scatterHolder);
98 
99  this->setName(scatterName+" Scatter Residual");
100 }
101 
102 // **********************************************************************
103 // Specialization: Residual
104 // **********************************************************************
105 
106 
107 template <typename TRAITS,typename LO,typename GO,typename NodeT>
110  const Teuchos::ParameterList& p)
111  : globalIndexer_(indexer)
112  , globalDataKey_("Residual Scatter Container")
113 {
114  std::string scatterName = p.get<std::string>("Scatter Name");
115  scatterHolder_ =
116  Teuchos::rcp(new PHX::Tag<ScalarT>(scatterName,Teuchos::rcp(new PHX::MDALayout<Dummy>(0))));
117 
118  // get names to be evaluated
119  const std::vector<std::string>& names =
120  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
121 
122  // grab map from evaluated names to field names
123  fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >("Dependent Map");
124 
125  // determine if we are scattering an initial condition
126  scatterIC_ = p.isParameter("Scatter Initial Condition") ? p.get<bool>("Scatter Initial Condition") : false;
127 
128  Teuchos::RCP<PHX::DataLayout> dl = (!scatterIC_) ?
129  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional :
130  p.get< Teuchos::RCP<const panzer::PureBasis> >("Basis")->functional;
131  if (!scatterIC_) {
132  side_subcell_dim_ = p.get<int>("Side Subcell Dimension");
133  local_side_id_ = p.get<int>("Local Side ID");
134  }
135 
136  // build the vector of fields that this is dependent on
137  scatterFields_.resize(names.size());
138  for (std::size_t eq = 0; eq < names.size(); ++eq) {
139  scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
140 
141  // tell the field manager that we depend on this field
142  this->addDependentField(scatterFields_[eq]);
143  }
144 
145  checkApplyBC_ = p.isParameter("Check Apply BC") ? p.get<bool>("Check Apply BC") : false;
146  applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture
147  if (checkApplyBC_) {
148  for (std::size_t eq = 0; eq < names.size(); ++eq) {
149  applyBC_[eq] = PHX::MDField<const bool,Cell,NODE>(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl);
150  this->addDependentField(applyBC_[eq]);
151  }
152  }
153 
154  // this is what this evaluator provides
155  this->addEvaluatedField(*scatterHolder_);
156 
157  if (p.isType<std::string>("Global Data Key"))
158  globalDataKey_ = p.get<std::string>("Global Data Key");
159 
160  this->setName(scatterName+" Scatter Residual");
161 }
162 
163 // **********************************************************************
164 template <typename TRAITS,typename LO,typename GO,typename NodeT>
166 postRegistrationSetup(typename TRAITS::SetupData d,
167  PHX::FieldManager<TRAITS>& /* fm */)
168 {
169  const Workset & workset_0 = (*d.worksets_)[0];
170  const std::string blockId = this->wda(workset_0).block_id;
171 
172  fieldIds_.resize(scatterFields_.size());
173  fieldOffsets_.resize(scatterFields_.size());
174  basisIndexForMDFieldOffsets_.resize(scatterFields_.size());
175  fieldGlobalIndexers_.resize(scatterFields_.size());
176  productVectorBlockIndex_.resize(scatterFields_.size());
177  int maxElementBlockGIDCount = -1;
178  for(std::size_t fd=0;fd<scatterFields_.size();++fd) {
179  // get field ID from DOF manager
180  std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
181 
182  const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager
183  productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
184  fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
185  fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName); // Field number in the sub-global-indexer
186 
187  // Offsets and basisIndex depend on whether scattering IC or Dirichlet BC
188  if (!scatterIC_) {
189  const auto& offsetPair = fieldGlobalIndexers_[fd]->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_);
190  {
191  const auto& offsets = offsetPair.first;
192  fieldOffsets_[fd] = Kokkos::View<int*,PHX::Device>("ScatterDirichletResidual_BlockedTpetra(Residual):fieldOffsets",offsets.size());
193  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
194  for (std::size_t i=0; i < offsets.size(); ++i)
195  hostOffsets(i) = offsets[i];
196  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
197  }
198  {
199  const auto& basisIndex = offsetPair.second;
200  basisIndexForMDFieldOffsets_[fd] = Kokkos::View<int*,PHX::Device>("ScatterDirichletResidual_BlockedTpetra(Residual):basisIndexForMDFieldOffsets",basisIndex.size());
201  auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]);
202  for (std::size_t i=0; i < basisIndex.size(); ++i)
203  hostBasisIndex(i) = basisIndex[i];
204  Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex);
205  }
206  }
207  else {
208  // For ICs, only need offsets, not basisIndex
209  const std::vector<int>& offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]);
210  fieldOffsets_[fd] = Kokkos::View<int*,PHX::Device>("ScatterDirichletResidual_BlockedTpetra(Residual):fieldOffsets",offsets.size());
211  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
212  for (std::size_t i=0; i < offsets.size(); ++i)
213  hostOffsets(i) = offsets[i];
214  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
215  }
216 
217  maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount);
218  typename PHX::Device().fence();
219  }
220 
221  // We will use one workset lid view for all fields, but has to be
222  // sized big enough to hold the largest elementBlockGIDCount in the
223  // ProductVector.
224  worksetLIDs_ = Kokkos::View<LO**,PHX::Device>("ScatterResidual_BlockedTpetra(Residual):worksetLIDs",
225  scatterFields_[0].extent(0),
226  maxElementBlockGIDCount);
227 }
228 
229 // **********************************************************************
230 template <typename TRAITS,typename LO,typename GO,typename NodeT>
232 preEvaluate(typename TRAITS::PreEvalData d)
233 {
234  // extract dirichlet counter from container
235  Teuchos::RCP<const ContainerType> blockContainer
236  = Teuchos::rcp_dynamic_cast<ContainerType>(d.gedc->getDataObject("Dirichlet Counter"),true);
237 
238  dirichletCounter_ = Teuchos::rcp_dynamic_cast<Thyra::ProductVectorBase<double> >(blockContainer->get_f(),true);
239  TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_));
240 
241  // extract linear object container
242  blockedContainer_ = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject(globalDataKey_),true);
243  TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_));
244 }
245 
246 // **********************************************************************
247 template <typename TRAITS,typename LO,typename GO,typename NodeT>
249 evaluateFields(typename TRAITS::EvalData workset)
250 {
251  using Teuchos::RCP;
252  using Teuchos::rcp_dynamic_cast;
253  using Thyra::VectorBase;
255 
256  const auto& localCellIds = this->wda(workset).cell_local_ids_k;
257 
258  RCP<ProductVectorBase<double> > thyraScatterTarget = (!scatterIC_) ?
259  rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_f(),true) :
260  rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_x(),true);
261 
262  // Loop over scattered fields
263  int currentWorksetLIDSubBlock = -1;
264  for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
265  // workset LIDs only change for different sub blocks
266  if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) {
267  fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_);
268  currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex];
269  }
270 
271  // Get Scatter target block
272  const auto& tpetraScatterTarget = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraScatterTarget->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector());
273  const auto& kokkosScatterTarget = tpetraScatterTarget.template getLocalView<PHX::mem_space>();
274 
275  // Get dirichlet counter block
276  const auto& tpetraDirichletCounter = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(dirichletCounter_->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector());
277  const auto& kokkosDirichletCounter = tpetraDirichletCounter.template getLocalView<PHX::mem_space>();
278 
279  // Class data fields for lambda capture
280  const auto fieldOffsets = fieldOffsets_[fieldIndex];
281  const auto basisIndices = basisIndexForMDFieldOffsets_[fieldIndex];
282  const auto worksetLIDs = worksetLIDs_;
283  const auto fieldValues = scatterFields_[fieldIndex];
284  const auto applyBC = applyBC_[fieldIndex].get_static_view();
285  const bool checkApplyBC = checkApplyBC_;
286 
287  if (!scatterIC_) {
288 
289  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
290  for (int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
291  const int lid = worksetLIDs(cell,fieldOffsets(basis));
292  if (lid < 0) // not on this processor!
293  continue;
294  const int basisIndex = basisIndices(basis);
295 
296  // Possible warp divergence for hierarchic
297  if (checkApplyBC)
298  if (!applyBC(cell,basisIndex))
299  continue;
300 
301  kokkosScatterTarget(lid,0) = fieldValues(cell,basisIndex);
302  kokkosDirichletCounter(lid,0) = 1.0;
303  }
304  });
305 
306  } else {
307 
308  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
309  for (int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
310  const int lid = worksetLIDs(cell,fieldOffsets(basis));
311  if (lid < 0) // not on this processor!
312  continue;
313  kokkosScatterTarget(lid,0) = fieldValues(cell,basis);
314  kokkosDirichletCounter(lid,0) = 1.0;
315  }
316  });
317 
318  }
319  }
320 
321 }
322 
323 // **********************************************************************
324 // Specialization: Jacobian
325 // **********************************************************************
326 
327 template <typename TRAITS,typename LO,typename GO,typename NodeT>
330  const Teuchos::ParameterList& p)
331  : globalIndexer_(indexer)
332  , globalDataKey_("Residual Scatter Container")
333 {
334  std::string scatterName = p.get<std::string>("Scatter Name");
335  scatterHolder_ =
336  Teuchos::rcp(new PHX::Tag<ScalarT>(scatterName,Teuchos::rcp(new PHX::MDALayout<Dummy>(0))));
337 
338  // get names to be evaluated
339  const std::vector<std::string>& names =
340  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
341 
342  // grab map from evaluated names to field names
343  fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >("Dependent Map");
344 
346  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional;
347 
348  side_subcell_dim_ = p.get<int>("Side Subcell Dimension");
349  local_side_id_ = p.get<int>("Local Side ID");
350 
351  // build the vector of fields that this is dependent on
352  scatterFields_.resize(names.size());
353  for (std::size_t eq = 0; eq < names.size(); ++eq) {
354  scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
355 
356  // tell the field manager that we depend on this field
357  this->addDependentField(scatterFields_[eq]);
358  }
359 
360  checkApplyBC_ = p.get<bool>("Check Apply BC");
361  applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture
362  if (checkApplyBC_) {
363  for (std::size_t eq = 0; eq < names.size(); ++eq) {
364  applyBC_[eq] = PHX::MDField<const bool,Cell,NODE>(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl);
365  this->addDependentField(applyBC_[eq]);
366  }
367  }
368 
369  // this is what this evaluator provides
370  this->addEvaluatedField(*scatterHolder_);
371 
372  if (p.isType<std::string>("Global Data Key"))
373  globalDataKey_ = p.get<std::string>("Global Data Key");
374 
375  this->setName(scatterName+" Scatter Residual (Jacobian)");
376 }
377 
378 // **********************************************************************
379 template <typename TRAITS,typename LO,typename GO,typename NodeT>
381 postRegistrationSetup(typename TRAITS::SetupData d,
382  PHX::FieldManager<TRAITS>& /* fm */)
383 {
384  const Workset & workset_0 = (*d.worksets_)[0];
385  const std::string blockId = this->wda(workset_0).block_id;
386 
387  fieldIds_.resize(scatterFields_.size());
388  fieldOffsets_.resize(scatterFields_.size());
389  basisIndexForMDFieldOffsets_.resize(scatterFields_.size());
390  productVectorBlockIndex_.resize(scatterFields_.size());
391  for(std::size_t fd=0;fd<scatterFields_.size();++fd) {
392 
393  std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
394  const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager
395  productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
396  const auto& fieldGlobalIndexer = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
397  fieldIds_[fd] = fieldGlobalIndexer->getFieldNum(fieldName); // Field number in the sub-global-indexer
398 
399  const auto& offsetPair = fieldGlobalIndexer->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_);
400  {
401  const auto& offsets = offsetPair.first;
402  fieldOffsets_[fd] = Kokkos::View<int*,PHX::Device>("ScatterDirichletResidual_BlockedTpetra(Jacobian):fieldOffsets",offsets.size());
403  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
404  for (std::size_t i=0; i < offsets.size(); ++i)
405  hostOffsets(i) = offsets[i];
406  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
407  }
408  {
409  const auto& basisIndex = offsetPair.second;
410  basisIndexForMDFieldOffsets_[fd] = Kokkos::View<int*,PHX::Device>("ScatterDirichletResidual_BlockedTpetra(Jacobian):basisIndexForMDFieldOffsets",basisIndex.size());
411  auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]);
412  for (std::size_t i=0; i < basisIndex.size(); ++i)
413  hostBasisIndex(i) = basisIndex[i];
414  Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex);
415  }
416  }
417 
418  // This is sized differently than the Residual implementation since
419  // we need the LIDs for all sub-blocks, not just the single
420  // sub-block for the field residual scatter.
421  int elementBlockGIDCount = 0;
422  for (const auto blockDOFMgr : globalIndexer_->getFieldDOFManagers())
423  elementBlockGIDCount += blockDOFMgr->getElementBlockGIDCount(blockId);
424 
425  worksetLIDs_ = Kokkos::View<LO**,PHX::Device>("ScatterDirichletResidual_BlockedTpetra(Jacobian):worksetLIDs",
426  scatterFields_[0].extent(0),
427  elementBlockGIDCount);
428 
429  // Compute the block offsets
430  const auto& blockGlobalIndexers = globalIndexer_->getFieldDOFManagers();
431  const int numBlocks = static_cast<int>(globalIndexer_->getFieldDOFManagers().size());
432  blockOffsets_ = Kokkos::View<LO*,PHX::Device>("ScatterDirichletResidual_BlockedTpetra(Jacobian):blockOffsets_",
433  numBlocks+1); // Number of fields, plus a sentinel
434  const auto hostBlockOffsets = Kokkos::create_mirror_view(blockOffsets_);
435  for (int blk=0;blk<numBlocks;blk++) {
436  int blockOffset = globalIndexer_->getBlockGIDOffset(blockId,blk);
437  hostBlockOffsets(blk) = blockOffset;
438  }
439  blockOffsets_(numBlocks) = blockOffsets_(numBlocks-1) + blockGlobalIndexers[blockGlobalIndexers.size()-1]->getElementBlockGIDCount(blockId);
440  Kokkos::deep_copy(blockOffsets_,hostBlockOffsets);
441 
442  // Make sure the that hard coded derivative dimension in the
443  // evaluate call is large enough to hold all derivatives for each
444  // sub block load
445  for (int blk=0;blk<numBlocks;blk++) {
446  const int blockDerivativeSize = hostBlockOffsets(blk+1) - hostBlockOffsets(blk);
447  TEUCHOS_TEST_FOR_EXCEPTION(blockDerivativeSize > maxDerivativeArraySize_, std::runtime_error,
448  "ERROR: the derivative dimension for sub block "
449  << blk << "with a value of " << blockDerivativeSize
450  << "is larger than the size allocated for cLIDs and vals "
451  << "in the evaluate call! You must manually increase the "
452  << "size and recompile!");
453  }
454 
455  typename PHX::Device().fence();
456 }
457 
458 // **********************************************************************
459 template <typename TRAITS,typename LO,typename GO,typename NodeT>
461 preEvaluate(typename TRAITS::PreEvalData d)
462 {
463  // extract dirichlet counter from container
464  Teuchos::RCP<const ContainerType> blockContainer
465  = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject("Dirichlet Counter"),true);
466 
467  dirichletCounter_ = Teuchos::rcp_dynamic_cast<Thyra::ProductVectorBase<double> >(blockContainer->get_f(),true);
468  TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_));
469 
470  // extract linear object container
471  blockedContainer_ = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject(globalDataKey_),true);
472  TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_));
473 }
474 
475 // **********************************************************************
476 template <typename TRAITS,typename LO,typename GO,typename NodeT>
478 evaluateFields(typename TRAITS::EvalData workset)
479 {
480  using Teuchos::RCP;
481  using Teuchos::rcp_dynamic_cast;
482  using Thyra::VectorBase;
485 
486  const auto& localCellIds = this->wda(workset).cell_local_ids_k;
487 
488  const int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
489  const RCP<const ContainerType> blockedContainer = blockedContainer_;
490  const RCP<ProductVectorBase<double>> thyraBlockResidual = rcp_dynamic_cast<ProductVectorBase<double>>(blockedContainer_->get_f());
491  const bool haveResidual = Teuchos::nonnull(thyraBlockResidual);
492  const RCP<BlockedLinearOpBase<double>> Jac = rcp_dynamic_cast<BlockedLinearOpBase<double>>(blockedContainer_->get_A(),true);
493 
494  // Get the local data for the sub-block crs matrices. First allocate
495  // on host and then deep_copy to device. The sub-blocks are
496  // unmanaged since they are allocated and ref counted separately on
497  // host.
498  using LocalMatrixType = KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
499  typename Kokkos::View<LocalMatrixType**,PHX::Device>::HostMirror
500  hostJacTpetraBlocks("panzer::ScatterResidual_BlockTpetra<Jacobian>::hostJacTpetraBlocks", numFieldBlocks,numFieldBlocks);
501 
502  Kokkos::View<int**,PHX::Device> blockExistsInJac = Kokkos::View<int**,PHX::Device>("blockExistsInJac_",numFieldBlocks,numFieldBlocks);
503  auto hostBlockExistsInJac = Kokkos::create_mirror_view(blockExistsInJac);
504 
505  for (int row=0; row < numFieldBlocks; ++row) {
506  for (int col=0; col < numFieldBlocks; ++col) {
507  const auto thyraTpetraOperator = rcp_dynamic_cast<Thyra::TpetraLinearOp<double,LO,GO,NodeT>>(Jac->getNonconstBlock(row,col),false);
508  if (nonnull(thyraTpetraOperator)) {
509 
510  // HACK to enforce views in the CrsGraph to be
511  // Unmanaged. Passing in the MemoryTrait<Unmanaged> doesn't
512  // work as the CrsGraph in the CrsMatrix ignores the
513  // MemoryTrait. Need to use the runtime constructor by passing
514  // in points to ensure Unmanaged. See:
515  // https://github.com/kokkos/kokkos/issues/1581
516 
517  // These two lines are the original code we can revert to when #1581 is fixed.
518  // const auto crsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),true);
519  // new (&hostJacTpetraBlocks(row,col)) KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>> (crsMatrix->getLocalMatrix());
520 
521  // Instead do this
522  {
523  // Grab the local managed matrix and graph
524  const auto tpetraCrsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),true);
525  const auto managedMatrix = tpetraCrsMatrix->getLocalMatrix();
526  const auto managedGraph = managedMatrix.graph;
527 
528  // Create runtime unmanaged versions
529  using StaticCrsGraphType = typename LocalMatrixType::StaticCrsGraphType;
530  StaticCrsGraphType unmanagedGraph;
531  unmanagedGraph.entries = typename StaticCrsGraphType::entries_type(managedGraph.entries.data(),managedGraph.entries.extent(0));
532  unmanagedGraph.row_map = typename StaticCrsGraphType::row_map_type(managedGraph.row_map.data(),managedGraph.row_map.extent(0));
533  unmanagedGraph.row_block_offsets = typename StaticCrsGraphType::row_block_type(managedGraph.row_block_offsets.data(),managedGraph.row_block_offsets.extent(0));
534 
535  typename LocalMatrixType::values_type unmanagedValues(managedMatrix.values.data(),managedMatrix.values.extent(0));
536  LocalMatrixType unmanagedMatrix(managedMatrix.values.label(), managedMatrix.numCols(), unmanagedValues, unmanagedGraph);
537  new (&hostJacTpetraBlocks(row,col)) LocalMatrixType(unmanagedMatrix);
538  }
539 
540  hostBlockExistsInJac(row,col) = 1;
541  }
542  else {
543  hostBlockExistsInJac(row,col) = 0;
544  }
545  }
546  }
547  typename Kokkos::View<LocalMatrixType**,PHX::Device>
548  jacTpetraBlocks("panzer::ScatterResidual_BlockedTpetra<Jacobian>::jacTpetraBlocks",numFieldBlocks,numFieldBlocks);
549  Kokkos::deep_copy(jacTpetraBlocks,hostJacTpetraBlocks);
550  Kokkos::deep_copy(blockExistsInJac,hostBlockExistsInJac);
551  typename PHX::Device().fence();
552 
553  // worksetLIDs is larger for Jacobian than Residual fill. Need the
554  // entire set of field offsets for derivative indexing no matter
555  // which block row you are scattering. The residual only needs the
556  // lids for the sub-block that it is scattering to. The subviews
557  // below are to offset the LID blocks correctly.
558  const auto& globalIndexers = globalIndexer_->getFieldDOFManagers();
559  for (size_t block=0; block < globalIndexers.size(); ++block) {
560  const auto subviewOfBlockLIDs = Kokkos::subview(worksetLIDs_,Kokkos::ALL(), std::make_pair(blockOffsets_(block),blockOffsets_(block+1)));
561  globalIndexers[block]->getElementLIDs(localCellIds,subviewOfBlockLIDs);
562  }
563 
564  // Loop over scattered fields
565  for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
566 
567  const int blockRowIndex = productVectorBlockIndex_[fieldIndex];
568  typename Tpetra::Vector<double,LO,GO,PHX::Device>::dual_view_type::t_dev kokkosResidual;
569  if (haveResidual) {
570  const auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(blockRowIndex),true))->getTpetraVector());
571  kokkosResidual = tpetraResidual.template getLocalView<PHX::mem_space>();
572  }
573 
574  // Get dirichlet counter block
575  const auto& tpetraDirichletCounter = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(dirichletCounter_->getNonconstVectorBlock(blockRowIndex),true))->getTpetraVector());
576  const auto& kokkosDirichletCounter = tpetraDirichletCounter.template getLocalView<PHX::mem_space>();
577 
578  // Class data fields for lambda capture
579  const Kokkos::View<const int*,PHX::Device> fieldOffsets = fieldOffsets_[fieldIndex];
580  const auto& basisIndices = basisIndexForMDFieldOffsets_[fieldIndex];
581  const Kokkos::View<const LO**,PHX::Device> worksetLIDs = worksetLIDs_;
582  const PHX::View<const ScalarT**> fieldValues = scatterFields_[fieldIndex].get_static_view();
583  const Kokkos::View<const LO*,PHX::Device> blockOffsets = blockOffsets_;
584  const auto& applyBC = applyBC_[fieldIndex].get_static_view();
585  const bool checkApplyBC = checkApplyBC_;
586 
587  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
588  LO cLIDs[maxDerivativeArraySize_];
589  typename Sacado::ScalarType<ScalarT>::type vals[maxDerivativeArraySize_];
590 
591  for(int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
592  const int rowLID = worksetLIDs(cell,fieldOffsets(basis));
593 
594  if (rowLID < 0) // not on this processor!
595  continue;
596 
597  const int basisIndex = basisIndices(basis);
598 
599  // Possible warp divergence for hierarchic
600  if (checkApplyBC)
601  if (!applyBC(cell,basisIndex))
602  continue;
603 
604  typedef PHX::MDField<const ScalarT,Cell,NODE> FieldType;
605  typename FieldType::array_type::reference_type tmpFieldVal = fieldValues(cell,basisIndex);
606 
607  if (haveResidual)
608  kokkosResidual(rowLID,0) = tmpFieldVal.val();
609 
610  kokkosDirichletCounter(rowLID,0) = 1.0;
611 
612  // Zero out entire matrix row
613  for (int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
614  if (blockExistsInJac(blockRowIndex,blockColIndex)) {
615  const auto& rowEntries = jacTpetraBlocks(blockRowIndex,blockColIndex).row(rowLID);
616  for (int i=0; i < rowEntries.length; ++i)
617  rowEntries.value(i) = Traits::RealType(0.0);
618  }
619  }
620 
621  // Set values
622  for (int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
623  if (blockExistsInJac(blockRowIndex,blockColIndex)) {
624  const int start = blockOffsets(blockColIndex);
625  const int stop = blockOffsets(blockColIndex+1);
626  const int sensSize = stop-start;
627  // Views may be padded. Use contiguous arrays here
628  for (int i=0; i < sensSize; ++i) {
629  cLIDs[i] = worksetLIDs(cell,start+i);
630  vals[i] = tmpFieldVal.fastAccessDx(start+i);
631  }
632  jacTpetraBlocks(blockRowIndex,blockColIndex).replaceValues(rowLID,cLIDs,sensSize,vals,true,true);
633  }
634  }
635  }
636  });
637 
638  }
639 
640  // Placement delete on view of matrices
641  for (int row=0; row < numFieldBlocks; ++row) {
642  for (int col=0; col < numFieldBlocks; ++col) {
643  if (hostBlockExistsInJac(row,col)) {
644  hostJacTpetraBlocks(row,col).~CrsMatrix();
645  }
646  }
647  }
648 
649 }
650 
651 // **********************************************************************
652 
653 #endif
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
bool is_null(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP< const BlockedDOFManager > &)
FieldType
The type of discretization to use for a field pattern.
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
bool nonnull(const boost::shared_ptr< T > &p)
bool isType(const std::string &name) const
Pushes residual values into the residual vector for a Newton-based solve.
#define TEUCHOS_ASSERT(assertion_test)
void postRegistrationSetup(typename TRAITS::SetupData, PHX::FieldManager< TRAITS > &)
Kokkos::View< const int *, PHX::Device > offsets