Panzer  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Panzer: A partial differential equation assembly
5 // engine for strongly coupled complex multiphysics systems
6 // Copyright (2011) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Roger P. Pawlowski (rppawlo@sandia.gov) and
39 // Eric C. Cyr (eccyr@sandia.gov)
40 // ***********************************************************************
41 // @HEADER
42 
43 #ifndef PANZER_SCATTER_DIRICHLET_RESIDUAL_BLOCEDTPETRA_IMPL_HPP
44 #define PANZER_SCATTER_DIRICHLET_RESIDUAL_BLOCEDTPETRA_IMPL_HPP
45 
46 #include "Teuchos_RCP.hpp"
47 #include "Teuchos_Assert.hpp"
48 
49 #include "Phalanx_DataLayout.hpp"
50 
51 // #include "Epetra_Map.h"
52 // #include "Epetra_Vector.h"
53 // #include "Epetra_CrsMatrix.h"
54 
55 #include "Panzer_GlobalIndexer.hpp"
57 #include "Panzer_PureBasis.hpp"
60 
61 #include "Phalanx_DataLayout_MDALayout.hpp"
62 
63 #include "Thyra_SpmdVectorBase.hpp"
64 #include "Thyra_ProductVectorBase.hpp"
65 #include "Thyra_BlockedLinearOpBase.hpp"
66 // #include "Thyra_get_Epetra_Operator.hpp"
67 
68 #include "Teuchos_FancyOStream.hpp"
69 
70 #include <unordered_map>
71 
72 template <typename EvalT,typename TRAITS,typename LO,typename GO,typename NodeT>
75  const Teuchos::ParameterList& p)
76 {
77  std::string scatterName = p.get<std::string>("Scatter Name");
78  Teuchos::RCP<PHX::FieldTag> scatterHolder =
80 
81  // get names to be evaluated
82  const std::vector<std::string>& names =
83  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
84 
86  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional;
87 
88  // build the vector of fields that this is dependent on
89  for (std::size_t eq = 0; eq < names.size(); ++eq) {
91 
92  // tell the field manager that we depend on this field
93  this->addDependentField(scatterField.fieldTag());
94  }
95 
96  // this is what this evaluator provides
97  this->addEvaluatedField(*scatterHolder);
98 
99  this->setName(scatterName+" Scatter Residual");
100 }
101 
102 // **********************************************************************
103 // Specialization: Residual
104 // **********************************************************************
105 
106 
107 template <typename TRAITS,typename LO,typename GO,typename NodeT>
110  const Teuchos::ParameterList& p)
111  : globalIndexer_(indexer)
112  , globalDataKey_("Residual Scatter Container")
113 {
114  std::string scatterName = p.get<std::string>("Scatter Name");
115  scatterHolder_ =
117 
118  // get names to be evaluated
119  const std::vector<std::string>& names =
120  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
121 
122  // grab map from evaluated names to field names
123  fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >("Dependent Map");
124 
125  // determine if we are scattering an initial condition
126  scatterIC_ = p.isParameter("Scatter Initial Condition") ? p.get<bool>("Scatter Initial Condition") : false;
127 
128  Teuchos::RCP<PHX::DataLayout> dl = (!scatterIC_) ?
129  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional :
130  p.get< Teuchos::RCP<const panzer::PureBasis> >("Basis")->functional;
131  if (!scatterIC_) {
132  side_subcell_dim_ = p.get<int>("Side Subcell Dimension");
133  local_side_id_ = p.get<int>("Local Side ID");
134  }
135 
136  // build the vector of fields that this is dependent on
137  scatterFields_.resize(names.size());
138  for (std::size_t eq = 0; eq < names.size(); ++eq) {
139  scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
140 
141  // tell the field manager that we depend on this field
142  this->addDependentField(scatterFields_[eq]);
143  }
144 
145  checkApplyBC_ = p.isParameter("Check Apply BC") ? p.get<bool>("Check Apply BC") : false;
146  applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture
147  if (checkApplyBC_) {
148  for (std::size_t eq = 0; eq < names.size(); ++eq) {
149  applyBC_[eq] = PHX::MDField<const bool,Cell,NODE>(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl);
150  this->addDependentField(applyBC_[eq]);
151  }
152  }
153 
154  // this is what this evaluator provides
155  this->addEvaluatedField(*scatterHolder_);
156 
157  if (p.isType<std::string>("Global Data Key"))
158  globalDataKey_ = p.get<std::string>("Global Data Key");
159 
160  this->setName(scatterName+" Scatter Residual");
161 }
162 
163 // **********************************************************************
164 template <typename TRAITS,typename LO,typename GO,typename NodeT>
166 postRegistrationSetup(typename TRAITS::SetupData d,
167  PHX::FieldManager<TRAITS>& /* fm */)
168 {
169  const Workset & workset_0 = (*d.worksets_)[0];
170  const std::string blockId = this->wda(workset_0).block_id;
171 
172  fieldIds_.resize(scatterFields_.size());
173  fieldOffsets_.resize(scatterFields_.size());
174  basisIndexForMDFieldOffsets_.resize(scatterFields_.size());
175  fieldGlobalIndexers_.resize(scatterFields_.size());
176  productVectorBlockIndex_.resize(scatterFields_.size());
177  int maxElementBlockGIDCount = -1;
178  for(std::size_t fd=0;fd<scatterFields_.size();++fd) {
179  // get field ID from DOF manager
180  std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
181 
182  const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager
183  productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
184  fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
185  fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName); // Field number in the sub-global-indexer
186 
187  // Offsets and basisIndex depend on whether scattering IC or Dirichlet BC
188  if (!scatterIC_) {
189  const auto& offsetPair = fieldGlobalIndexers_[fd]->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_);
190  {
191  const auto& offsets = offsetPair.first;
192  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Residual):fieldOffsets",offsets.size());
193  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
194  for (std::size_t i=0; i < offsets.size(); ++i)
195  hostOffsets(i) = offsets[i];
196  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
197  }
198  {
199  const auto& basisIndex = offsetPair.second;
200  basisIndexForMDFieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Residual):basisIndexForMDFieldOffsets",basisIndex.size());
201  auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]);
202  for (std::size_t i=0; i < basisIndex.size(); ++i)
203  hostBasisIndex(i) = basisIndex[i];
204  Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex);
205  }
206  }
207  else {
208  // For ICs, only need offsets, not basisIndex
209  const std::vector<int>& offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]);
210  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Residual):fieldOffsets",offsets.size());
211  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
212  for (std::size_t i=0; i < offsets.size(); ++i)
213  hostOffsets(i) = offsets[i];
214  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
215  }
216 
217  maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount);
218  }
219 
220  // We will use one workset lid view for all fields, but has to be
221  // sized big enough to hold the largest elementBlockGIDCount in the
222  // ProductVector.
223  worksetLIDs_ = PHX::View<LO**>("ScatterResidual_BlockedTpetra(Residual):worksetLIDs",
224  scatterFields_[0].extent(0),
225  maxElementBlockGIDCount);
226 }
227 
228 // **********************************************************************
229 template <typename TRAITS,typename LO,typename GO,typename NodeT>
231 preEvaluate(typename TRAITS::PreEvalData d)
232 {
233  // extract dirichlet counter from container
234  Teuchos::RCP<const ContainerType> blockContainer
235  = Teuchos::rcp_dynamic_cast<ContainerType>(d.gedc->getDataObject("Dirichlet Counter"),true);
236 
237  dirichletCounter_ = Teuchos::rcp_dynamic_cast<Thyra::ProductVectorBase<double> >(blockContainer->get_f(),true);
238  TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_));
239 
240  // extract linear object container
241  blockedContainer_ = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject(globalDataKey_),true);
242  TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_));
243 }
244 
245 // **********************************************************************
246 template <typename TRAITS,typename LO,typename GO,typename NodeT>
248 evaluateFields(typename TRAITS::EvalData workset)
249 {
250  using Teuchos::RCP;
251  using Teuchos::rcp_dynamic_cast;
252  using Thyra::VectorBase;
254 
255  const auto& localCellIds = this->wda(workset).cell_local_ids_k;
256 
257  RCP<ProductVectorBase<double> > thyraScatterTarget = (!scatterIC_) ?
258  rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_f(),true) :
259  rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_x(),true);
260 
261  // Loop over scattered fields
262  int currentWorksetLIDSubBlock = -1;
263  for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
264  // workset LIDs only change for different sub blocks
265  if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) {
266  fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_);
267  currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex];
268  }
269 
270  // Get Scatter target block
271  auto& tpetraScatterTarget = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraScatterTarget->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector());
272  const auto& kokkosScatterTarget = tpetraScatterTarget.getLocalViewDevice(Tpetra::Access::ReadWrite);
273 
274  // Get dirichlet counter block
275  auto& tpetraDirichletCounter = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(dirichletCounter_->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector());
276  const auto& kokkosDirichletCounter = tpetraDirichletCounter.getLocalViewDevice(Tpetra::Access::ReadWrite);
277 
278  // Class data fields for lambda capture
279  const auto fieldOffsets = fieldOffsets_[fieldIndex];
280  const auto basisIndices = basisIndexForMDFieldOffsets_[fieldIndex];
281  const auto worksetLIDs = worksetLIDs_;
282  const auto fieldValues = scatterFields_[fieldIndex].get_static_view();
283  const auto applyBC = applyBC_[fieldIndex].get_static_view();
284  const bool checkApplyBC = checkApplyBC_;
285 
286  if (!scatterIC_) {
287 
288  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
289  for (int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
290  const int lid = worksetLIDs(cell,fieldOffsets(basis));
291  if (lid < 0) // not on this processor!
292  continue;
293  const int basisIndex = basisIndices(basis);
294 
295  // Possible warp divergence for hierarchic
296  if (checkApplyBC)
297  if (!applyBC(cell,basisIndex))
298  continue;
299 
300  kokkosScatterTarget(lid,0) = fieldValues(cell,basisIndex);
301  kokkosDirichletCounter(lid,0) = 1.0;
302  }
303  });
304 
305  } else {
306 
307  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
308  for (int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
309  const int lid = worksetLIDs(cell,fieldOffsets(basis));
310  if (lid < 0) // not on this processor!
311  continue;
312  kokkosScatterTarget(lid,0) = fieldValues(cell,basis);
313  kokkosDirichletCounter(lid,0) = 1.0;
314  }
315  });
316 
317  }
318  }
319 
320 }
321 
322 // **********************************************************************
323 // Specialization: Jacobian
324 // **********************************************************************
325 
326 template <typename TRAITS,typename LO,typename GO,typename NodeT>
329  const Teuchos::ParameterList& p)
330  : globalIndexer_(indexer)
331  , globalDataKey_("Residual Scatter Container")
332 {
333  std::string scatterName = p.get<std::string>("Scatter Name");
334  scatterHolder_ =
336 
337  // get names to be evaluated
338  const std::vector<std::string>& names =
339  *(p.get< Teuchos::RCP< std::vector<std::string> > >("Dependent Names"));
340 
341  // grab map from evaluated names to field names
342  fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >("Dependent Map");
343 
345  p.get< Teuchos::RCP<panzer::PureBasis> >("Basis")->functional;
346 
347  side_subcell_dim_ = p.get<int>("Side Subcell Dimension");
348  local_side_id_ = p.get<int>("Local Side ID");
349 
350  // build the vector of fields that this is dependent on
351  scatterFields_.resize(names.size());
352  for (std::size_t eq = 0; eq < names.size(); ++eq) {
353  scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
354 
355  // tell the field manager that we depend on this field
356  this->addDependentField(scatterFields_[eq]);
357  }
358 
359  checkApplyBC_ = p.get<bool>("Check Apply BC");
360  applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture
361  if (checkApplyBC_) {
362  for (std::size_t eq = 0; eq < names.size(); ++eq) {
363  applyBC_[eq] = PHX::MDField<const bool,Cell,NODE>(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl);
364  this->addDependentField(applyBC_[eq]);
365  }
366  }
367 
368  // this is what this evaluator provides
369  this->addEvaluatedField(*scatterHolder_);
370 
371  if (p.isType<std::string>("Global Data Key"))
372  globalDataKey_ = p.get<std::string>("Global Data Key");
373 
374  this->setName(scatterName+" Scatter Residual (Jacobian)");
375 }
376 
377 // **********************************************************************
378 template <typename TRAITS,typename LO,typename GO,typename NodeT>
380 postRegistrationSetup(typename TRAITS::SetupData d,
381  PHX::FieldManager<TRAITS>& /* fm */)
382 {
383  const Workset & workset_0 = (*d.worksets_)[0];
384  const std::string blockId = this->wda(workset_0).block_id;
385 
386  fieldIds_.resize(scatterFields_.size());
387  fieldOffsets_.resize(scatterFields_.size());
388  basisIndexForMDFieldOffsets_.resize(scatterFields_.size());
389  productVectorBlockIndex_.resize(scatterFields_.size());
390  for(std::size_t fd=0;fd<scatterFields_.size();++fd) {
391 
392  std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
393  const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager
394  productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
395  const auto& fieldGlobalIndexer = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
396  fieldIds_[fd] = fieldGlobalIndexer->getFieldNum(fieldName); // Field number in the sub-global-indexer
397 
398  const auto& offsetPair = fieldGlobalIndexer->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_);
399  {
400  const auto& offsets = offsetPair.first;
401  fieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Jacobian):fieldOffsets",offsets.size());
402  auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
403  for (std::size_t i=0; i < offsets.size(); ++i)
404  hostOffsets(i) = offsets[i];
405  Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
406  }
407  {
408  const auto& basisIndex = offsetPair.second;
409  basisIndexForMDFieldOffsets_[fd] = PHX::View<int*>("ScatterDirichletResidual_BlockedTpetra(Jacobian):basisIndexForMDFieldOffsets",basisIndex.size());
410  auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]);
411  for (std::size_t i=0; i < basisIndex.size(); ++i)
412  hostBasisIndex(i) = basisIndex[i];
413  Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex);
414  }
415  }
416 
417  // This is sized differently than the Residual implementation since
418  // we need the LIDs for all sub-blocks, not just the single
419  // sub-block for the field residual scatter.
420  int elementBlockGIDCount = 0;
421  for (const auto& blockDOFMgr : globalIndexer_->getFieldDOFManagers())
422  elementBlockGIDCount += blockDOFMgr->getElementBlockGIDCount(blockId);
423 
424  worksetLIDs_ = PHX::View<LO**>("ScatterDirichletResidual_BlockedTpetra(Jacobian):worksetLIDs",
425  scatterFields_[0].extent(0),
426  elementBlockGIDCount);
427 
428  // Compute the block offsets
429  const auto& blockGlobalIndexers = globalIndexer_->getFieldDOFManagers();
430  const int numBlocks = static_cast<int>(globalIndexer_->getFieldDOFManagers().size());
431  blockOffsets_ = PHX::View<LO*>("ScatterDirichletResidual_BlockedTpetra(Jacobian):blockOffsets_",
432  numBlocks+1); // Number of fields, plus a sentinel
433  const auto hostBlockOffsets = Kokkos::create_mirror_view(blockOffsets_);
434  for (int blk=0;blk<numBlocks;blk++) {
435  int blockOffset = globalIndexer_->getBlockGIDOffset(blockId,blk);
436  hostBlockOffsets(blk) = blockOffset;
437  }
438  hostBlockOffsets(numBlocks) = hostBlockOffsets(numBlocks-1) + blockGlobalIndexers[blockGlobalIndexers.size()-1]->getElementBlockGIDCount(blockId);
439  Kokkos::deep_copy(blockOffsets_,hostBlockOffsets);
440 
441  // Make sure the that hard coded derivative dimension in the
442  // evaluate call is large enough to hold all derivatives for each
443  // sub block load
444  for (int blk=0;blk<numBlocks;blk++) {
445  const int blockDerivativeSize = hostBlockOffsets(blk+1) - hostBlockOffsets(blk);
446  TEUCHOS_TEST_FOR_EXCEPTION(blockDerivativeSize > maxDerivativeArraySize_, std::runtime_error,
447  "ERROR: the derivative dimension for sub block "
448  << blk << "with a value of " << blockDerivativeSize
449  << "is larger than the size allocated for cLIDs and vals "
450  << "in the evaluate call! You must manually increase the "
451  << "size and recompile!");
452  }
453 }
454 
455 // **********************************************************************
456 template <typename TRAITS,typename LO,typename GO,typename NodeT>
458 preEvaluate(typename TRAITS::PreEvalData d)
459 {
460  // extract dirichlet counter from container
461  Teuchos::RCP<const ContainerType> blockContainer
462  = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject("Dirichlet Counter"),true);
463 
464  dirichletCounter_ = Teuchos::rcp_dynamic_cast<Thyra::ProductVectorBase<double> >(blockContainer->get_f(),true);
465  TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_));
466 
467  // extract linear object container
468  blockedContainer_ = Teuchos::rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject(globalDataKey_),true);
469  TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_));
470 }
471 
472 // **********************************************************************
473 template <typename TRAITS,typename LO,typename GO,typename NodeT>
475 evaluateFields(typename TRAITS::EvalData workset)
476 {
477  using Teuchos::RCP;
478  using Teuchos::rcp_dynamic_cast;
479  using Thyra::VectorBase;
482 
483  const auto& localCellIds = this->wda(workset).cell_local_ids_k;
484 
485  const int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
486  const RCP<const ContainerType> blockedContainer = blockedContainer_;
487  const RCP<ProductVectorBase<double>> thyraBlockResidual = rcp_dynamic_cast<ProductVectorBase<double>>(blockedContainer_->get_f());
488  const bool haveResidual = Teuchos::nonnull(thyraBlockResidual);
489  const RCP<BlockedLinearOpBase<double>> Jac = rcp_dynamic_cast<BlockedLinearOpBase<double>>(blockedContainer_->get_A(),true);
490 
491  // Get the local data for the sub-block crs matrices. First allocate
492  // on host and then deep_copy to device. The sub-blocks are
493  // unmanaged since they are allocated and ref counted separately on
494  // host.
495  using LocalMatrixType = KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>, size_t>;
496  typename PHX::View<LocalMatrixType**>::HostMirror
497  hostJacTpetraBlocks("panzer::ScatterResidual_BlockTpetra<Jacobian>::hostJacTpetraBlocks", numFieldBlocks,numFieldBlocks);
498 
499  PHX::View<int**> blockExistsInJac = PHX::View<int**>("blockExistsInJac_",numFieldBlocks,numFieldBlocks);
500  auto hostBlockExistsInJac = Kokkos::create_mirror_view(blockExistsInJac);
501 
502  for (int row=0; row < numFieldBlocks; ++row) {
503  for (int col=0; col < numFieldBlocks; ++col) {
504  const auto thyraTpetraOperator = rcp_dynamic_cast<Thyra::TpetraLinearOp<double,LO,GO,NodeT>>(Jac->getNonconstBlock(row,col),false);
505  if (nonnull(thyraTpetraOperator)) {
506 
507  // HACK to enforce views in the CrsGraph to be
508  // Unmanaged. Passing in the MemoryTrait<Unmanaged> doesn't
509  // work as the CrsGraph in the CrsMatrix ignores the
510  // MemoryTrait. Need to use the runtime constructor by passing
511  // in points to ensure Unmanaged. See:
512  // https://github.com/kokkos/kokkos/issues/1581
513 
514  // These two lines are the original code we can revert to when #1581 is fixed.
515  // const auto crsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),true);
516  // new (&hostJacTpetraBlocks(row,col)) KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>> (crsMatrix->getLocalMatrix());
517 
518  // Instead do this
519  {
520  // Grab the local managed matrix and graph
521  const auto tpetraCrsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),true);
522  const auto managedMatrix = tpetraCrsMatrix->getLocalMatrixDevice();
523  const auto managedGraph = managedMatrix.graph;
524 
525  // Create runtime unmanaged versions
526  using StaticCrsGraphType = typename LocalMatrixType::StaticCrsGraphType;
527  StaticCrsGraphType unmanagedGraph;
528  unmanagedGraph.entries = typename StaticCrsGraphType::entries_type(managedGraph.entries.data(),managedGraph.entries.extent(0));
529  unmanagedGraph.row_map = typename StaticCrsGraphType::row_map_type(managedGraph.row_map.data(),managedGraph.row_map.extent(0));
530  unmanagedGraph.row_block_offsets = typename StaticCrsGraphType::row_block_type(managedGraph.row_block_offsets.data(),managedGraph.row_block_offsets.extent(0));
531 
532  typename LocalMatrixType::values_type unmanagedValues(managedMatrix.values.data(),managedMatrix.values.extent(0));
533  LocalMatrixType unmanagedMatrix(managedMatrix.values.label(), managedMatrix.numCols(), unmanagedValues, unmanagedGraph);
534  new (&hostJacTpetraBlocks(row,col)) LocalMatrixType(unmanagedMatrix);
535  }
536 
537  hostBlockExistsInJac(row,col) = 1;
538  }
539  else {
540  hostBlockExistsInJac(row,col) = 0;
541  }
542  }
543  }
544  typename PHX::View<LocalMatrixType**>
545  jacTpetraBlocks("panzer::ScatterResidual_BlockedTpetra<Jacobian>::jacTpetraBlocks",numFieldBlocks,numFieldBlocks);
546  Kokkos::deep_copy(jacTpetraBlocks,hostJacTpetraBlocks);
547  Kokkos::deep_copy(blockExistsInJac,hostBlockExistsInJac);
548 
549  // worksetLIDs is larger for Jacobian than Residual fill. Need the
550  // entire set of field offsets for derivative indexing no matter
551  // which block row you are scattering. The residual only needs the
552  // lids for the sub-block that it is scattering to. The subviews
553  // below are to offset the LID blocks correctly.
554  const auto& globalIndexers = globalIndexer_->getFieldDOFManagers();
555  auto blockOffsets_h = Kokkos::create_mirror_view(blockOffsets_);
556  Kokkos::deep_copy(blockOffsets_h, blockOffsets_);
557  for (size_t block=0; block < globalIndexers.size(); ++block) {
558  const auto subviewOfBlockLIDs = Kokkos::subview(worksetLIDs_,Kokkos::ALL(), std::make_pair(blockOffsets_h(block),blockOffsets_h(block+1)));
559  globalIndexers[block]->getElementLIDs(localCellIds,subviewOfBlockLIDs);
560  }
561 
562  // Loop over scattered fields
563  for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
564 
565  const int blockRowIndex = productVectorBlockIndex_[fieldIndex];
566  typename Tpetra::Vector<double,LO,GO,PHX::Device>::dual_view_type::t_dev kokkosResidual;
567  if (haveResidual) {
568  auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(blockRowIndex),true))->getTpetraVector());
569  kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
570  }
571 
572  // Get dirichlet counter block
573  auto& tpetraDirichletCounter = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(dirichletCounter_->getNonconstVectorBlock(blockRowIndex),true))->getTpetraVector());
574  const auto& kokkosDirichletCounter = tpetraDirichletCounter.getLocalViewDevice(Tpetra::Access::ReadWrite);
575 
576  // Class data fields for lambda capture
577  const PHX::View<const int*> fieldOffsets = fieldOffsets_[fieldIndex];
578  const auto& basisIndices = basisIndexForMDFieldOffsets_[fieldIndex];
579  const PHX::View<const LO**> worksetLIDs = worksetLIDs_;
580  const PHX::View<const ScalarT**> fieldValues = scatterFields_[fieldIndex].get_static_view();
581  const PHX::View<const LO*> blockOffsets = blockOffsets_;
582  const auto& applyBC = applyBC_[fieldIndex].get_static_view();
583  const bool checkApplyBC = checkApplyBC_;
584 
585  Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) {
586  LO cLIDs[maxDerivativeArraySize_];
587  typename Sacado::ScalarType<ScalarT>::type vals[maxDerivativeArraySize_];
588 
589  for(int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
590  const int block_row_offset = blockOffsets(blockRowIndex);
591  const int rowLID = worksetLIDs(cell,block_row_offset+fieldOffsets(basis));
592 
593  if (rowLID < 0) // not on this processor!
594  continue;
595 
596  const int basisIndex = basisIndices(basis);
597 
598  // Possible warp divergence for hierarchic
599  if (checkApplyBC)
600  if (!applyBC(cell,basisIndex))
601  continue;
602 
604  typename FieldType::array_type::reference_type tmpFieldVal = fieldValues(cell,basisIndex);
605 
606  if (haveResidual)
607  kokkosResidual(rowLID,0) = tmpFieldVal.val();
608 
609  kokkosDirichletCounter(rowLID,0) = 1.0;
610 
611  // Zero out entire matrix row
612  for (int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
613  if (blockExistsInJac(blockRowIndex,blockColIndex)) {
614  const auto& rowEntries = jacTpetraBlocks(blockRowIndex,blockColIndex).row(rowLID);
615  for (int i=0; i < rowEntries.length; ++i)
616  rowEntries.value(i) = Traits::RealType(0.0);
617  }
618  }
619 
620  // Set values
621  for (int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
622  if (blockExistsInJac(blockRowIndex,blockColIndex)) {
623  const int start = blockOffsets(blockColIndex);
624  const int stop = blockOffsets(blockColIndex+1);
625  const int sensSize = stop-start;
626  // Views may be padded. Use contiguous arrays here
627  for (int i=0; i < sensSize; ++i) {
628  cLIDs[i] = worksetLIDs(cell,start+i);
629  vals[i] = tmpFieldVal.fastAccessDx(start+i);
630  }
631  jacTpetraBlocks(blockRowIndex,blockColIndex).replaceValues(rowLID,cLIDs,sensSize,vals,true,true);
632  }
633  }
634  }
635  });
636 
637  }
638 
639  // Placement delete on view of matrices
640  for (int row=0; row < numFieldBlocks; ++row) {
641  for (int col=0; col < numFieldBlocks; ++col) {
642  if (hostBlockExistsInJac(row,col)) {
643  hostJacTpetraBlocks(row,col).~CrsMatrix();
644  }
645  }
646  }
647 
648 }
649 
650 // **********************************************************************
651 
652 #endif
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
bool is_null(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP< const BlockedDOFManager > &)
FieldType
The type of discretization to use for a field pattern.
PHX::View< const int * > offsets
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Kokkos::View< typename Sacado::ScalarType< ScalarT >::type **, Kokkos::LayoutRight, PHX::Device > vals
virtual void preEvaluate(typename Traits::PreEvalData d)=0
bool nonnull(const boost::shared_ptr< T > &p)
std::string block_id
DEPRECATED - use: getElementBlock()
bool isType(const std::string &name) const
#define TEUCHOS_ASSERT(assertion_test)
void postRegistrationSetup(typename TRAITS::SetupData, PHX::FieldManager< TRAITS > &)