MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_RefMaxwell_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_REFMAXWELL_DEF_HPP
47 #define MUELU_REFMAXWELL_DEF_HPP
48 
49 #include <sstream>
50 
51 #include "MueLu_ConfigDefs.hpp"
52 
53 #include "Xpetra_Map.hpp"
54 #include "Xpetra_MatrixMatrix.hpp"
55 #include "Xpetra_TripleMatrixMultiply.hpp"
56 #include "Xpetra_CrsMatrixUtils.hpp"
57 #include "Xpetra_MatrixUtils.hpp"
58 
60 
61 #include "MueLu_AmalgamationFactory.hpp"
62 #include "MueLu_RAPFactory.hpp"
63 #include "MueLu_ThresholdAFilterFactory.hpp"
64 #include "MueLu_TransPFactory.hpp"
65 #include "MueLu_SmootherFactory.hpp"
66 
67 #include "MueLu_CoalesceDropFactory.hpp"
68 #include "MueLu_CoarseMapFactory.hpp"
69 #include "MueLu_CoordinatesTransferFactory.hpp"
70 #include "MueLu_UncoupledAggregationFactory.hpp"
71 #include "MueLu_TentativePFactory.hpp"
72 #include "MueLu_SaPFactory.hpp"
73 #include "MueLu_AggregationExportFactory.hpp"
74 #include "MueLu_Utilities.hpp"
75 
76 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
77 #include "MueLu_AmalgamationFactory_kokkos.hpp"
78 #include "MueLu_CoalesceDropFactory_kokkos.hpp"
79 #include "MueLu_CoarseMapFactory_kokkos.hpp"
80 #include "MueLu_CoordinatesTransferFactory_kokkos.hpp"
81 #include "MueLu_UncoupledAggregationFactory_kokkos.hpp"
82 #include "MueLu_TentativePFactory_kokkos.hpp"
83 #include "MueLu_SaPFactory_kokkos.hpp"
84 #include "MueLu_Utilities_kokkos.hpp"
85 #include <Kokkos_Core.hpp>
86 #include <KokkosSparse_CrsMatrix.hpp>
87 #endif
88 
89 #include "MueLu_ZoltanInterface.hpp"
90 #include "MueLu_Zoltan2Interface.hpp"
91 #include "MueLu_RepartitionHeuristicFactory.hpp"
92 #include "MueLu_RepartitionFactory.hpp"
93 #include "MueLu_RebalanceAcFactory.hpp"
94 #include "MueLu_RebalanceTransferFactory.hpp"
95 
96 #include "MueLu_VerbosityLevel.hpp"
97 
100 
101 #ifdef HAVE_MUELU_CUDA
102 #include "cuda_profiler_api.h"
103 #endif
104 
105 
106 namespace MueLu {
107 
108  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
110  Teuchos::ArrayRCP<bool> nonzeros) {
111  TEUCHOS_ASSERT(vals.size() == nonzeros.size());
112  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitudeType;
113  const magnitudeType eps = 2.0*Teuchos::ScalarTraits<magnitudeType>::eps();
114  for(size_t i=0; i<static_cast<size_t>(vals.size()); i++) {
115  nonzeros[i] = (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > eps);
116  }
117  }
118 
119 
120  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
121  void DetectDirichletCols(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
122  const Teuchos::ArrayRCP<bool>& dirichletRows,
123  Teuchos::ArrayRCP<bool> dirichletCols,
124  Teuchos::ArrayRCP<bool> dirichletDomain) {
126  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > domMap = A.getDomainMap();
129  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == rowMap->getNodeNumElements());
130  TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == colMap->getNodeNumElements());
131  TEUCHOS_ASSERT(static_cast<size_t>(dirichletDomain.size()) == domMap->getNodeNumElements());
132  RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > myColsToZero = Xpetra::MultiVectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap, 1, /*zeroOut=*/true);
133  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
134  for(size_t i=0; i<(size_t) dirichletRows.size(); i++) {
135  if (dirichletRows[i]) {
138  A.getLocalRowView(i,indices,values);
139  for(size_t j=0; j<static_cast<size_t>(indices.size()); j++)
140  myColsToZero->replaceLocalValue(indices[j],0,one);
141  }
142  }
143 
145  RCP<const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer = A.getCrsGraph()->getImporter();
146  if (!importer.is_null()) {
147  globalColsToZero = Xpetra::MultiVectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(domMap, 1, /*zeroOut=*/true);
148  // export to domain map
149  globalColsToZero->doExport(*myColsToZero,*importer,Xpetra::ADD);
150  // import to column map
151  myColsToZero->doImport(*globalColsToZero,*importer,Xpetra::INSERT);
152  }
153  else
154  globalColsToZero = myColsToZero;
155 
156  FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(globalColsToZero->getData(0),dirichletDomain);
157  FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(myColsToZero->getData(0),dirichletCols);
158  }
159 
160 
161 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
162 
163  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
164  void FindNonZeros(const typename Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev_um vals,
166  using ATS = Kokkos::ArithTraits<Scalar>;
167  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
169  TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0));
170  const typename ATS::magnitudeType eps = 2.0*impl_ATS::eps();
171 
172  Kokkos::parallel_for("MueLu:RefMaxwell::FindNonZeros", range_type(0,vals.extent(0)),
173  KOKKOS_LAMBDA (const size_t i) {
174  nonzeros(i) = (impl_ATS::magnitude(vals(i,0)) > eps);
175  });
176  }
177 
178  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
179  void DetectDirichletCols(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
185  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > domMap = A.getDomainMap();
186  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > rowMap = A.getRowMap();
187  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > colMap = A.getColMap();
188  TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getNodeNumElements());
189  TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getNodeNumElements());
190  TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getNodeNumElements());
191  RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > myColsToZero = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap, /*zeroOut=*/true);
192  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
193  auto myColsToZeroView = myColsToZero->template getLocalView<typename Node::device_type>();
194  auto localMatrix = A.getLocalMatrix();
195  Kokkos::parallel_for("MueLu:RefMaxwell::DetectDirichletCols", range_type(0,rowMap->getNodeNumElements()),
196  KOKKOS_LAMBDA(const LocalOrdinal row) {
197  if (dirichletRows(row)) {
198  auto rowView = localMatrix.row(row);
199  auto length = rowView.length;
200 
201  for (decltype(length) colID = 0; colID < length; colID++)
202  myColsToZeroView(rowView.colidx(colID),0) = one;
203  }
204  });
205 
206  RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > globalColsToZero;
207  RCP<const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer = A.getCrsGraph()->getImporter();
208  if (!importer.is_null()) {
209  globalColsToZero = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(domMap, /*zeroOut=*/true);
210  // export to domain map
211  globalColsToZero->doExport(*myColsToZero,*importer,Xpetra::ADD);
212  // import to column map
213  myColsToZero->doImport(*globalColsToZero,*importer,Xpetra::INSERT);
214  }
215  else
216  globalColsToZero = myColsToZero;
217  FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(globalColsToZero->template getLocalView<typename Node::device_type>(),dirichletDomain);
218  FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(myColsToZero->template getLocalView<typename Node::device_type>(),dirichletCols);
219  }
220 
221 #endif
222 
223  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
225  return SM_Matrix_->getDomainMap();
226  }
227 
228 
229  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
231  return SM_Matrix_->getRangeMap();
232  }
233 
234 
235  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
237 
238  if (list.isType<std::string>("parameterlist: syntax") && list.get<std::string>("parameterlist: syntax") == "ml") {
239  Teuchos::ParameterList newList = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list,"refmaxwell"));
240  if(list.isSublist("refmaxwell: 11list") && list.sublist("refmaxwell: 11list").isSublist("edge matrix free: coarse"))
241  newList.sublist("refmaxwell: 11list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse"),"SA"));
242  if(list.isSublist("refmaxwell: 22list"))
243  newList.sublist("refmaxwell: 22list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 22list"),"SA"));
244  list = newList;
245  }
246 
247  parameterList_ = list;
248  std::string verbosityLevel = parameterList_.get<std::string>("verbosity", MasterList::getDefault<std::string>("verbosity"));
250  std::string outputFilename = parameterList_.get<std::string>("output filename", MasterList::getDefault<std::string>("output filename"));
251  if (outputFilename != "")
252  VerboseObject::SetMueLuOFileStream(outputFilename);
253  if (parameterList_.isType<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"))
254  VerboseObject::SetMueLuOStream(parameterList_.get<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"));
255 
256  if (parameterList_.get("print initial parameters",MasterList::getDefault<bool>("print initial parameters")))
257  GetOStream(static_cast<MsgType>(Runtime1), 0) << parameterList_ << std::endl;
258  disable_addon_ = list.get("refmaxwell: disable addon", MasterList::getDefault<bool>("refmaxwell: disable addon"));
259  mode_ = list.get("refmaxwell: mode", MasterList::getDefault<std::string>("refmaxwell: mode"));
260  use_as_preconditioner_ = list.get("refmaxwell: use as preconditioner", MasterList::getDefault<bool>("refmaxwell: use as preconditioner"));
261  dump_matrices_ = list.get("refmaxwell: dump matrices", MasterList::getDefault<bool>("refmaxwell: dump matrices"));
262  implicitTranspose_ = list.get("transpose: use implicit", MasterList::getDefault<bool>("transpose: use implicit"));
263  fuseProlongationAndUpdate_ = list.get("fuse prolongation and update", MasterList::getDefault<bool>("fuse prolongation and update"));
264  syncTimers_ = list.get("sync timers", false);
265  numItersH_ = list.get("refmaxwell: num iters H", 1);
266  numIters22_ = list.get("refmaxwell: num iters 22", 1);
267 
268  if(list.isSublist("refmaxwell: 11list"))
269  precList11_ = list.sublist("refmaxwell: 11list");
270  if(!precList11_.isType<std::string>("smoother: type") && !precList11_.isType<std::string>("smoother: pre type") && !precList11_.isType<std::string>("smoother: post type")) {
271  precList11_.set("smoother: type", "CHEBYSHEV");
272  precList11_.sublist("smoother: params").set("chebyshev: degree",2);
273  precList11_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",5.4);
274  precList11_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
275  }
276 
277  if(list.isSublist("refmaxwell: 22list"))
278  precList22_ = list.sublist("refmaxwell: 22list");
279  if(!precList22_.isType<std::string>("smoother: type") && !precList22_.isType<std::string>("smoother: pre type") && !precList22_.isType<std::string>("smoother: post type")) {
280  precList22_.set("smoother: type", "CHEBYSHEV");
281  precList22_.sublist("smoother: params").set("chebyshev: degree",2);
282  precList22_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",7.0);
283  precList22_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
284  }
285 
286  if(!list.isType<std::string>("smoother: type") && !list.isType<std::string>("smoother: pre type") && !list.isType<std::string>("smoother: post type")) {
287  list.set("smoother: type", "CHEBYSHEV");
288  list.sublist("smoother: params").set("chebyshev: degree",2);
289  list.sublist("smoother: params").set("chebyshev: ratio eigenvalue",20.0);
290  list.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
291  }
292 
293  if(list.isSublist("smoother: params")) {
294  smootherList_ = list.sublist("smoother: params");
295  }
296 
297 #if !defined(HAVE_MUELU_KOKKOS_REFACTOR)
298  useKokkos_ = false;
299 #else
300 # ifdef HAVE_MUELU_SERIAL
301  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosSerialWrapperNode).name())
302  useKokkos_ = false;
303 # endif
304 # ifdef HAVE_MUELU_OPENMP
305  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosOpenMPWrapperNode).name())
306  useKokkos_ = true;
307 # endif
308 # ifdef HAVE_MUELU_CUDA
309  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosCudaWrapperNode).name())
310  useKokkos_ = true;
311 # endif
312  useKokkos_ = list.get("use kokkos refactor",useKokkos_);
313 #endif
314  }
315 
316 
317  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
319 
320 #ifdef HAVE_MUELU_CUDA
321  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStart();
322 #endif
323 
324  std::string timerLabel;
325  if (reuse)
326  timerLabel = "MueLu RefMaxwell: compute (reuse)";
327  else
328  timerLabel = "MueLu RefMaxwell: compute";
329  RCP<Teuchos::TimeMonitor> tmCompute = getTimer(timerLabel);
330 
332  // Remove explicit zeros from matrices
333 
334  bool defaultFilter = false;
335 
336  // Remove zero entries from D0 if necessary.
337  // In the construction of the prolongator we use the graph of the
338  // matrix, so zero entries mess it up.
339  if (parameterList_.get<bool>("refmaxwell: filter D0", true) && D0_Matrix_->getNodeMaxNumRowEntries()>2) {
340  Level fineLevel;
341  fineLevel.SetFactoryManager(null);
342  fineLevel.SetLevelID(0);
343  fineLevel.Set("A",D0_Matrix_);
344  fineLevel.setlib(D0_Matrix_->getDomainMap()->lib());
345  // We expect D0 to have entries +-1, so any threshold value will do.
346  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",1.0e-8,/*keepDiagonal=*/false,/*expectedNNZperRow=*/2));
347  fineLevel.Request("A",ThreshFact.get());
348  ThreshFact->Build(fineLevel);
349  D0_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
350 
351  // If D0 has too many zeros, maybe SM and M1 do as well.
352  defaultFilter = true;
353  }
354 
355  if (parameterList_.get<bool>("refmaxwell: filter SM", defaultFilter)) {
356  RCP<Vector> diag = VectorFactory::Build(SM_Matrix_->getRowMap());
357  // find a reasonable absolute value threshold
358  SM_Matrix_->getLocalDiagCopy(*diag);
359  magnitudeType threshold = 1.0e-8 * diag->normInf();
360 
361  Level fineLevel;
362  fineLevel.SetFactoryManager(null);
363  fineLevel.SetLevelID(0);
364  fineLevel.Set("A",SM_Matrix_);
365  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
366  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",threshold,/*keepDiagonal=*/true));
367  fineLevel.Request("A",ThreshFact.get());
368  ThreshFact->Build(fineLevel);
369  SM_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
370  }
371 
372  if (parameterList_.get<bool>("refmaxwell: filter M1", defaultFilter)) {
373  RCP<Vector> diag = VectorFactory::Build(M1_Matrix_->getRowMap());
374  // find a reasonable absolute value threshold
375  M1_Matrix_->getLocalDiagCopy(*diag);
376  magnitudeType threshold = 1.0e-8 * diag->normInf();
377 
378  Level fineLevel;
379  fineLevel.SetFactoryManager(null);
380  fineLevel.SetLevelID(0);
381  fineLevel.Set("A",M1_Matrix_);
382  fineLevel.setlib(M1_Matrix_->getDomainMap()->lib());
383  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",threshold,/*keepDiagonal=*/true));
384  fineLevel.Request("A",ThreshFact.get());
385  ThreshFact->Build(fineLevel);
386  M1_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
387  }
388 
389  if (parameterList_.get<bool>("refmaxwell: filter Ms", defaultFilter)) {
390  RCP<Vector> diag = VectorFactory::Build(Ms_Matrix_->getRowMap());
391  // find a reasonable absolute value threshold
392  Ms_Matrix_->getLocalDiagCopy(*diag);
393  magnitudeType threshold = 1.0e-8 * diag->normInf();
394 
395  Level fineLevel;
396  fineLevel.SetFactoryManager(null);
397  fineLevel.SetLevelID(0);
398  fineLevel.Set("A",Ms_Matrix_);
399  fineLevel.setlib(Ms_Matrix_->getDomainMap()->lib());
400  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",threshold,/*keepDiagonal=*/true));
401  fineLevel.Request("A",ThreshFact.get());
402  ThreshFact->Build(fineLevel);
403  Ms_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
404  }
405 
406  if (IsPrint(Statistics2)) {
407  RCP<ParameterList> params = rcp(new ParameterList());;
408  params->set("printLoadBalancingInfo", true);
409  params->set("printCommInfo", true);
410  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*SM_Matrix_, "SM_Matrix", params);
411  }
412 
414  // Detect Dirichlet boundary conditions
415 
416  if (!reuse) {
417  // clean rows associated with boundary conditions
418  // Find rows with only 1 or 2 nonzero entries, record them in BCrows_.
419  // BCrows_[i] is true, iff i is a boundary row
420  // BCcols_[i] is true, iff i is a boundary column
421  int BCedgesLocal = 0;
422  int BCnodesLocal = 0;
423 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
424  if (useKokkos_) {
425  BCrowsKokkos_ = Utilities_kokkos::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits<magnitudeType>::eps(),/*count_twos_as_dirichlet=*/true);
426 
427  double rowsumTol = parameterList_.get("refmaxwell: row sum drop tol",-1.0);
428  if (rowsumTol > 0.) {
429  typedef Teuchos::ScalarTraits<Scalar> STS;
430  RCP<const Map> rowmap = SM_Matrix_->getRowMap();
431  for (LO row = 0; row < Teuchos::as<LO>(SM_Matrix_->getRowMap()->getNodeNumElements()); ++row) {
432  size_t nnz = SM_Matrix_->getNumEntriesInLocalRow(row);
433  ArrayView<const LO> indices;
434  ArrayView<const SC> vals;
435  SM_Matrix_->getLocalRowView(row, indices, vals);
436 
437  SC rowsum = STS::zero();
438  SC diagval = STS::zero();
439  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
440  LO col = indices[colID];
441  if (row == col)
442  diagval = vals[colID];
443  rowsum += vals[colID];
444  }
445  if (STS::real(rowsum) > STS::magnitude(diagval) * rowsumTol)
446  BCrowsKokkos_(row) = true;
447  }
448  }
449 
450  BCcolsKokkos_ = Kokkos::View<bool*,typename Node::device_type>(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), D0_Matrix_->getColMap()->getNodeNumElements());
451  BCdomainKokkos_ = Kokkos::View<bool*,typename Node::device_type>(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), D0_Matrix_->getDomainMap()->getNodeNumElements());
452  DetectDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(*D0_Matrix_,BCrowsKokkos_,BCcolsKokkos_,BCdomainKokkos_);
453 
454  dump(BCrowsKokkos_, "BCrows.m");
455  dump(BCcolsKokkos_, "BCcols.m");
456  dump(BCdomainKokkos_, "BCdomain.m");
457 
458  for (size_t i = 0; i<BCrowsKokkos_.size(); i++)
459  if (BCrowsKokkos_(i))
460  BCedgesLocal += 1;
461  for (size_t i = 0; i<BCdomainKokkos_.size(); i++)
462  if (BCdomainKokkos_(i))
463  BCnodesLocal += 1;
464  } else
465 #endif // HAVE_MUELU_KOKKOS_REFACTOR
466  {
467  BCrows_ = Teuchos::arcp_const_cast<bool>(Utilities::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits<magnitudeType>::eps(),/*count_twos_as_dirichlet=*/true));
468 
469  double rowsumTol = parameterList_.get("refmaxwell: row sum drop tol",-1.0);
470  if (rowsumTol > 0.) {
471  typedef Teuchos::ScalarTraits<Scalar> STS;
472  RCP<const Map> rowmap = SM_Matrix_->getRowMap();
473  for (LO row = 0; row < Teuchos::as<LO>(SM_Matrix_->getRowMap()->getNodeNumElements()); ++row) {
474  size_t nnz = SM_Matrix_->getNumEntriesInLocalRow(row);
475  ArrayView<const LO> indices;
476  ArrayView<const SC> vals;
477  SM_Matrix_->getLocalRowView(row, indices, vals);
478 
479  SC rowsum = STS::zero();
480  SC diagval = STS::zero();
481  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
482  LO col = indices[colID];
483  if (row == col)
484  diagval = vals[colID];
485  rowsum += vals[colID];
486  }
487  if (STS::real(rowsum) > STS::magnitude(diagval) * rowsumTol)
488  BCrows_[row] = true;
489  }
490  }
491 
492  BCcols_.resize(D0_Matrix_->getColMap()->getNodeNumElements());
493  BCdomain_.resize(D0_Matrix_->getDomainMap()->getNodeNumElements());
494  DetectDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(*D0_Matrix_,BCrows_,BCcols_,BCdomain_);
495 
496  dump(BCrows_, "BCrows.m");
497  dump(BCcols_, "BCcols.m");
498  dump(BCdomain_, "BCdomain.m");
499 
500  for (auto it = BCrows_.begin(); it != BCrows_.end(); ++it)
501  if (*it)
502  BCedgesLocal += 1;
503  for (auto it = BCdomain_.begin(); it != BCdomain_.end(); ++it)
504  if (*it)
505  BCnodesLocal += 1;
506  }
507 
508 #ifdef HAVE_MPI
509  MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCedgesLocal, BCedges_);
510  MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCnodesLocal, BCnodes_);
511 #else
512  BCedges_ = BCedgesLocal;
513  BCnodes_ = BCnodesLocal;
514 #endif
515  if (IsPrint(Statistics2)) {
516  GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCedges_ << " BC rows and " << BCnodes_ << " BC columns." << std::endl;
517  }
518 
519  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<Xpetra::global_size_t>(BCedges_) >= D0_Matrix_->getRangeMap()->getGlobalNumElements(), Exceptions::RuntimeError,
520  "All edges are detected as boundary edges!");
521 
522  }
523 
525  // build nullspace if necessary
526 
527  if(Nullspace_ != null) {
528  // no need to do anything - nullspace is built
529  TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
530  }
531  else if(Nullspace_ == null && Coords_ != null) {
532  // normalize coordinates
533  Array<coordinateType> norms(Coords_->getNumVectors());
534  Coords_->norm2(norms);
535  for (size_t i=0;i<Coords_->getNumVectors();i++)
536  norms[i] = ((coordinateType)1.0)/norms[i];
537  Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
538 
539  // Cast coordinates to Scalar so they can be multiplied against D0
540  Array<Scalar> normsSC(Coords_->getNumVectors());
541  for (size_t i=0;i<Coords_->getNumVectors();i++)
542  normsSC[i] = (SC) norms[i];
543 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
544  RCP<MultiVector> CoordsSC;
545  if (useKokkos_)
546  CoordsSC = Utilities_kokkos::RealValuedToScalarMultiVector(Coords_);
547  else
548  CoordsSC = Utilities::RealValuedToScalarMultiVector(Coords_);
549 #else
551 #endif
552  D0_Matrix_->apply(*CoordsSC,*Nullspace_);
553  if (IsPrint(Statistics2)) {
554  // compute edge lengths
555  ArrayRCP<ArrayRCP<const Scalar> > localNullspace(Nullspace_->getNumVectors());
556  for (size_t i = 0; i < Nullspace_->getNumVectors(); i++)
557  localNullspace[i] = Nullspace_->getData(i);
561  for (size_t j=0; j < Nullspace_->getMap()->getNodeNumElements(); j++) {
563  for (size_t i=0; i < Nullspace_->getNumVectors(); i++)
564  lenSC += localNullspace[i][j]*localNullspace[i][j];
566  localMinLen = std::min(localMinLen, len);
567  localMaxLen = std::max(localMaxLen, len);
568  localMeanLen += len;
569  }
570  coordinateType minLen, maxLen, meanLen;
571 #ifdef HAVE_MPI
572  RCP<const Teuchos::Comm<int> > comm = Nullspace_->getMap()->getComm();
573  MueLu_minAll(comm, localMinLen, minLen);
574  MueLu_sumAll(comm, localMeanLen, meanLen);
575  MueLu_maxAll(comm, localMaxLen, maxLen);
576 #else
577  minLen = localMinLen;
578  meanLen = localMeanLen;
579  maxLen = localMaxLen;
580 #endif
581  meanLen /= Nullspace_->getMap()->getGlobalNumElements();
582  GetOStream(Statistics0) << "Edge length (min/mean/max): " << minLen << " / " << meanLen << " / " << maxLen << std::endl;
583  }
584  Nullspace_->scale(normsSC());
585  }
586  else {
587  GetOStream(Errors) << "MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
588  }
589 
590  if (!reuse) {
591  // Nuke the BC edges in nullspace
592 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
593  if (useKokkos_)
594  Utilities_kokkos::ZeroDirichletRows(Nullspace_,BCrowsKokkos_);
595  else
596  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
597 #else
598  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
599 #endif
600  dump(*Nullspace_, "nullspace.m");
601  }
602 
604  // build special prolongator for (1,1)-block
605 
606  if(P11_.is_null()) {
607  // Form A_nodal = D0* Ms D0 (aka TMT_agg)
608  Level fineLevel, coarseLevel;
609  fineLevel.SetFactoryManager(null);
610  coarseLevel.SetFactoryManager(null);
611  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
612  fineLevel.SetLevelID(0);
613  coarseLevel.SetLevelID(1);
614  fineLevel.Set("A",Ms_Matrix_);
615  coarseLevel.Set("P",D0_Matrix_);
616  coarseLevel.setlib(Ms_Matrix_->getDomainMap()->lib());
617  fineLevel.setlib(Ms_Matrix_->getDomainMap()->lib());
618  coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
619  fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
620 
621  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
622  ParameterList rapList = *(rapFact->GetValidParameterList());
623  rapList.set("transpose: use implicit", true);
624  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
625  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
626  rapFact->SetParameterList(rapList);
627 
628 
629  coarseLevel.Request("A", rapFact.get());
630 
631  A_nodal_Matrix_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
632 
633  // Apply boundary conditions to A_nodal
634 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
635  if (useKokkos_)
636  Utilities_kokkos::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomainKokkos_);
637  else
638 #endif
639  Utilities::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomain_);
640  dump(*A_nodal_Matrix_, "A_nodal.m");
641 
642  // build special prolongator
643  GetOStream(Runtime0) << "RefMaxwell::compute(): building special prolongator" << std::endl;
644  buildProlongator();
645  }
646 
647 #ifdef HAVE_MPI
648  bool doRebalancing = false;
649  doRebalancing = parameterList_.get<bool>("refmaxwell: subsolves on subcommunicators", MasterList::getDefault<bool>("refmaxwell: subsolves on subcommunicators"));
650  int rebalanceStriding = parameterList_.get<int>("refmaxwell: subsolves striding", -1);
651  int numProcsAH, numProcsA22;
652 #endif
653  {
654  // build coarse grid operator for (1,1)-block
655  formCoarseMatrix();
656 
657 #ifdef HAVE_MPI
658  int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
659  if (doRebalancing && numProcs > 1) {
660 
661  {
662  Level level;
663  level.SetFactoryManager(null);
664  level.SetLevelID(0);
665  level.Set("A",AH_);
666 
667  auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
668  ParameterList repartheurParams;
669  repartheurParams.set("repartition: start level", 0);
670  // Setting min == target on purpose.
671  int defaultTargetRows = 10000;
672  repartheurParams.set("repartition: min rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
673  repartheurParams.set("repartition: target rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
674  repartheurParams.set("repartition: min rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
675  repartheurParams.set("repartition: target rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
676  repartheurParams.set("repartition: max imbalance", precList11_.get<double>("repartition: max imbalance", 1.1));
677  repartheurFactory->SetParameterList(repartheurParams);
678 
679  level.Request("number of partitions", repartheurFactory.get());
680  repartheurFactory->Build(level);
681  numProcsAH = level.Get<int>("number of partitions", repartheurFactory.get());
682  numProcsAH = std::min(numProcsAH,numProcs);
683  }
684 
685  {
686  Level level;
687  level.SetFactoryManager(null);
688  level.SetLevelID(0);
689 
690  level.Set("Map",D0_Matrix_->getDomainMap());
691 
692  auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
693  ParameterList repartheurParams;
694  repartheurParams.set("repartition: start level", 0);
695  repartheurParams.set("repartition: use map", true);
696  // Setting min == target on purpose.
697  int defaultTargetRows = 10000;
698  repartheurParams.set("repartition: min rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
699  repartheurParams.set("repartition: target rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
700  repartheurParams.set("repartition: min rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
701  repartheurParams.set("repartition: target rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
702  // repartheurParams.set("repartition: max imbalance", precList22_.get<double>("repartition: max imbalance", 1.1));
703  repartheurFactory->SetParameterList(repartheurParams);
704 
705  level.Request("number of partitions", repartheurFactory.get());
706  repartheurFactory->Build(level);
707  numProcsA22 = level.Get<int>("number of partitions", repartheurFactory.get());
708  numProcsA22 = std::min(numProcsA22,numProcs);
709  }
710 
711  if (rebalanceStriding >= 1) {
712  TEUCHOS_ASSERT(rebalanceStriding*numProcsAH<=numProcs);
713  TEUCHOS_ASSERT(rebalanceStriding*numProcsA22<=numProcs);
714  if (rebalanceStriding*(numProcsAH+numProcsA22)>numProcs) {
715  GetOStream(Warnings0) << "RefMaxwell::compute(): Disabling striding = " << rebalanceStriding << ", since AH needs " << numProcsAH
716  << " procs and A22 needs " << numProcsA22 << " procs."<< std::endl;
717  rebalanceStriding = -1;
718  }
719  }
720 
721  } else
722  doRebalancing = false;
723 
724  if (doRebalancing) { // rebalance AH
725  if (!reuse) {
726  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Rebalance AH");
727 
728  Level fineLevel, coarseLevel;
729  fineLevel.SetFactoryManager(null);
730  coarseLevel.SetFactoryManager(null);
731  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
732  fineLevel.SetLevelID(0);
733  coarseLevel.SetLevelID(1);
734  coarseLevel.Set("A",AH_);
735  coarseLevel.Set("P",P11_);
736  coarseLevel.Set("Coordinates",CoordsH_);
737  coarseLevel.Set("number of partitions", numProcsAH);
738  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
739 
740  coarseLevel.setlib(AH_->getDomainMap()->lib());
741  fineLevel.setlib(AH_->getDomainMap()->lib());
742  coarseLevel.setObjectLabel("RefMaxwell coarse (1,1)");
743  fineLevel.setObjectLabel("RefMaxwell coarse (1,1)");
744 
745  std::string partName = precList11_.get<std::string>("repartition: partitioner", "zoltan2");
746  RCP<Factory> partitioner;
747  if (partName == "zoltan") {
748 #ifdef HAVE_MUELU_ZOLTAN
749  partitioner = rcp(new ZoltanInterface());
750  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
751  // partitioner->SetFactory("number of partitions", repartheurFactory);
752 #else
753  throw Exceptions::RuntimeError("Zoltan interface is not available");
754 #endif
755  } else if (partName == "zoltan2") {
756 #ifdef HAVE_MUELU_ZOLTAN2
757  partitioner = rcp(new Zoltan2Interface());
758  ParameterList partParams;
759  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList11_.sublist("repartition: params", false)));
760  partParams.set("ParameterList", partpartParams);
761  partitioner->SetParameterList(partParams);
762  // partitioner->SetFactory("number of partitions", repartheurFactory);
763 #else
764  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
765 #endif
766  }
767 
768  auto repartFactory = rcp(new RepartitionFactory());
769  ParameterList repartParams;
770  repartParams.set("repartition: print partition distribution", precList11_.get<bool>("repartition: print partition distribution", false));
771  repartParams.set("repartition: remap parts", precList11_.get<bool>("repartition: remap parts", true));
772  if (rebalanceStriding >= 1) {
773  bool acceptPart = (SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
774  if (SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsAH*rebalanceStriding)
775  acceptPart = false;
776  repartParams.set("repartition: remap accept partition", acceptPart);
777  }
778  repartFactory->SetParameterList(repartParams);
779  // repartFactory->SetFactory("number of partitions", repartheurFactory);
780  repartFactory->SetFactory("Partition", partitioner);
781 
782  auto newP = rcp(new RebalanceTransferFactory());
783  ParameterList newPparams;
784  newPparams.set("type", "Interpolation");
785  newPparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
786  newPparams.set("repartition: use subcommunicators", true);
787  newPparams.set("repartition: rebalance Nullspace", false);
788  newP->SetFactory("Coordinates", NoFactory::getRCP());
789  newP->SetParameterList(newPparams);
790  newP->SetFactory("Importer", repartFactory);
791 
792  auto newA = rcp(new RebalanceAcFactory());
793  ParameterList rebAcParams;
794  rebAcParams.set("repartition: use subcommunicators", true);
795  newA->SetParameterList(rebAcParams);
796  newA->SetFactory("Importer", repartFactory);
797 
798  coarseLevel.Request("P", newP.get());
799  coarseLevel.Request("Importer", repartFactory.get());
800  coarseLevel.Request("A", newA.get());
801  coarseLevel.Request("Coordinates", newP.get());
802  repartFactory->Build(coarseLevel);
803 
804  if (!precList11_.get<bool>("repartition: rebalance P and R", false))
805  ImporterH_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
806  P11_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
807  AH_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
808  CoordsH_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
809 
810  } else {
811  ParameterList XpetraList;
812  XpetraList.set("Restrict Communicator",true);
813  XpetraList.set("Timer Label","MueLu RefMaxwell::RebalanceAH");
814  RCP<const Map> targetMap = ImporterH_->getTargetMap();
815  AH_ = MatrixFactory::Build(AH_, *ImporterH_, *ImporterH_, targetMap, targetMap, rcp(&XpetraList,false));
816  }
817  if (!AH_.is_null())
818  AH_->setObjectLabel("RefMaxwell coarse (1,1)");
819  }
820 #endif // HAVE_MPI
821 
822 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
823  // This should be taken out again as soon as
824  // CoalesceDropFactory_kokkos supports BlockSize > 1 and
825  // drop tol != 0.0
826  if (useKokkos_ && precList11_.isParameter("aggregation: drop tol") && precList11_.get<double>("aggregation: drop tol") != 0.0) {
827  GetOStream(Warnings0) << "RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
828  << "support BlockSize > 1 and drop tol != 0.0" << std::endl;
829  precList11_.set("aggregation: drop tol", 0.0);
830  }
831 #endif
832  dump(*P11_, "P11.m");
833 
834  if (!implicitTranspose_) {
835 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
836  if (useKokkos_)
837  R11_ = Utilities_kokkos::Transpose(*P11_);
838  else
839  R11_ = Utilities::Transpose(*P11_);
840 #else
841  R11_ = Utilities::Transpose(*P11_);
842 #endif
843  dump(*R11_, "R11.m");
844  }
845 
847  if (!AH_.is_null()) {
848  dump(*AH_, "AH.m");
849  dumpCoords(*CoordsH_, "coordsH.m");
850  int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
851  if (IsPrint(Statistics2)) {
852  RCP<ParameterList> params = rcp(new ParameterList());;
853  params->set("printLoadBalancingInfo", true);
854  params->set("printCommInfo", true);
855  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*AH_, "AH", params);
856  }
857  if (!reuse) {
858  ParameterList& userParamList = precList11_.sublist("user data");
859  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
860  HierarchyH_ = MueLu::CreateXpetraPreconditioner(AH_, precList11_);
861  } else {
862  RCP<MueLu::Level> level0 = HierarchyH_->GetLevel(0);
863  level0->Set("A", AH_);
864  HierarchyH_->SetupRe();
865  }
866  SetProcRankVerbose(oldRank);
867  }
868  VerboseObject::SetDefaultVerbLevel(verbosityLevel);
869 
870  }
871 
872  if(!reuse) {
873  GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC nodes of D0" << std::endl;
874 
875  D0_Matrix_->resumeFill();
876  Scalar replaceWith;
877  if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
878  replaceWith= Teuchos::ScalarTraits<SC>::eps();
879  else
880  replaceWith = Teuchos::ScalarTraits<SC>::zero();
881 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
882  if (useKokkos_) {
883  Utilities_kokkos::ZeroDirichletCols(D0_Matrix_,BCcolsKokkos_,replaceWith);
884  } else {
885  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
886  }
887 #else
888  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
889 #endif
890  D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
891  }
892 
894  // Build A22 = D0* SM D0 and hierarchy for A22
895  {
896  GetOStream(Runtime0) << "RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
897 
898  { // build fine grid operator for (2,2)-block, D0* SM D0 (aka TMT)
899  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
900 
901  Level fineLevel, coarseLevel;
902  fineLevel.SetFactoryManager(null);
903  coarseLevel.SetFactoryManager(null);
904  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
905  fineLevel.SetLevelID(0);
906  coarseLevel.SetLevelID(1);
907  fineLevel.Set("A",SM_Matrix_);
908  coarseLevel.Set("P",D0_Matrix_);
909  coarseLevel.Set("Coordinates",Coords_);
910 
911  coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
912  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
913  coarseLevel.setObjectLabel("RefMaxwell (2,2)");
914  fineLevel.setObjectLabel("RefMaxwell (2,2)");
915 
916  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
917  ParameterList rapList = *(rapFact->GetValidParameterList());
918  rapList.set("transpose: use implicit", true);
919  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
920  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
921  rapFact->SetParameterList(rapList);
922 
923  if (!A22_AP_reuse_data_.is_null()) {
924  coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
925  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", A22_AP_reuse_data_, rapFact.get());
926  }
927  if (!A22_RAP_reuse_data_.is_null()) {
928  coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
929  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", A22_RAP_reuse_data_, rapFact.get());
930  }
931 
932 #ifdef HAVE_MPI
933  if (doRebalancing) {
934 
935  if (!reuse) {
936 
937  coarseLevel.Set("number of partitions", numProcsA22);
938  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
939 
940  std::string partName = precList22_.get<std::string>("repartition: partitioner", "zoltan2");
941  RCP<Factory> partitioner;
942  if (partName == "zoltan") {
943 #ifdef HAVE_MUELU_ZOLTAN
944  partitioner = rcp(new ZoltanInterface());
945  partitioner->SetFactory("A", rapFact);
946  // partitioner->SetFactory("number of partitions", repartheurFactory);
947  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
948 #else
949  throw Exceptions::RuntimeError("Zoltan interface is not available");
950 #endif
951  } else if (partName == "zoltan2") {
952 #ifdef HAVE_MUELU_ZOLTAN2
953  partitioner = rcp(new Zoltan2Interface());
954  ParameterList partParams;
955  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList22_.sublist("repartition: params", false)));
956  partParams.set("ParameterList", partpartParams);
957  partitioner->SetParameterList(partParams);
958  partitioner->SetFactory("A", rapFact);
959  // partitioner->SetFactory("number of partitions", repartheurFactory);
960 #else
961  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
962 #endif
963  }
964 
965  auto repartFactory = rcp(new RepartitionFactory());
966  ParameterList repartParams;
967  repartParams.set("repartition: print partition distribution", precList22_.get<bool>("repartition: print partition distribution", false));
968  repartParams.set("repartition: remap parts", precList22_.get<bool>("repartition: remap parts", true));
969  if (rebalanceStriding >= 1) {
970  bool acceptPart = ((SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
971  if (SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22*rebalanceStriding)
972  acceptPart = false;
973  if (acceptPart)
974  TEUCHOS_ASSERT(AH_.is_null());
975  repartParams.set("repartition: remap accept partition", acceptPart);
976  } else
977  repartParams.set("repartition: remap accept partition", AH_.is_null());
978  repartFactory->SetParameterList(repartParams);
979  repartFactory->SetFactory("A", rapFact);
980  // repartFactory->SetFactory("number of partitions", repartheurFactory);
981  repartFactory->SetFactory("Partition", partitioner);
982 
983  auto newP = rcp(new RebalanceTransferFactory());
984  ParameterList newPparams;
985  newPparams.set("type", "Interpolation");
986  newPparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
987  newPparams.set("repartition: use subcommunicators", true);
988  newPparams.set("repartition: rebalance Nullspace", false);
989  newP->SetFactory("Coordinates", NoFactory::getRCP());
990  newP->SetParameterList(newPparams);
991  newP->SetFactory("Importer", repartFactory);
992 
993  auto newA = rcp(new RebalanceAcFactory());
994  ParameterList rebAcParams;
995  rebAcParams.set("repartition: use subcommunicators", true);
996  newA->SetParameterList(rebAcParams);
997  newA->SetFactory("A", rapFact);
998  newA->SetFactory("Importer", repartFactory);
999 
1000  coarseLevel.Request("P", newP.get());
1001  coarseLevel.Request("Importer", repartFactory.get());
1002  coarseLevel.Request("A", newA.get());
1003  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1004  if (!parameterList_.get<bool>("rap: triple product", false))
1005  coarseLevel.Request("AP reuse data", rapFact.get());
1006  coarseLevel.Request("RAP reuse data", rapFact.get());
1007  }
1008  coarseLevel.Request("Coordinates", newP.get());
1009  rapFact->Build(fineLevel,coarseLevel);
1010  repartFactory->Build(coarseLevel);
1011 
1012  if (!precList22_.get<bool>("repartition: rebalance P and R", false))
1013  Importer22_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
1014  D0_Matrix_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
1015  A22_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
1016  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1017  if (!parameterList_.get<bool>("rap: triple product", false))
1018  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
1019  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
1020  }
1021  Coords_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
1022  } else {
1023  coarseLevel.Request("A", rapFact.get());
1024  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1025  coarseLevel.Request("AP reuse data", rapFact.get());
1026  coarseLevel.Request("RAP reuse data", rapFact.get());
1027  }
1028 
1029  A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
1030  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1031  if (!parameterList_.get<bool>("rap: triple product", false))
1032  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
1033  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
1034  }
1035 
1036  ParameterList XpetraList;
1037  XpetraList.set("Restrict Communicator",true);
1038  XpetraList.set("Timer Label","MueLu RefMaxwell::RebalanceA22");
1039  RCP<const Map> targetMap = Importer22_->getTargetMap();
1040  A22_ = MatrixFactory::Build(A22_, *Importer22_, *Importer22_, targetMap, targetMap, rcp(&XpetraList,false));
1041  }
1042  } else
1043 #endif // HAVE_MPI
1044  {
1045  coarseLevel.Request("A", rapFact.get());
1046  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1047  coarseLevel.Request("AP reuse data", rapFact.get());
1048  coarseLevel.Request("RAP reuse data", rapFact.get());
1049  }
1050 
1051  A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
1052 
1053  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1054  if (!parameterList_.get<bool>("rap: triple product", false))
1055  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
1056  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
1057  }
1058  }
1059  }
1060 
1061  if (!implicitTranspose_) {
1062 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1063  if (useKokkos_)
1064  D0_T_Matrix_ = Utilities_kokkos::Transpose(*D0_Matrix_);
1065  else
1066  D0_T_Matrix_ = Utilities::Transpose(*D0_Matrix_);
1067 #else
1068  D0_T_Matrix_ = Utilities::Transpose(*D0_Matrix_);
1069 #endif
1070  }
1071 
1072  VerbLevel verbosityLevel = VerboseObject::GetDefaultVerbLevel();
1073  if (!A22_.is_null()) {
1074  dump(*A22_, "A22.m");
1075  A22_->setObjectLabel("RefMaxwell (2,2)");
1076  int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
1077  if (IsPrint(Statistics2)) {
1078  RCP<ParameterList> params = rcp(new ParameterList());;
1079  params->set("printLoadBalancingInfo", true);
1080  params->set("printCommInfo", true);
1081  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*A22_, "A22", params);
1082  }
1083  if (!reuse) {
1084  ParameterList& userParamList = precList22_.sublist("user data");
1085  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
1086  // If we detected no boundary conditions, the (2,2) problem is singular.
1087  // Therefore, if we want to use a direct coarse solver, we need to fix up the nullspace.
1088  std::string coarseType = "";
1089  if (precList22_.isParameter("coarse: type")) {
1090  coarseType = precList22_.get<std::string>("coarse: type");
1091  // Transform string to "Abcde" notation
1092  std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
1093  std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
1094  }
1095  if (BCedges_ == 0 &&
1096  (coarseType == "" ||
1097  coarseType == "Klu" ||
1098  coarseType == "Klu2") &&
1099  (!precList22_.isSublist("coarse: params") ||
1100  !precList22_.sublist("coarse: params").isParameter("fix nullspace")))
1101  precList22_.sublist("coarse: params").set("fix nullspace",true);
1102  Hierarchy22_ = MueLu::CreateXpetraPreconditioner(A22_, precList22_);
1103  } else {
1104  RCP<MueLu::Level> level0 = Hierarchy22_->GetLevel(0);
1105  level0->Set("A", A22_);
1106  Hierarchy22_->SetupRe();
1107  }
1108  SetProcRankVerbose(oldRank);
1109  }
1110  VerboseObject::SetDefaultVerbLevel(verbosityLevel);
1111 
1112  }
1113 
1114  if(!reuse) {
1115  GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
1116 
1117  D0_Matrix_->resumeFill();
1118  Scalar replaceWith;
1119  if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
1120  replaceWith= Teuchos::ScalarTraits<SC>::eps();
1121  else
1122  replaceWith = Teuchos::ScalarTraits<SC>::zero();
1123 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1124  if (useKokkos_) {
1125  Utilities_kokkos::ZeroDirichletRows(D0_Matrix_,BCrowsKokkos_,replaceWith);
1126  } else {
1127  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
1128  }
1129 #else
1130  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
1131 #endif
1132  D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
1133  dump(*D0_Matrix_, "D0_nuked.m");
1134  }
1135 
1136  {
1137  if (parameterList_.isType<std::string>("smoother: type") &&
1138  parameterList_.get<std::string>("smoother: type") == "hiptmair" &&
1139  SM_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra &&
1140  A22_->getDomainMap()->lib() == Xpetra::UseTpetra &&
1141  D0_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra) {
1142 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
1143  ParameterList hiptmairPreList, hiptmairPostList, smootherPreList, smootherPostList;
1144 
1145  if (smootherList_.isSublist("smoother: pre params"))
1146  smootherPreList = smootherList_.sublist("smoother: pre params");
1147  else if (smootherList_.isSublist("smoother: params"))
1148  smootherPreList = smootherList_.sublist("smoother: params");
1149  hiptmairPreList.set("hiptmair: smoother type 1",
1150  smootherPreList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
1151  hiptmairPreList.set("hiptmair: smoother type 2",
1152  smootherPreList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
1153  if(smootherPreList.isSublist("hiptmair: smoother list 1"))
1154  hiptmairPreList.set("hiptmair: smoother list 1", smootherPreList.sublist("hiptmair: smoother list 1"));
1155  if(smootherPreList.isSublist("hiptmair: smoother list 2"))
1156  hiptmairPreList.set("hiptmair: smoother list 2", smootherPreList.sublist("hiptmair: smoother list 2"));
1157  hiptmairPreList.set("hiptmair: pre or post",
1158  smootherPreList.get<std::string>("hiptmair: pre or post", "pre"));
1159  hiptmairPreList.set("hiptmair: zero starting solution",
1160  smootherPreList.get<bool>("hiptmair: zero starting solution", true));
1161 
1162  if (smootherList_.isSublist("smoother: post params"))
1163  smootherPostList = smootherList_.sublist("smoother: post params");
1164  else if (smootherList_.isSublist("smoother: params"))
1165  smootherPostList = smootherList_.sublist("smoother: params");
1166  hiptmairPostList.set("hiptmair: smoother type 1",
1167  smootherPostList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
1168  hiptmairPostList.set("hiptmair: smoother type 2",
1169  smootherPostList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
1170  if(smootherPostList.isSublist("hiptmair: smoother list 1"))
1171  hiptmairPostList.set("hiptmair: smoother list 1", smootherPostList.sublist("hiptmair: smoother list 1"));
1172  if(smootherPostList.isSublist("hiptmair: smoother list 2"))
1173  hiptmairPostList.set("hiptmair: smoother list 2", smootherPostList.sublist("hiptmair: smoother list 2"));
1174  hiptmairPostList.set("hiptmair: pre or post",
1175  smootherPostList.get<std::string>("hiptmair: pre or post", "post"));
1176  hiptmairPostList.set("hiptmair: zero starting solution",
1177  smootherPostList.get<bool>("hiptmair: zero starting solution", false));
1178 
1179  typedef Tpetra::RowMatrix<SC, LO, GO, NO> TROW;
1180  RCP<const TROW > EdgeMatrix = Utilities::Op2NonConstTpetraRow(SM_Matrix_);
1182  RCP<const TROW > PMatrix = Utilities::Op2NonConstTpetraRow(D0_Matrix_);
1183 
1184  hiptmairPreSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
1185  hiptmairPreSmoother_ -> setParameters(hiptmairPreList);
1186  hiptmairPreSmoother_ -> initialize();
1187  hiptmairPreSmoother_ -> compute();
1188  hiptmairPostSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
1189  hiptmairPostSmoother_ -> setParameters(hiptmairPostList);
1190  hiptmairPostSmoother_ -> initialize();
1191  hiptmairPostSmoother_ -> compute();
1192  useHiptmairSmoothing_ = true;
1193 #else
1194  throw(Xpetra::Exceptions::RuntimeError("MueLu must be compiled with Ifpack2 for Hiptmair smoothing."));
1195 #endif // defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
1196  } else {
1197  if (parameterList_.isType<std::string>("smoother: pre type") && parameterList_.isType<std::string>("smoother: post type")) {
1198  std::string preSmootherType = parameterList_.get<std::string>("smoother: pre type");
1199  std::string postSmootherType = parameterList_.get<std::string>("smoother: post type");
1200 
1201  ParameterList preSmootherList, postSmootherList;
1202  if (parameterList_.isSublist("smoother: pre params"))
1203  preSmootherList = parameterList_.sublist("smoother: pre params");
1204  if (parameterList_.isSublist("smoother: post params"))
1205  postSmootherList = parameterList_.sublist("smoother: post params");
1206 
1207  Level level;
1208  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
1209  level.SetFactoryManager(factoryHandler);
1210  level.SetLevelID(0);
1211  level.setObjectLabel("RefMaxwell (1,1)");
1212  level.Set("A",SM_Matrix_);
1213  level.setlib(SM_Matrix_->getDomainMap()->lib());
1214 
1215  RCP<SmootherPrototype> preSmootherPrototype = rcp(new TrilinosSmoother(preSmootherType, preSmootherList));
1216  RCP<SmootherFactory> preSmootherFact = rcp(new SmootherFactory(preSmootherPrototype));
1217 
1218  RCP<SmootherPrototype> postSmootherPrototype = rcp(new TrilinosSmoother(postSmootherType, postSmootherList));
1219  RCP<SmootherFactory> postSmootherFact = rcp(new SmootherFactory(postSmootherPrototype));
1220 
1221  level.Request("PreSmoother",preSmootherFact.get());
1222  preSmootherFact->Build(level);
1223  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",preSmootherFact.get());
1224 
1225  level.Request("PostSmoother",postSmootherFact.get());
1226  postSmootherFact->Build(level);
1227  PostSmoother_ = level.Get<RCP<SmootherBase> >("PostSmoother",postSmootherFact.get());
1228  } else {
1229  std::string smootherType = parameterList_.get<std::string>("smoother: type", "CHEBYSHEV");
1230  Level level;
1231  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
1232  level.SetFactoryManager(factoryHandler);
1233  level.SetLevelID(0);
1234  level.setObjectLabel("RefMaxwell (1,1)");
1235  level.Set("A",SM_Matrix_);
1236  level.setlib(SM_Matrix_->getDomainMap()->lib());
1237  RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(smootherType, smootherList_));
1238  RCP<SmootherFactory> SmootherFact = rcp(new SmootherFactory(smootherPrototype));
1239  level.Request("PreSmoother",SmootherFact.get());
1240  SmootherFact->Build(level);
1241  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",SmootherFact.get());
1242  PostSmoother_ = PreSmoother_;
1243  }
1244  useHiptmairSmoothing_ = false;
1245  }
1246  }
1247 
1248  if (!ImporterH_.is_null()) {
1249  RCP<const Import> ImporterP11 = ImportFactory::Build(ImporterH_->getTargetMap(),P11_->getColMap());
1250  rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix()->replaceDomainMapAndImporter(ImporterH_->getTargetMap(), ImporterP11);
1251  }
1252 
1253  if (!Importer22_.is_null()) {
1254  RCP<const Import> ImporterD0 = ImportFactory::Build(Importer22_->getTargetMap(),D0_Matrix_->getColMap());
1255  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), ImporterD0);
1256  }
1257 
1258  if ((!D0_T_Matrix_.is_null()) &&
1259  (!R11_.is_null()) &&
1260  (!rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
1261  (!rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
1262  (D0_T_Matrix_->getColMap()->lib() == Xpetra::UseTpetra) &&
1263  (R11_->getColMap()->lib() == Xpetra::UseTpetra))
1264  D0_T_R11_colMapsMatch_ = D0_T_Matrix_->getColMap()->isSameAs(*R11_->getColMap());
1265  else
1266  D0_T_R11_colMapsMatch_ = false;
1267  if (D0_T_R11_colMapsMatch_)
1268  GetOStream(Runtime0) << "RefMaxwell::compute(): D0_T and R11 have matching colMaps" << std::endl;
1269 
1270  // Allocate temporary MultiVectors for solve
1271  allocateMemory(1);
1272 
1273  if (parameterList_.isSublist("matvec params"))
1274  {
1275  RCP<ParameterList> matvecParams = rcpFromRef(parameterList_.sublist("matvec params"));
1276  setMatvecParams(*SM_Matrix_, matvecParams);
1277  setMatvecParams(*D0_Matrix_, matvecParams);
1278  setMatvecParams(*P11_, matvecParams);
1279  if (!D0_T_Matrix_.is_null()) setMatvecParams(*D0_T_Matrix_, matvecParams);
1280  if (!R11_.is_null()) setMatvecParams(*R11_, matvecParams);
1281  if (!ImporterH_.is_null()) ImporterH_->setDistributorParameters(matvecParams);
1282  if (!Importer22_.is_null()) Importer22_->setDistributorParameters(matvecParams);
1283  }
1284  if (!ImporterH_.is_null() && parameterList_.isSublist("refmaxwell: ImporterH params")){
1285  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: ImporterH params"));
1286  ImporterH_->setDistributorParameters(importerParams);
1287  }
1288  if (!Importer22_.is_null() && parameterList_.isSublist("refmaxwell: Importer22 params")){
1289  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: Importer22 params"));
1290  Importer22_->setDistributorParameters(importerParams);
1291  }
1292 
1293  describe(GetOStream(Runtime0));
1294 
1295 #ifdef HAVE_MUELU_CUDA
1296  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStop();
1297 #endif
1298  }
1299 
1300 
1301  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1303  if (!R11_.is_null())
1304  P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), numVectors);
1305  else
1306  P11res_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1307  if (D0_T_R11_colMapsMatch_)
1308  D0TR11Tmp_ = MultiVectorFactory::Build(R11_->getColMap(), numVectors);
1309  if (!ImporterH_.is_null()) {
1310  P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1311  P11xTmp_ = MultiVectorFactory::Build(ImporterH_->getSourceMap(), numVectors);
1312  P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1313  } else
1314  P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1315  if (!D0_T_Matrix_.is_null())
1316  D0res_ = MultiVectorFactory::Build(D0_T_Matrix_->getRangeMap(), numVectors);
1317  else
1318  D0res_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1319  if (!Importer22_.is_null()) {
1320  D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1321  D0xTmp_ = MultiVectorFactory::Build(Importer22_->getSourceMap(), numVectors);
1322  D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1323  } else
1324  D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1325  residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), numVectors);
1326  }
1327 
1328 
1329  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1330  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Matrix& A, std::string name) const {
1331  if (dump_matrices_) {
1332  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1333  Xpetra::IO<SC, LO, GO, NO>::Write(name, A);
1334  }
1335  }
1336 
1337 
1338  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1339  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const MultiVector& X, std::string name) const {
1340  if (dump_matrices_) {
1341  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1342  Xpetra::IO<SC, LO, GO, NO>::Write(name, X);
1343  }
1344  }
1345 
1346 
1347  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1349  if (dump_matrices_) {
1350  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1351  Xpetra::IO<coordinateType, LO, GO, NO>::Write(name, X);
1352  }
1353  }
1354 
1355 
1356  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1358  if (dump_matrices_) {
1359  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1360  std::ofstream out(name);
1361  for (size_t i = 0; i < Teuchos::as<size_t>(v.size()); i++)
1362  out << v[i] << "\n";
1363  }
1364  }
1365 
1366 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1367  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1369  if (dump_matrices_) {
1370  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1371  std::ofstream out(name);
1372  auto vH = Kokkos::create_mirror_view (v);
1373  Kokkos::deep_copy(vH , v);
1374  for (size_t i = 0; i < vH.size(); i++)
1375  out << vH[i] << "\n";
1376  }
1377  }
1378 #endif
1379 
1380  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1382  if (IsPrint(Timings)) {
1383  if (!syncTimers_)
1385  else {
1386  if (comm.is_null())
1387  return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), SM_Matrix_->getRowMap()->getComm().ptr()));
1388  else
1390  }
1391  } else
1392  return Teuchos::null;
1393  }
1394 
1395 
1396  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1398  // The P11 matrix maps node based aggregrates { A_j } to edges { e_i }.
1399  //
1400  // The old implementation used
1401  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i) * P(n_l, A_j)
1402  // yet the paper gives
1403  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i)
1404  // where phi_k is the k-th nullspace vector.
1405  //
1406  // The graph of D0 contains the incidence from nodes to edges.
1407  // The nodal prolongator P maps aggregates to nodes.
1408 
1409  const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1410  const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1411  const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1412  size_t dim = Nullspace_->getNumVectors();
1413  size_t numLocalRows = SM_Matrix_->getNodeNumRows();
1414 
1415  // build prolongator: algorithm 1 in the reference paper
1416  // First, build nodal unsmoothed prolongator using the matrix A_nodal
1417  RCP<Matrix> P_nodal;
1418  bool read_P_from_file = parameterList_.get("refmaxwell: read_P_from_file",false);
1419  if (read_P_from_file) {
1420  // This permits to read in an ML prolongator, so that we get the same hierarchy.
1421  // (ML and MueLu typically produce different aggregates.)
1422  std::string P_filename = parameterList_.get("refmaxwell: P_filename",std::string("P.m"));
1423  std::string domainmap_filename = parameterList_.get("refmaxwell: P_domainmap_filename",std::string("domainmap_P.m"));
1424  std::string colmap_filename = parameterList_.get("refmaxwell: P_colmap_filename",std::string("colmap_P.m"));
1425  std::string coords_filename = parameterList_.get("refmaxwell: CoordsH",std::string("coordsH.m"));
1426  RCP<const Map> colmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(colmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1427  RCP<const Map> domainmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(domainmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1428  P_nodal = Xpetra::IO<SC, LO, GO, NO>::Read(P_filename, A_nodal_Matrix_->getDomainMap(), colmap, domainmap, A_nodal_Matrix_->getDomainMap());
1429  CoordsH_ = Xpetra::IO<typename RealValuedMultiVector::scalar_type, LO, GO, NO>::ReadMultiVector(coords_filename, domainmap);
1430  } else {
1431  Level fineLevel, coarseLevel;
1432  fineLevel.SetFactoryManager(null);
1433  coarseLevel.SetFactoryManager(null);
1434  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1435  fineLevel.SetLevelID(0);
1436  coarseLevel.SetLevelID(1);
1437  fineLevel.Set("A",A_nodal_Matrix_);
1438  fineLevel.Set("Coordinates",Coords_);
1439  fineLevel.Set("DofsPerNode",1);
1440  coarseLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1441  fineLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1442  coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1443  fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1444 
1445  LocalOrdinal NSdim = 1;
1446  RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
1447  nullSpace->putScalar(SC_ONE);
1448  fineLevel.Set("Nullspace",nullSpace);
1449 
1450  RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1451 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1452  if (useKokkos_) {
1453  amalgFact = rcp(new AmalgamationFactory_kokkos());
1454  dropFact = rcp(new CoalesceDropFactory_kokkos());
1455  UncoupledAggFact = rcp(new UncoupledAggregationFactory_kokkos());
1456  coarseMapFact = rcp(new CoarseMapFactory_kokkos());
1457  TentativePFact = rcp(new TentativePFactory_kokkos());
1458  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1459  SaPFact = rcp(new SaPFactory_kokkos());
1460  Tfact = rcp(new CoordinatesTransferFactory_kokkos());
1461  } else
1462 #endif
1463  {
1464  amalgFact = rcp(new AmalgamationFactory());
1465  dropFact = rcp(new CoalesceDropFactory());
1466  UncoupledAggFact = rcp(new UncoupledAggregationFactory());
1467  coarseMapFact = rcp(new CoarseMapFactory());
1468  TentativePFact = rcp(new TentativePFactory());
1469  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1470  SaPFact = rcp(new SaPFactory());
1471  Tfact = rcp(new CoordinatesTransferFactory());
1472  }
1473  dropFact->SetFactory("UnAmalgamationInfo", amalgFact);
1474  double dropTol = parameterList_.get("aggregation: drop tol",0.0);
1475  std::string dropScheme = parameterList_.get("aggregation: drop scheme","classical");
1476  std::string distLaplAlgo = parameterList_.get("aggregation: distance laplacian algo","default");
1477  dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1478  dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme));
1479  if (!useKokkos_)
1480  dropFact->SetParameter("aggregation: distance laplacian algo",Teuchos::ParameterEntry(distLaplAlgo));
1481 
1482  UncoupledAggFact->SetFactory("Graph", dropFact);
1483  int minAggSize = parameterList_.get("aggregation: min agg size",2);
1484  UncoupledAggFact->SetParameter("aggregation: min agg size",Teuchos::ParameterEntry(minAggSize));
1485  int maxAggSize = parameterList_.get("aggregation: max agg size",-1);
1486  UncoupledAggFact->SetParameter("aggregation: max agg size",Teuchos::ParameterEntry(maxAggSize));
1487 
1488  coarseMapFact->SetFactory("Aggregates", UncoupledAggFact);
1489 
1490  TentativePFact->SetFactory("Aggregates", UncoupledAggFact);
1491  TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact);
1492  TentativePFact->SetFactory("CoarseMap", coarseMapFact);
1493 
1494  Tfact->SetFactory("Aggregates", UncoupledAggFact);
1495  Tfact->SetFactory("CoarseMap", coarseMapFact);
1496 
1497  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa") {
1498  SaPFact->SetFactory("P", TentativePFact);
1499  coarseLevel.Request("P", SaPFact.get());
1500  } else
1501  coarseLevel.Request("P",TentativePFact.get());
1502  coarseLevel.Request("Coordinates",Tfact.get());
1503 
1505  if (parameterList_.get("aggregation: export visualization data",false)) {
1506  aggExport = rcp(new AggregationExportFactory());
1507  ParameterList aggExportParams;
1508  aggExportParams.set("aggregation: output filename", "aggs.vtk");
1509  aggExportParams.set("aggregation: output file: agg style", "Jacks");
1510  aggExport->SetParameterList(aggExportParams);
1511 
1512  aggExport->SetFactory("Aggregates", UncoupledAggFact);
1513  aggExport->SetFactory("UnAmalgamationInfo", amalgFact);
1514  fineLevel.Request("Aggregates",UncoupledAggFact.get());
1515  fineLevel.Request("UnAmalgamationInfo",amalgFact.get());
1516  }
1517 
1518  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1519  coarseLevel.Get("P",P_nodal,SaPFact.get());
1520  else
1521  coarseLevel.Get("P",P_nodal,TentativePFact.get());
1522  coarseLevel.Get("Coordinates",CoordsH_,Tfact.get());
1523 
1524  if (parameterList_.get("aggregation: export visualization data",false))
1525  aggExport->Build(fineLevel, coarseLevel);
1526  }
1527  dump(*P_nodal, "P_nodal.m");
1528 
1529  RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
1530 
1531  // Import off-rank rows of P_nodal into P_nodal_imported
1532  RCP<CrsMatrix> P_nodal_imported;
1533  int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1534  if (numProcs > 1) {
1535  RCP<CrsMatrixWrap> P_nodal_temp;
1536  RCP<const Map> targetMap = D0Crs->getColMap();
1537  P_nodal_temp = rcp(new CrsMatrixWrap(targetMap));
1538  RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1539  P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1540  P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
1541  rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
1542  P_nodal_imported = P_nodal_temp->getCrsMatrix();
1543  dump(*P_nodal_temp, "P_nodal_imported.m");
1544  } else
1545  P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1546 
1547 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1548  if (useKokkos_) {
1549 
1550  using ATS = Kokkos::ArithTraits<SC>;
1551  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1553 
1554  typedef typename Matrix::local_matrix_type KCRS;
1555  typedef typename KCRS::device_type device_t;
1556  typedef typename KCRS::StaticCrsGraphType graph_t;
1557  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1558  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1559  typedef typename KCRS::values_type::non_const_type scalar_view_t;
1560 
1561  // Get data out of P_nodal_imported and D0.
1562  auto localP = P_nodal_imported->getLocalMatrix();
1563  auto localD0 = D0_Matrix_->getLocalMatrix();
1564 
1565  // Which algorithm should we use for the construction of the special prolongator?
1566  // Option "mat-mat":
1567  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1568  std::string defaultAlgo = "mat-mat";
1569  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1570 
1571  if (algo == "mat-mat") {
1572  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1573  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1574 
1575 #ifdef HAVE_MUELU_DEBUG
1576  TEUCHOS_ASSERT(D0_P_nodal->getColMap()->isSameAs(*P_nodal_imported->getColMap()));
1577 #endif
1578 
1579  // Get data out of D0*P.
1580  auto localD0P = D0_P_nodal->getLocalMatrix();
1581 
1582  // Create the matrix object
1583  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1584  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1585 
1586  size_t nnzEstimate = dim*localD0P.graph.entries.size();
1587  lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1588  lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1589  scalar_view_t P11vals("P11_vals",nnzEstimate);
1590 
1591  // adjust rowpointer
1592  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1593  KOKKOS_LAMBDA(const size_t i) {
1594  P11rowptr(i) = dim*localD0P.graph.row_map(i);
1595  });
1596 
1597  // adjust column indices
1598  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1599  KOKKOS_LAMBDA(const size_t jj) {
1600  for (size_t k = 0; k < dim; k++) {
1601  P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1602  P11vals(dim*jj+k) = SC_ZERO;
1603  }
1604  });
1605 
1606  auto localNullspace = Nullspace_->template getLocalView<device_t>();
1607 
1608  // enter values
1609  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1610  // The matrix D0 has too many entries per row.
1611  // Therefore we need to check whether its entries are actually non-zero.
1612  // This is the case for the matrices built by MiniEM.
1613  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1614 
1616 
1617  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1618  KOKKOS_LAMBDA(const size_t i) {
1619  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1620  LO l = localD0.graph.entries(ll);
1621  SC p = localD0.values(ll);
1622  if (impl_ATS::magnitude(p) < tol)
1623  continue;
1624  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1625  LO j = localP.graph.entries(jj);
1626  SC v = localP.values(jj);
1627  for (size_t k = 0; k < dim; k++) {
1628  LO jNew = dim*j+k;
1629  SC n = localNullspace(i,k);
1630  size_t m;
1631  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1632  if (P11colind(m) == jNew)
1633  break;
1634 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA)
1635  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1636 #endif
1637  P11vals(m) += half * v * n;
1638  }
1639  }
1640  }
1641  });
1642 
1643  } else {
1644  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1645  KOKKOS_LAMBDA(const size_t i) {
1646  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1647  LO l = localD0.graph.entries(ll);
1648  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1649  LO j = localP.graph.entries(jj);
1650  SC v = localP.values(jj);
1651  for (size_t k = 0; k < dim; k++) {
1652  LO jNew = dim*j+k;
1653  SC n = localNullspace(i,k);
1654  size_t m;
1655  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1656  if (P11colind(m) == jNew)
1657  break;
1658 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA)
1659  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1660 #endif
1661  P11vals(m) += half * v * n;
1662  }
1663  }
1664  }
1665  });
1666  }
1667 
1668  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1669  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1670  P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1671  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1672 
1673  } else
1674  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1675  } else {
1676 #endif // ifdef(HAVE_MUELU_KOKKOS_REFACTOR)
1677 
1678  // get nullspace vectors
1679  ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1680  ArrayRCP<ArrayView<const SC> > nullspace(dim);
1681  for(size_t i=0; i<dim; i++) {
1682  nullspaceRCP[i] = Nullspace_->getData(i);
1683  nullspace[i] = nullspaceRCP[i]();
1684  }
1685 
1686  // Get data out of P_nodal_imported and D0.
1687  ArrayRCP<const size_t> Prowptr_RCP, D0rowptr_RCP;
1688  ArrayRCP<const LO> Pcolind_RCP, D0colind_RCP;
1689  ArrayRCP<const SC> Pvals_RCP, D0vals_RCP;
1690  ArrayRCP<size_t> P11rowptr_RCP;
1691  ArrayRCP<LO> P11colind_RCP;
1692  ArrayRCP<SC> P11vals_RCP;
1693 
1694  P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1695  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1696 
1697  // For efficiency
1698  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1699  // slower than Teuchos::ArrayView::operator[].
1700  ArrayView<const size_t> Prowptr, D0rowptr;
1701  ArrayView<const LO> Pcolind, D0colind;
1702  ArrayView<const SC> Pvals, D0vals;
1703  Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1704  D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1705 
1706  // Which algorithm should we use for the construction of the special prolongator?
1707  // Option "mat-mat":
1708  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1709  // Option "gustavson":
1710  // Loop over D0, P and nullspace and allocate directly. (Gustavson-like)
1711  // More efficient, but only available for serial node.
1712  std::string defaultAlgo = "mat-mat";
1713  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1714 
1715  if (algo == "mat-mat") {
1716  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1717  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1718 
1719  // Get data out of D0*P.
1720  ArrayRCP<const size_t> D0Prowptr_RCP;
1721  ArrayRCP<const LO> D0Pcolind_RCP;
1722  ArrayRCP<const SC> D0Pvals_RCP;
1723  rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1724 
1725  // For efficiency
1726  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1727  // slower than Teuchos::ArrayView::operator[].
1728  ArrayView<const size_t> D0Prowptr;
1729  ArrayView<const LO> D0Pcolind;
1730  D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1731 
1732  // Create the matrix object
1733  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1734  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1735  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1736  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1737  size_t nnzEstimate = dim*D0Prowptr[numLocalRows];
1738  P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1739 
1740  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1741  ArrayView<LO> P11colind = P11colind_RCP();
1742  ArrayView<SC> P11vals = P11vals_RCP();
1743 
1744  // adjust rowpointer
1745  for (size_t i = 0; i < numLocalRows+1; i++) {
1746  P11rowptr[i] = dim*D0Prowptr[i];
1747  }
1748 
1749  // adjust column indices
1750  for (size_t jj = 0; jj < (size_t) D0Prowptr[numLocalRows]; jj++)
1751  for (size_t k = 0; k < dim; k++) {
1752  P11colind[dim*jj+k] = dim*D0Pcolind[jj]+k;
1753  P11vals[dim*jj+k] = SC_ZERO;
1754  }
1755 
1756  RCP<const Map> P_nodal_imported_colmap = P_nodal_imported->getColMap();
1757  RCP<const Map> D0_P_nodal_colmap = D0_P_nodal->getColMap();
1758  // enter values
1759  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1760  // The matrix D0 has too many entries per row.
1761  // Therefore we need to check whether its entries are actually non-zero.
1762  // This is the case for the matrices built by MiniEM.
1763  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1764 
1766  for (size_t i = 0; i < numLocalRows; i++) {
1767  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1768  LO l = D0colind[ll];
1769  SC p = D0vals[ll];
1771  continue;
1772  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1773  LO j = Pcolind[jj];
1774  j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1775  SC v = Pvals[jj];
1776  for (size_t k = 0; k < dim; k++) {
1777  LO jNew = dim*j+k;
1778  SC n = nullspace[k][i];
1779  size_t m;
1780  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1781  if (P11colind[m] == jNew)
1782  break;
1783 #ifdef HAVE_MUELU_DEBUG
1784  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1785 #endif
1786  P11vals[m] += half * v * n;
1787  }
1788  }
1789  }
1790  }
1791  } else {
1792  // enter values
1793  for (size_t i = 0; i < numLocalRows; i++) {
1794  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1795  LO l = D0colind[ll];
1796  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1797  LO j = Pcolind[jj];
1798  j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1799  SC v = Pvals[jj];
1800  for (size_t k = 0; k < dim; k++) {
1801  LO jNew = dim*j+k;
1802  SC n = nullspace[k][i];
1803  size_t m;
1804  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1805  if (P11colind[m] == jNew)
1806  break;
1807 #ifdef HAVE_MUELU_DEBUG
1808  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1809 #endif
1810  P11vals[m] += half * v * n;
1811  }
1812  }
1813  }
1814  }
1815  }
1816 
1817  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1818  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1819 
1820  } else if (algo == "gustavson") {
1821 
1822  LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1823  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1824  Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1825  // This is ad-hoc and should maybe be replaced with some better heuristics.
1826  size_t nnz_alloc = dim*D0vals_RCP.size();
1827 
1828  // Create the matrix object
1829  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1830  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1831  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1832  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1833  P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1834 
1835  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1836  ArrayView<LO> P11colind = P11colind_RCP();
1837  ArrayView<SC> P11vals = P11vals_RCP();
1838 
1839  size_t nnz;
1840  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1841  // The matrix D0 has too many entries per row.
1842  // Therefore we need to check whether its entries are actually non-zero.
1843  // This is the case for the matrices built by MiniEM.
1844  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1845 
1847  nnz = 0;
1848  size_t nnz_old = 0;
1849  for (size_t i = 0; i < numLocalRows; i++) {
1850  P11rowptr[i] = nnz;
1851  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1852  LO l = D0colind[ll];
1853  SC p = D0vals[ll];
1855  continue;
1856  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1857  LO j = Pcolind[jj];
1858  SC v = Pvals[jj];
1859  for (size_t k = 0; k < dim; k++) {
1860  LO jNew = dim*j+k;
1861  SC n = nullspace[k][i];
1862  // do we already have an entry for (i, jNew)?
1863  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1864  P11_status[jNew] = nnz;
1865  P11colind[nnz] = jNew;
1866  P11vals[nnz] = half * v * n;
1867  // or should it be
1868  // P11vals[nnz] = half * n;
1869  nnz++;
1870  } else {
1871  P11vals[P11_status[jNew]] += half * v * n;
1872  // or should it be
1873  // P11vals[P11_status[jNew]] += half * n;
1874  }
1875  }
1876  }
1877  }
1878  nnz_old = nnz;
1879  }
1880  P11rowptr[numLocalRows] = nnz;
1881  } else {
1882  nnz = 0;
1883  size_t nnz_old = 0;
1884  for (size_t i = 0; i < numLocalRows; i++) {
1885  P11rowptr[i] = nnz;
1886  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1887  LO l = D0colind[ll];
1888  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1889  LO j = Pcolind[jj];
1890  SC v = Pvals[jj];
1891  for (size_t k = 0; k < dim; k++) {
1892  LO jNew = dim*j+k;
1893  SC n = nullspace[k][i];
1894  // do we already have an entry for (i, jNew)?
1895  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1896  P11_status[jNew] = nnz;
1897  P11colind[nnz] = jNew;
1898  P11vals[nnz] = half * v * n;
1899  // or should it be
1900  // P11vals[nnz] = half * n;
1901  nnz++;
1902  } else {
1903  P11vals[P11_status[jNew]] += half * v * n;
1904  // or should it be
1905  // P11vals[P11_status[jNew]] += half * n;
1906  }
1907  }
1908  }
1909  }
1910  nnz_old = nnz;
1911  }
1912  P11rowptr[numLocalRows] = nnz;
1913  }
1914 
1915  if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1916  // Downward resize
1917  // - Cannot resize for Epetra, as it checks for same pointers
1918  // - Need to resize for Tpetra, as it checks ().size() == P11rowptr[numLocalRows]
1919  P11vals_RCP.resize(nnz);
1920  P11colind_RCP.resize(nnz);
1921  }
1922 
1923  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1924  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1925  } else
1926  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1927 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1928  }
1929 #endif
1930  }
1931 
1932  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1934  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build coarse (1,1) matrix");
1935 
1937 
1938  // coarse matrix for P11* (M1 + D1* M2 D1) P11
1939  RCP<Matrix> Matrix1;
1940  {
1941  Level fineLevel, coarseLevel;
1942  fineLevel.SetFactoryManager(null);
1943  coarseLevel.SetFactoryManager(null);
1944  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1945  fineLevel.SetLevelID(0);
1946  coarseLevel.SetLevelID(1);
1947  fineLevel.Set("A",SM_Matrix_);
1948  coarseLevel.Set("P",P11_);
1949 
1950  coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
1951  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
1952  coarseLevel.setObjectLabel("RefMaxwell (1,1)");
1953  fineLevel.setObjectLabel("RefMaxwell (1,1)");
1954 
1955  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
1956  ParameterList rapList = *(rapFact->GetValidParameterList());
1957  rapList.set("transpose: use implicit", true);
1958  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
1959  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
1960  rapFact->SetParameterList(rapList);
1961 
1962  if (precList11_.isType<std::string>("reuse: type") && precList11_.get<std::string>("reuse: type") != "none") {
1963  if (!parameterList_.get<bool>("rap: triple product", false))
1964  coarseLevel.Request("AP reuse data", rapFact.get());
1965  coarseLevel.Request("RAP reuse data", rapFact.get());
1966  }
1967 
1968  if (!AH_AP_reuse_data_.is_null()) {
1969  coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
1970  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", AH_AP_reuse_data_, rapFact.get());
1971  }
1972  if (!AH_RAP_reuse_data_.is_null()) {
1973  coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
1974  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", AH_RAP_reuse_data_, rapFact.get());
1975  }
1976 
1977  coarseLevel.Request("A", rapFact.get());
1978 
1979  Matrix1 = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
1980  if (precList11_.isType<std::string>("reuse: type") && precList11_.get<std::string>("reuse: type") != "none") {
1981  if (!parameterList_.get<bool>("rap: triple product", false))
1982  AH_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
1983  AH_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
1984  }
1985  }
1986 
1987  if (!AH_.is_null())
1988  AH_ = Teuchos::null;
1989 
1990  if(disable_addon_==true) {
1991  // if add-on is not chosen
1992  AH_=Matrix1;
1993  }
1994  else {
1995  if (Addon_Matrix_.is_null()) {
1996  RCP<Teuchos::TimeMonitor> tmAddon = getTimer("MueLu RefMaxwell: Build coarse addon matrix");
1997  // catch a failure
1998  TEUCHOS_TEST_FOR_EXCEPTION(M0inv_Matrix_==Teuchos::null,std::invalid_argument,
1999  "MueLu::RefMaxwell::formCoarseMatrix(): Inverse of "
2000  "lumped mass matrix required for add-on (i.e. M0inv_Matrix is null)");
2001 
2002  // coarse matrix for add-on, i.e P11* (M1 D0 M0inv D0* M1) P11
2003  RCP<Matrix> Zaux = MatrixFactory::Build(M1_Matrix_->getRowMap());
2004  RCP<Matrix> Z = MatrixFactory::Build(D0_Matrix_->getDomainMap());
2005 
2006  // construct Zaux = M1 P11
2007  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M1_Matrix_,false,*P11_,false,*Zaux,true,true);
2008  // construct Z = D0* M1 P11 = D0* Zaux
2009  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*Zaux,false,*Z,true,true);
2010 
2011  // construct Z* M0inv Z
2012  RCP<Matrix> Matrix2 = MatrixFactory::Build(Z->getDomainMap());
2013  if (M0inv_Matrix_->getGlobalMaxNumRowEntries()<=1) {
2014  // We assume that if M0inv has at most one entry per row then
2015  // these are all diagonal entries.
2016  RCP<Vector> diag = VectorFactory::Build(M0inv_Matrix_->getRowMap());
2017  M0inv_Matrix_->getLocalDiagCopy(*diag);
2018  ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
2019  for (size_t j=0; j < diag->getMap()->getNodeNumElements(); j++) {
2020  diagVals[j] = Teuchos::ScalarTraits<Scalar>::squareroot(diagVals[j]);
2021  }
2022  if (Z->getRowMap()->isSameAs(*(diag->getMap())))
2023  Z->leftScale(*diag);
2024  else {
2025  RCP<Import> importer = ImportFactory::Build(diag->getMap(),Z->getRowMap());
2026  RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
2027  diag2->doImport(*diag,*importer,Xpetra::INSERT);
2028  Z->leftScale(*diag2);
2029  }
2030  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*Z,false,*Matrix2,true,true);
2031  } else if (parameterList_.get<bool>("rap: triple product", false) == false) {
2032  RCP<Matrix> C2 = MatrixFactory::Build(M0inv_Matrix_->getRowMap());
2033  // construct C2 = M0inv Z
2034  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M0inv_Matrix_,false,*Z,false,*C2,true,true);
2035  // construct Matrix2 = Z* M0inv Z = Z* C2
2036  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*C2,false,*Matrix2,true,true);
2037  } else {
2038  // construct Matrix2 = Z* M0inv Z
2039  Xpetra::TripleMatrixMultiply<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
2040  MultiplyRAP(*Z, true, *M0inv_Matrix_, false, *Z, false, *Matrix2, true, true);
2041  }
2042  // Should we keep the addon for next setup?
2043  if (precList11_.isType<std::string>("reuse: type") && precList11_.get<std::string>("reuse: type") != "none")
2044  Addon_Matrix_ = Matrix2;
2045 
2046  // add matrices together
2047  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*Matrix1,false,one,*Matrix2,false,one,AH_,GetOStream(Runtime0));
2048  AH_->fillComplete();
2049  } else {
2050  // add matrices together
2051  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*Matrix1,false,one,*Addon_Matrix_,false,one,AH_,GetOStream(Runtime0));
2052  AH_->fillComplete();
2053  }
2054  }
2055 
2056  // set fixed block size for vector nodal matrix
2057  size_t dim = Nullspace_->getNumVectors();
2058  AH_->SetFixedBlockSize(dim);
2059  AH_->setObjectLabel("RefMaxwell coarse (1,1)");
2060 
2061  }
2062 
2063 
2064  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2066  bool reuse = !SM_Matrix_.is_null();
2067  SM_Matrix_ = SM_Matrix_new;
2068  dump(*SM_Matrix_, "SM.m");
2069  if (ComputePrec)
2070  compute(reuse);
2071  }
2072 
2073 
2074  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2075  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverseAdditive(const MultiVector& RHS, MultiVector& X) const {
2076 
2078 
2079  { // compute residual
2080 
2081  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2082  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2083  }
2084 
2085  { // restrict residual to sub-hierarchies
2086 
2087  if (implicitTranspose_) {
2088  {
2089  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (implicit)");
2090  P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2091  }
2092  {
2093  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (implicit)");
2094  D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2095  }
2096  } else {
2097  if (D0_T_R11_colMapsMatch_) {
2098  // Column maps of D0_T and R11 match, and we're running Tpetra
2099  {
2100  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restrictions import");
2101  D0TR11Tmp_->doImport(*residual_, *rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter(), Xpetra::INSERT);
2102  }
2103  {
2104  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2105  rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*D0res_),Teuchos::NO_TRANS);
2106  }
2107  {
2108  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2109  rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*P11res_),Teuchos::NO_TRANS);
2110  }
2111  } else {
2112  {
2113  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2114  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2115  }
2116  {
2117  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2118  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2119  }
2120  }
2121  }
2122  }
2123 
2124  {
2125  RCP<Teuchos::TimeMonitor> tmSubSolves = getTimer("MueLu RefMaxwell: subsolves");
2126 
2127  // block diagonal preconditioner on 2x2 (V-cycle for diagonal blocks)
2128 
2129  if (!ImporterH_.is_null() && !implicitTranspose_) {
2130  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2131  P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2132  P11res_.swap(P11resTmp_);
2133  }
2134  if (!Importer22_.is_null() && !implicitTranspose_) {
2135  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2136  D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
2137  D0res_.swap(D0resTmp_);
2138  }
2139 
2140  // iterate on coarse (1, 1) block
2141  if (!AH_.is_null()) {
2142  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2143 
2144  RCP<const Map> origXMap = P11x_->getMap();
2145  RCP<const Map> origRhsMap = P11res_->getMap();
2146 
2147  // Replace maps with maps with a subcommunicator
2148  P11res_->replaceMap(AH_->getRangeMap());
2149  P11x_ ->replaceMap(AH_->getDomainMap());
2150  HierarchyH_->Iterate(*P11res_, *P11x_, numItersH_, true);
2151  P11x_ ->replaceMap(origXMap);
2152  P11res_->replaceMap(origRhsMap);
2153  }
2154 
2155  // iterate on (2, 2) block
2156  if (!A22_.is_null()) {
2157  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2158 
2159  RCP<const Map> origXMap = D0x_->getMap();
2160  RCP<const Map> origRhsMap = D0res_->getMap();
2161 
2162  // Replace maps with maps with a subcommunicator
2163  D0res_->replaceMap(A22_->getRangeMap());
2164  D0x_ ->replaceMap(A22_->getDomainMap());
2165  Hierarchy22_->Iterate(*D0res_, *D0x_, numIters22_, true);
2166  D0x_ ->replaceMap(origXMap);
2167  D0res_->replaceMap(origRhsMap);
2168  }
2169 
2170  }
2171 
2172  if (fuseProlongationAndUpdate_) {
2173  { // prolongate (1,1) block
2174  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (fused)");
2175  P11_->apply(*P11x_,X,Teuchos::NO_TRANS,one,one);
2176  }
2177 
2178  { // prolongate (2,2) block
2179  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (fused)");
2180  D0_Matrix_->apply(*D0x_,X,Teuchos::NO_TRANS,one,one);
2181  }
2182  } else {
2183  { // prolongate (1,1) block
2184  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (unfused)");
2185  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2186  }
2187 
2188  { // prolongate (2,2) block
2189  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (unfused)");
2190  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS,one,one);
2191  }
2192 
2193  { // update current solution
2194  RCP<Teuchos::TimeMonitor> tmUpdate = getTimer("MueLu RefMaxwell: update");
2195  X.update(one, *residual_, one);
2196  }
2197  }
2198 
2199  if (!ImporterH_.is_null()) {
2200  P11res_.swap(P11resTmp_);
2201  }
2202  if (!Importer22_.is_null()) {
2203  D0res_.swap(D0resTmp_);
2204  }
2205 
2206  }
2207 
2208 
2209  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2210  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse121(const MultiVector& RHS, MultiVector& X) const {
2211 
2212  // precondition (1,1)-block
2213  solveH(RHS,X);
2214  // precondition (2,2)-block
2215  solve22(RHS,X);
2216  // precondition (1,1)-block
2217  solveH(RHS,X);
2218 
2219  }
2220 
2221 
2222  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2223  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse212(const MultiVector& RHS, MultiVector& X) const {
2224 
2225  // precondition (2,2)-block
2226  solve22(RHS,X);
2227  // precondition (1,1)-block
2228  solveH(RHS,X);
2229  // precondition (2,2)-block
2230  solve22(RHS,X);
2231 
2232  }
2233 
2234  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2235  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solveH(const MultiVector& RHS, MultiVector& X) const {
2236 
2238 
2239  { // compute residual
2240  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2241  Utilities::Residual(*SM_Matrix_, X, RHS,*residual_);
2242  if (implicitTranspose_)
2243  P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2244  else
2245  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2246  }
2247 
2248  { // solve coarse (1,1) block
2249  if (!ImporterH_.is_null() && !implicitTranspose_) {
2250  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2251  P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2252  P11res_.swap(P11resTmp_);
2253  }
2254  if (!AH_.is_null()) {
2255  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2256 
2257  RCP<const Map> origXMap = P11x_->getMap();
2258  RCP<const Map> origRhsMap = P11res_->getMap();
2259 
2260  // Replace maps with maps with a subcommunicator
2261  P11res_->replaceMap(AH_->getRangeMap());
2262  P11x_ ->replaceMap(AH_->getDomainMap());
2263  HierarchyH_->Iterate(*P11res_, *P11x_, numItersH_, true);
2264  P11x_ ->replaceMap(origXMap);
2265  P11res_->replaceMap(origRhsMap);
2266  }
2267  }
2268 
2269  { // update current solution
2270  RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2271  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2272  X.update(one, *residual_, one);
2273  }
2274  if (!ImporterH_.is_null()) {
2275  P11res_.swap(P11resTmp_);
2276  }
2277 
2278  }
2279 
2280 
2281  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2282  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solve22(const MultiVector& RHS, MultiVector& X) const {
2283 
2285 
2286  { // compute residual
2287  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2288  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2289  if (implicitTranspose_)
2290  D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2291  else
2292  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2293  }
2294 
2295  { // solve (2,2) block
2296  if (!Importer22_.is_null() && !implicitTranspose_) {
2297  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2298  D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
2299  D0res_.swap(D0resTmp_);
2300  }
2301  if (!A22_.is_null()) {
2302  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2303 
2304  RCP<const Map> origXMap = D0x_->getMap();
2305  RCP<const Map> origRhsMap = D0res_->getMap();
2306 
2307  // Replace maps with maps with a subcommunicator
2308  D0res_->replaceMap(A22_->getRangeMap());
2309  D0x_ ->replaceMap(A22_->getDomainMap());
2310  Hierarchy22_->Iterate(*D0res_, *D0x_, numIters22_, true);
2311  D0x_ ->replaceMap(origXMap);
2312  D0res_->replaceMap(origRhsMap);
2313  }
2314  }
2315 
2316  { //update current solution
2317  RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2318  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
2319  X.update(one, *residual_, one);
2320  }
2321  if (!Importer22_.is_null()) {
2322  D0res_.swap(D0resTmp_);
2323  }
2324 
2325  }
2326 
2327 
2328  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2329  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply (const MultiVector& RHS, MultiVector& X,
2330  Teuchos::ETransp /* mode */,
2331  Scalar /* alpha */,
2332  Scalar /* beta */) const {
2333 
2334  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: solve");
2335 
2336  // make sure that we have enough temporary memory
2337  if (X.getNumVectors() != P11res_->getNumVectors())
2338  allocateMemory(X.getNumVectors());
2339 
2340  { // apply pre-smoothing
2341 
2342  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2343 
2344 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2345  if (useHiptmairSmoothing_) {
2346  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2347  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2348  hiptmairPreSmoother_->apply(tRHS, tX);
2349  }
2350  else
2351 #endif
2352  PreSmoother_->Apply(X, RHS, use_as_preconditioner_);
2353  }
2354 
2355  // do solve for the 2x2 block system
2356  if(mode_=="additive")
2357  applyInverseAdditive(RHS,X);
2358  else if(mode_=="121")
2359  applyInverse121(RHS,X);
2360  else if(mode_=="212")
2361  applyInverse212(RHS,X);
2362  else if(mode_=="1")
2363  solveH(RHS,X);
2364  else if(mode_=="2")
2365  solve22(RHS,X);
2366  else if(mode_=="none") {
2367  // do nothing
2368  }
2369  else
2370  applyInverseAdditive(RHS,X);
2371 
2372  { // apply post-smoothing
2373 
2374  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2375 
2376 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2377  if (useHiptmairSmoothing_)
2378  {
2379  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2380  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2381  hiptmairPostSmoother_->apply(tRHS, tX);
2382  }
2383  else
2384 #endif
2385  PostSmoother_->Apply(X, RHS, false);
2386  }
2387 
2388  }
2389 
2390 
2391  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2393  return false;
2394  }
2395 
2396 
2397  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2400  const Teuchos::RCP<Matrix> & Ms_Matrix,
2401  const Teuchos::RCP<Matrix> & M0inv_Matrix,
2402  const Teuchos::RCP<Matrix> & M1_Matrix,
2403  const Teuchos::RCP<MultiVector> & Nullspace,
2404  const Teuchos::RCP<RealValuedMultiVector> & Coords,
2405  Teuchos::ParameterList& List)
2406  {
2407  // some pre-conditions
2408  TEUCHOS_ASSERT(D0_Matrix!=Teuchos::null);
2409  TEUCHOS_ASSERT(Ms_Matrix!=Teuchos::null);
2410  TEUCHOS_ASSERT(M1_Matrix!=Teuchos::null);
2411 
2412 #ifdef HAVE_MUELU_DEBUG
2413  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRangeMap()));
2414  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRowMap()));
2415  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2416 
2417  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRangeMap()));
2418  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRowMap()));
2419  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2420 
2421  TEUCHOS_ASSERT(D0_Matrix->getRangeMap()->isSameAs(*D0_Matrix->getRowMap()));
2422 
2423  if (!M0inv_Matrix) {
2424  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRowMap()));
2425  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRangeMap()));
2426  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getDomainMap()));
2427  }
2428 #endif
2429 
2430  HierarchyH_ = Teuchos::null;
2431  Hierarchy22_ = Teuchos::null;
2432  PreSmoother_ = Teuchos::null;
2433  PostSmoother_ = Teuchos::null;
2434  disable_addon_ = false;
2435  mode_ = "additive";
2436 
2437  // set parameters
2438  setParameters(List);
2439 
2440  if (D0_Matrix->getRowMap()->lib() == Xpetra::UseTpetra) {
2441  // We will remove boundary conditions from D0, and potentially change maps, so we copy the input.
2442  // Fortunately, D0 is quite sparse.
2443  // We cannot use the Tpetra copy constructor, since it does not copy the graph.
2444 
2445  RCP<Matrix> D0copy = MatrixFactory::Build(D0_Matrix->getRowMap(), D0_Matrix->getColMap(), 0);
2446  RCP<CrsMatrix> D0copyCrs = rcp_dynamic_cast<CrsMatrixWrap>(D0copy,true)->getCrsMatrix();
2447  ArrayRCP<const size_t> D0rowptr_RCP;
2448  ArrayRCP<const LO> D0colind_RCP;
2449  ArrayRCP<const SC> D0vals_RCP;
2450  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getAllValues(D0rowptr_RCP,
2451  D0colind_RCP,
2452  D0vals_RCP);
2453 
2454  ArrayRCP<size_t> D0copyrowptr_RCP;
2455  ArrayRCP<LO> D0copycolind_RCP;
2456  ArrayRCP<SC> D0copyvals_RCP;
2457  D0copyCrs->allocateAllValues(D0vals_RCP.size(),D0copyrowptr_RCP,D0copycolind_RCP,D0copyvals_RCP);
2458  D0copyrowptr_RCP.deepCopy(D0rowptr_RCP());
2459  D0copycolind_RCP.deepCopy(D0colind_RCP());
2460  D0copyvals_RCP.deepCopy(D0vals_RCP());
2461  D0copyCrs->setAllValues(D0copyrowptr_RCP,
2462  D0copycolind_RCP,
2463  D0copyvals_RCP);
2464  D0copyCrs->expertStaticFillComplete(D0_Matrix->getDomainMap(), D0_Matrix->getRangeMap(),
2465  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getImporter(),
2466  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getExporter());
2467  D0_Matrix_ = D0copy;
2468  } else
2469  D0_Matrix_ = MatrixFactory::BuildCopy(D0_Matrix);
2470 
2471 
2472  M0inv_Matrix_ = M0inv_Matrix;
2473  Ms_Matrix_ = Ms_Matrix;
2474  M1_Matrix_ = M1_Matrix;
2475  Coords_ = Coords;
2476  Nullspace_ = Nullspace;
2477 
2478  dump(*D0_Matrix_, "D0_clean.m");
2479  dump(*Ms_Matrix_, "Ms.m");
2480  dump(*M1_Matrix_, "M1.m");
2481  if (!M0inv_Matrix_.is_null()) dump(*M0inv_Matrix_, "M0inv.m");
2482  if (!Coords_.is_null()) dumpCoords(*Coords_, "coords.m");
2483 
2484  }
2485 
2486 
2487  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2489  describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel /* verbLevel */) const {
2490 
2491  std::ostringstream oss;
2492 
2493  RCP<const Teuchos::Comm<int> > comm = SM_Matrix_->getDomainMap()->getComm();
2494 
2495 #ifdef HAVE_MPI
2496  int root;
2497  if (!A22_.is_null())
2498  root = comm->getRank();
2499  else
2500  root = -1;
2501 
2502  int actualRoot;
2503  reduceAll(*comm, Teuchos::REDUCE_MAX, root, Teuchos::ptr(&actualRoot));
2504  root = actualRoot;
2505 #endif
2506 
2507 
2508  oss << "\n--------------------------------------------------------------------------------\n" <<
2509  "--- RefMaxwell Summary ---\n"
2510  "--------------------------------------------------------------------------------" << std::endl;
2511  oss << std::endl;
2512 
2513  GlobalOrdinal numRows;
2514  GlobalOrdinal nnz;
2515 
2516  SM_Matrix_->getRowMap()->getComm()->barrier();
2517 
2518  numRows = SM_Matrix_->getGlobalNumRows();
2519  nnz = SM_Matrix_->getGlobalNumEntries();
2520 
2521  Xpetra::global_size_t tt = numRows;
2522  int rowspacer = 3; while (tt != 0) { tt /= 10; rowspacer++; }
2523  tt = nnz;
2524  int nnzspacer = 2; while (tt != 0) { tt /= 10; nnzspacer++; }
2525 
2526  oss << "block " << std::setw(rowspacer) << " rows " << std::setw(nnzspacer) << " nnz " << std::setw(9) << " nnz/row" << std::endl;
2527  oss << "(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2528 
2529  if (!A22_.is_null()) {
2530  // ToDo: make sure that this is printed correctly
2531  numRows = A22_->getGlobalNumRows();
2532  nnz = A22_->getGlobalNumEntries();
2533 
2534  oss << "(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2535  }
2536 
2537  oss << std::endl;
2538 
2539 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2540  if (useHiptmairSmoothing_) {
2541  if (hiptmairPreSmoother_ != null && hiptmairPreSmoother_ == hiptmairPostSmoother_)
2542  oss << "Smoother both : " << hiptmairPreSmoother_->description() << std::endl;
2543  else {
2544  oss << "Smoother pre : "
2545  << (hiptmairPreSmoother_ != null ? hiptmairPreSmoother_->description() : "no smoother") << std::endl;
2546  oss << "Smoother post : "
2547  << (hiptmairPostSmoother_ != null ? hiptmairPostSmoother_->description() : "no smoother") << std::endl;
2548  }
2549 
2550  } else
2551 #endif
2552  {
2553  if (PreSmoother_ != null && PreSmoother_ == PostSmoother_)
2554  oss << "Smoother both : " << PreSmoother_->description() << std::endl;
2555  else {
2556  oss << "Smoother pre : "
2557  << (PreSmoother_ != null ? PreSmoother_->description() : "no smoother") << std::endl;
2558  oss << "Smoother post : "
2559  << (PostSmoother_ != null ? PostSmoother_->description() : "no smoother") << std::endl;
2560  }
2561  }
2562  oss << std::endl;
2563 
2564  std::string outstr = oss.str();
2565 
2566 #ifdef HAVE_MPI
2567  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2568  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
2569 
2570  int strLength = outstr.size();
2571  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
2572  if (comm->getRank() != root)
2573  outstr.resize(strLength);
2574  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
2575 #endif
2576 
2577  out << outstr;
2578 
2579  if (!HierarchyH_.is_null())
2580  HierarchyH_->describe(out, GetVerbLevel());
2581 
2582  if (!Hierarchy22_.is_null())
2583  Hierarchy22_->describe(out, GetVerbLevel());
2584 
2585  if (IsPrint(Statistics2)) {
2586  // Print the grid of processors
2587  std::ostringstream oss2;
2588 
2589  oss2 << "Sub-solver distribution over ranks" << std::endl;
2590  oss2 << "( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
2591 
2592  int numProcs = comm->getSize();
2593 #ifdef HAVE_MPI
2594  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2595  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
2596  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
2597 #endif
2598 
2599  char status = 0;
2600  if (!AH_.is_null())
2601  status += 1;
2602  if (!A22_.is_null())
2603  status += 2;
2604  std::vector<char> states(numProcs, 0);
2605 #ifdef HAVE_MPI
2606  MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
2607 #else
2608  states.push_back(status);
2609 #endif
2610 
2611  int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
2612  for (int proc = 0; proc < numProcs; proc += rowWidth) {
2613  for (int j = 0; j < rowWidth; j++)
2614  if (proc + j < numProcs)
2615  if (states[proc+j] == 0)
2616  oss2 << ".";
2617  else if (states[proc+j] == 1)
2618  oss2 << "1";
2619  else if (states[proc+j] == 2)
2620  oss2 << "2";
2621  else
2622  oss2 << "B";
2623  else
2624  oss2 << " ";
2625 
2626  oss2 << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
2627  }
2628  oss2 << std::endl;
2629  GetOStream(Statistics2) << oss2.str();
2630  }
2631 
2632 
2633  }
2634 
2635 } // namespace
2636 
2637 #define MUELU_REFMAXWELL_SHORT
2638 #endif //ifdef MUELU_REFMAXWELL_DEF_HPP
Important warning messages (one line)
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
This class specifies the default factory that should generate some data on a Level if the data does n...
#define MueLu_sumAll(rcpComm, in, out)
static void SetMueLuOFileStream(const std::string &filename)
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
MueLu::DefaultLocalOrdinal LocalOrdinal
Factory for determing the number of partitions for rebalancing.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level-&gt;Get&lt; RCP&lt;Matrix&gt; &gt;(&quot;A&quot;, factory) if factory == NULL =&gt; use default factory.
void solve22(const MultiVector &RHS, MultiVector &X) const
apply solve to 2-2 block only
Factory for generating coarse level map. Used by TentativePFactory.
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
static RCP< const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2TpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > const vec)
Helper utility to pull out the underlying Tpetra objects from an Xpetra object.
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows)
#define MueLu_maxAll(rcpComm, in, out)
static magnitudeType eps()
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< std::is_integral< iType >::value, size_t >::type extent(const iType &r) const noexcept
Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
T & get(const std::string &name, T def_value)
void FindNonZeros(const Teuchos::ArrayRCP< const Scalar > vals, Teuchos::ArrayRCP< bool > nonzeros)
Class that encapsulates external library smoothers.
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static T squareroot(T x)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::magnitude(0.), const bool count_twos_as_dirichlet=false)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Factory for building permutation matrix that can be be used to shuffle data (matrices, vectors) among processes.
size_type size() const
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
One-liner description of what is happening.
T * get() const
std::string tolower(const std::string &str)
void solveH(const MultiVector &RHS, MultiVector &X) const
apply solve to 1-1 block only
void SetPreviousLevel(const RCP< Level > &previousLevel)
Definition: MueLu_Level.cpp:85
Interface to Zoltan library.This interface provides access to partitioning methods in Zoltan...
void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows)
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
size_type size() const
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
Definition: MueLu_Level.cpp:92
void setParameters(Teuchos::ParameterList &list)
Set parameters.
void dump(const Matrix &A, std::string name) const
dump out matrix
void buildProlongator()
Setup the prolongator for the (1,1)-block.
MueLu::DefaultNode Node
static RCP< Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2NonConstTpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > vec)
#define MueLu_minAll(rcpComm, in, out)
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
static magnitudeType rmax()
Factory for building tentative prolongator.
MsgType toVerbLevel(const std::string &verbLevelStr)
Print even more statistics.
void setlib(Xpetra::UnderlyingLib lib2)
static RCP< Time > getNewTimer(const std::string &name)
void resize(const size_type n, const T &val=T())
void Build(Level &currentLevel) const
Creates pre and post smoothers.
void allocateMemory(int numVectors) const
allocate multivectors for solve
Print statistics that do not involve significant additional computation.
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &RHS)
void applyInverseAdditive(const MultiVector &RHS, MultiVector &X) const
apply additive algorithm for 2x2 solve
virtual void SetParameterList(const Teuchos::ParameterList &paramList)
Set parameters from a parameter list and return with default values.
Kokkos::View< bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
void resetMatrix(Teuchos::RCP< Matrix > SM_Matrix_new, bool ComputePrec=true)
Reset system matrix.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
bool isSublist(const std::string &name) const
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LocalOrdinal, GlobalOrdinal, Node > > X)
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix...
Factory to export aggregation info or visualize aggregates using VTK.
Interface to Zoltan2 library.This interface provides access to partitioning methods in Zoltan2...
void deepCopy(const ArrayView< const T > &av)
static void SetMueLuOStream(const Teuchos::RCP< Teuchos::FancyOStream > &mueluOStream)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Xpetra::MultiVector< coordinateType, LO, GO, NO > RealValuedMultiVector
virtual void setObjectLabel(const std::string &objectLabel)
AmalgamationFactory for subblocks of strided map based amalgamation data.
void applyInverse212(const MultiVector &RHS, MultiVector &X) const
apply 2-1-2 algorithm for 2x2 solve
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
Teuchos::RCP< Teuchos::TimeMonitor > getTimer(std::string name, RCP< const Teuchos::Comm< int > > comm=Teuchos::null) const
get a (synced) timer
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
void applyInverse121(const MultiVector &RHS, MultiVector &X) const
apply 1-2-1 algorithm for 2x2 solve
void compute(bool reuse=false)
Setup the preconditioner.
Applies permutation to grid transfer operators.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_HIGH) const
void SetParameter(const std::string &name, const ParameterEntry &entry)
Set a parameter directly as a ParameterEntry.
Print all timing information.
void initialize(const Teuchos::RCP< Matrix > &D0_Matrix, const Teuchos::RCP< Matrix > &Ms_Matrix, const Teuchos::RCP< Matrix > &M0inv_Matrix, const Teuchos::RCP< Matrix > &M1_Matrix, const Teuchos::RCP< MultiVector > &Nullspace, const Teuchos::RCP< RealValuedMultiVector > &Coords, Teuchos::ParameterList &List)
static magnitudeType magnitude(T a)
void dumpCoords(const RealValuedMultiVector &X, std::string name) const
dump out real-valued multivector
Factory for creating a graph base on a given matrix.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
void Build(Level &currentLevel) const
Build an object with this factory.
void SetLevelID(int levelID)
Set level number.
Definition: MueLu_Level.cpp:78
virtual void SetFactory(const std::string &varName, const RCP< const FactoryBase > &factory)=0
Configuration.
bool isType(const std::string &name) const
Class for transferring coordinates from a finer level to a coarser one.
Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
Factory for building coarse matrices.
void formCoarseMatrix()
Compute P11^{T}*A*P11 efficiently.
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
Description of what is happening (more verbose)
static void ZeroDirichletCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
Factory for building coarse matrices.
Teuchos::ScalarTraits< Scalar >::coordinateType coordinateType
static RCP< Tpetra::RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op2NonConstTpetraRow(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op)
Factory for building Smoothed Aggregation prolongators.
Factory for building uncoupled aggregates.
Kokkos::View< bool *, typename NO::device_type > DetectDirichletCols(const Xpetra::Matrix< SC, LO, GO, NO > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)
static std::string translate(Teuchos::ParameterList &paramList, const std::string &defaultVals="")
: Translate ML parameters to MueLu parameter XML string
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
Factory for building a thresholded operator.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
static const RCP< const NoFactory > getRCP()
Static Get() functions.
bool is_null() const