MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_RefMaxwell_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_REFMAXWELL_DEF_HPP
47 #define MUELU_REFMAXWELL_DEF_HPP
48 
49 #include <sstream>
50 
51 #include "MueLu_ConfigDefs.hpp"
52 
53 #include "Xpetra_Map.hpp"
54 #include "Xpetra_MatrixMatrix.hpp"
57 #include "Xpetra_MatrixUtils.hpp"
58 
60 
61 #include "MueLu_AmalgamationFactory.hpp"
62 #include "MueLu_RAPFactory.hpp"
63 #include "MueLu_ThresholdAFilterFactory.hpp"
64 #include "MueLu_TransPFactory.hpp"
65 #include "MueLu_SmootherFactory.hpp"
66 
67 #include "MueLu_CoalesceDropFactory.hpp"
68 #include "MueLu_CoarseMapFactory.hpp"
69 #include "MueLu_CoordinatesTransferFactory.hpp"
70 #include "MueLu_UncoupledAggregationFactory.hpp"
71 #include "MueLu_TentativePFactory.hpp"
72 #include "MueLu_AggregationExportFactory.hpp"
73 #include "MueLu_Utilities.hpp"
74 
75 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
76 #include "MueLu_AmalgamationFactory_kokkos.hpp"
77 #include "MueLu_CoalesceDropFactory_kokkos.hpp"
78 #include "MueLu_CoarseMapFactory_kokkos.hpp"
79 #include "MueLu_CoordinatesTransferFactory_kokkos.hpp"
80 #include "MueLu_UncoupledAggregationFactory_kokkos.hpp"
81 #include "MueLu_TentativePFactory_kokkos.hpp"
82 #include "MueLu_Utilities_kokkos.hpp"
83 #include <Kokkos_Core.hpp>
84 #include <KokkosSparse_CrsMatrix.hpp>
85 #endif
86 
87 #include "MueLu_ZoltanInterface.hpp"
88 #include "MueLu_Zoltan2Interface.hpp"
89 #include "MueLu_RepartitionHeuristicFactory.hpp"
90 #include "MueLu_RepartitionFactory.hpp"
91 #include "MueLu_RebalanceAcFactory.hpp"
92 #include "MueLu_RebalanceTransferFactory.hpp"
93 
94 #include "MueLu_VerbosityLevel.hpp"
95 
98 
99 #ifdef HAVE_MUELU_CUDA
100 #include "cuda_profiler_api.h"
101 #endif
102 
103 
104 namespace MueLu {
105 
106  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
108  return SM_Matrix_->getDomainMap();
109  }
110 
111 
112  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
114  return SM_Matrix_->getRangeMap();
115  }
116 
117 
118  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
120 
121  if (list.isType<std::string>("parameterlist: syntax") && list.get<std::string>("parameterlist: syntax") == "ml") {
122  Teuchos::ParameterList newList = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list,"refmaxwell"));
123  if(list.isSublist("refmaxwell: 11list") && list.sublist("refmaxwell: 11list").isSublist("edge matrix free: coarse"))
124  newList.sublist("refmaxwell: 11list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse"),"SA"));
125  if(list.isSublist("refmaxwell: 22list"))
126  newList.sublist("refmaxwell: 22list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 22list"),"SA"));
127  list = newList;
128  }
129 
130  parameterList_ = list;
131  disable_addon_ = list.get("refmaxwell: disable addon",MasterList::getDefault<bool>("refmaxwell: disable addon"));
132  mode_ = list.get("refmaxwell: mode",MasterList::getDefault<std::string>("refmaxwell: mode"));
133  use_as_preconditioner_ = list.get("refmaxwell: use as preconditioner",MasterList::getDefault<bool>("refmaxwell: use as preconditioner"));
134  dump_matrices_ = list.get("refmaxwell: dump matrices",MasterList::getDefault<bool>("refmaxwell: dump matrices"));
135  implicitTranspose_ = list.get("transpose: use implicit",MasterList::getDefault<bool>("transpose: use implicit"));
136 
137  if(list.isSublist("refmaxwell: 11list"))
138  precList11_ = list.sublist("refmaxwell: 11list");
139  if(!precList11_.isType<std::string>("smoother: type") && !precList11_.isType<std::string>("smoother: pre type") && !precList11_.isType<std::string>("smoother: post type")) {
140  precList11_.set("smoother: type", "CHEBYSHEV");
141  precList11_.sublist("smoother: params").set("chebyshev: degree",2);
142  }
143 
144  if(list.isSublist("refmaxwell: 22list"))
145  precList22_ = list.sublist("refmaxwell: 22list");
146  if(!precList22_.isType<std::string>("smoother: type") && !precList22_.isType<std::string>("smoother: pre type") && !precList22_.isType<std::string>("smoother: post type")) {
147  precList22_.set("smoother: type", "CHEBYSHEV");
148  precList22_.sublist("smoother: params").set("chebyshev: degree",2);
149  }
150 
151  if(!list.isType<std::string>("smoother: type") && !list.isType<std::string>("smoother: pre type") && !list.isType<std::string>("smoother: post type")) {
152  list.set("smoother: type", "CHEBYSHEV");
153  list.sublist("smoother: params").set("chebyshev: degree",2);
154  }
155 
156  if(list.isSublist("smoother: params")) {
157  smootherList_ = list.sublist("smoother: params");
158  }
159 
160 #if !defined(HAVE_MUELU_KOKKOS_REFACTOR)
161  useKokkos_ = false;
162 #elif defined(HAVE_MUELU_KOKKOS_REFACTOR_USE_BY_DEFAULT)
163  useKokkos_ = list.get("use kokkos refactor",true);
164 #else
165  useKokkos_ = list.get("use kokkos refactor",false);
166 #endif
167 
168  }
169 
170 
171  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
173 
174 
175  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType realType;
176 
177 #ifdef HAVE_MUELU_CUDA
178  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStart();
179 #endif
180 
181  std::string timerLabel;
182  if (reuse)
183  timerLabel = "MueLu RefMaxwell: compute (reuse)";
184  else
185  timerLabel = "MueLu RefMaxwell: compute";
187 
188  std::map<std::string, MsgType> verbMap;
189  verbMap["none"] = None;
190  verbMap["low"] = Low;
191  verbMap["medium"] = Medium;
192  verbMap["high"] = High;
193  verbMap["extreme"] = Extreme;
194  verbMap["test"] = Test;
195 
197  std::string verbosityLevel = parameterList_.get<std::string>("verbosity", "medium");
198 
199  TEUCHOS_TEST_FOR_EXCEPTION(verbMap.count(verbosityLevel) == 0, Exceptions::RuntimeError,
200  "Invalid verbosity level: \"" << verbosityLevel << "\"");
201  VerboseObject::SetDefaultVerbLevel(verbMap[verbosityLevel]);
202 
203 
204  bool defaultFilter = false;
205 
206  // Remove zero entries from D0 if necessary.
207  // In the construction of the prolongator we use the graph of the
208  // matrix, so zero entries mess it up.
209  if (parameterList_.get<bool>("refmaxwell: filter D0", true) && D0_Matrix_->getNodeMaxNumRowEntries()>2) {
210  Level fineLevel;
211  fineLevel.SetFactoryManager(null);
212  fineLevel.SetLevelID(0);
213  fineLevel.Set("A",D0_Matrix_);
214  fineLevel.setlib(D0_Matrix_->getDomainMap()->lib());
215  // We expect D0 to have entries +-1, so any threshold value will do.
216  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",1.0e-8,/*keepDiagonal=*/false,/*expectedNNZperRow=*/2));
217  fineLevel.Request("A",ThreshFact.get());
218  ThreshFact->Build(fineLevel);
219  D0_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
220 
221  // If D0 has too many zeros, maybe SM and M1 do as well.
222  defaultFilter = true;
223  }
224 
225  if (parameterList_.get<bool>("refmaxwell: filter SM", defaultFilter)) {
226  RCP<Vector> diag = VectorFactory::Build(SM_Matrix_->getRowMap());
227  // find a reasonable absolute value threshold
228  SM_Matrix_->getLocalDiagCopy(*diag);
229  magnitudeType threshold = 1.0e-8 * diag->normInf();
230 
231  Level fineLevel;
232  fineLevel.SetFactoryManager(null);
233  fineLevel.SetLevelID(0);
234  fineLevel.Set("A",SM_Matrix_);
235  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
236  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",threshold,/*keepDiagonal=*/true));
237  fineLevel.Request("A",ThreshFact.get());
238  ThreshFact->Build(fineLevel);
239  SM_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
240  }
241 
242  if (parameterList_.get<bool>("refmaxwell: filter M1", defaultFilter)) {
243  RCP<Vector> diag = VectorFactory::Build(M1_Matrix_->getRowMap());
244  // find a reasonable absolute value threshold
245  M1_Matrix_->getLocalDiagCopy(*diag);
246  magnitudeType threshold = 1.0e-8 * diag->normInf();
247 
248  Level fineLevel;
249  fineLevel.SetFactoryManager(null);
250  fineLevel.SetLevelID(0);
251  fineLevel.Set("A",M1_Matrix_);
252  fineLevel.setlib(M1_Matrix_->getDomainMap()->lib());
253  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",threshold,/*keepDiagonal=*/true));
254  fineLevel.Request("A",ThreshFact.get());
255  ThreshFact->Build(fineLevel);
256  M1_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
257  }
258 
259  if (!reuse) {
260  // clean rows associated with boundary conditions
261  // Find rows with only 1 or 2 nonzero entries, record them in BCrows_.
262  // BCrows_[i] is true, iff i is a boundary row
263  // BCcols_[i] is true, iff i is a boundary column
264 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
265  if (useKokkos_) {
266  BCrowsKokkos_ = Utilities_kokkos::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits<magnitudeType>::eps(),/*count_twos_as_dirichlet=*/true);
267  BCcolsKokkos_ = Utilities_kokkos::DetectDirichletCols(*D0_Matrix_,BCrowsKokkos_);
268 
269  int BCrowcountLocal = 0;
270  for (size_t i = 0; i<BCrowsKokkos_.size(); i++)
271  if (BCrowsKokkos_(i))
272  BCrowcountLocal += 1;
273 #ifdef HAVE_MPI
274  MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCrowcountLocal, BCrowcount_);
275 #else
276  BCrowcount_ = BCrowcountLocal;
277 #endif
278  int BCcolcountLocal = 0;
279  for (size_t i = 0; i<BCcolsKokkos_.size(); i++)
280  if (BCcolsKokkos_(i))
281  BCcolcountLocal += 1;
282 #ifdef HAVE_MPI
283  MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCcolcountLocal, BCcolcount_);
284 #else
285  BCcolcount_ = BCcolcountLocal;
286 #endif
287  if (IsPrint(Statistics2)) {
288  GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCrowcount_ << " BC rows and " << BCcolcount_ << " BC columns." << std::endl;
289  }
290  } else
291 #endif
292  {
293  BCrows_ = Utilities::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits<magnitudeType>::eps(),/*count_twos_as_dirichlet=*/true);
294  BCcols_ = Utilities::DetectDirichletCols(*D0_Matrix_,BCrows_);
295  int BCrowcountLocal = 0;
296  for (auto it = BCrows_.begin(); it != BCrows_.end(); ++it)
297  if (*it)
298  BCrowcountLocal += 1;
299 #ifdef HAVE_MPI
300  MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCrowcountLocal, BCrowcount_);
301 #else
302  BCrowcount_ = BCrowcountLocal;
303 #endif
304  int BCcolcountLocal = 0;
305  for (auto it = BCcols_.begin(); it != BCcols_.end(); ++it)
306  if (*it)
307  BCcolcountLocal += 1;
308 #ifdef HAVE_MPI
309  MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCcolcountLocal, BCcolcount_);
310 #else
311  BCcolcount_ = BCcolcountLocal;
312 #endif
313  if (IsPrint(Statistics2)) {
314  GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCrowcount_ << " BC rows and " << BCcolcount_ << " BC columns." << std::endl;
315  }
316  }
317  }
318 
319  // build nullspace if necessary
320  if(Nullspace_ != null) {
321  // no need to do anything - nullspace is built
322  TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
323  }
324  else if(Nullspace_ == null && Coords_ != null) {
325  // normalize coordinates
326  typedef typename RealValuedMultiVector::scalar_type realScalarType;
327  typedef typename Teuchos::ScalarTraits<realScalarType>::magnitudeType realMagnitudeType;
328  Array<realMagnitudeType> norms(Coords_->getNumVectors());
329  Coords_->norm2(norms);
330  for (size_t i=0;i<Coords_->getNumVectors();i++)
331  norms[i] = ((realMagnitudeType)1.0)/norms[i];
332  Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
333 
334  // Cast coordinates to Scalar so they can be multiplied against D0
335  Array<Scalar> normsSC(Coords_->getNumVectors());
336  for (size_t i=0;i<Coords_->getNumVectors();i++)
337  normsSC[i] = (SC) norms[i];
338 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
339  RCP<MultiVector> CoordsSC;
340  if (useKokkos_)
341  CoordsSC = Utilities_kokkos::RealValuedToScalarMultiVector(Coords_);
342  else
343  CoordsSC = Utilities::RealValuedToScalarMultiVector(Coords_);
344 #else
346 #endif
347  D0_Matrix_->apply(*CoordsSC,*Nullspace_);
348  Nullspace_->scale(normsSC());
349  }
350  else {
351  GetOStream(Errors) << "MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
352  }
353 
354  if (!reuse) {
355  // Nuke the BC edges in nullspace
356 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
357  if (useKokkos_)
358  Utilities_kokkos::ZeroDirichletRows(Nullspace_,BCrowsKokkos_);
359  else
360  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
361 #else
362  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
363 #endif
364  }
365 
366  if (dump_matrices_)
367  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("D0_clean.mat"), *D0_Matrix_);
368 
369  // build special prolongator for (1,1)-block
370  if(P11_.is_null()) {
371  // Form A_nodal = D0* M1 D0 (aka TMT_agg)
372  Level fineLevel, coarseLevel;
373  fineLevel.SetFactoryManager(null);
374  coarseLevel.SetFactoryManager(null);
375  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
376  fineLevel.SetLevelID(0);
377  coarseLevel.SetLevelID(1);
378  fineLevel.Set("A",Ms_Matrix_);
379  coarseLevel.Set("P",D0_Matrix_);
380  coarseLevel.setlib(M1_Matrix_->getDomainMap()->lib());
381  fineLevel.setlib(M1_Matrix_->getDomainMap()->lib());
382  coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
383  fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
384 
385  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
386  ParameterList rapList = *(rapFact->GetValidParameterList());
387  rapList.set("transpose: use implicit", parameterList_.get<bool>("transpose: use implicit", false));
388  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
389  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
390  rapFact->SetParameterList(rapList);
391 
392  RCP<TransPFactory> transPFactory;
393  if (!parameterList_.get<bool>("transpose: use implicit", false)) {
394  transPFactory = rcp(new TransPFactory());
395  rapFact->SetFactory("R", transPFactory);
396  }
397 
398  coarseLevel.Request("A", rapFact.get());
399 
400  A_nodal_Matrix_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
401 
402  // build special prolongator
403  GetOStream(Runtime0) << "RefMaxwell::compute(): building special prolongator" << std::endl;
404  buildProlongator();
405 
406  if (!implicitTranspose_) {
407 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
408  if (useKokkos_)
409  R11_ = Utilities_kokkos::Transpose(*P11_);
410  else
411  R11_ = Utilities::Transpose(*P11_);
412 #else
413  R11_ = Utilities::Transpose(*P11_);
414 #endif
415  }
416  }
417 
418  bool doRebalancing = false;
419 #ifdef HAVE_MPI
420  doRebalancing = parameterList_.get<bool>("refmaxwell: subsolves on subcommunicators", MasterList::getDefault<bool>("refmaxwell: subsolves on subcommunicators"));
421  int numProcsAH, numProcsA22;
422 #endif
423  {
424  // build coarse grid operator for (1,1)-block
425  formCoarseMatrix();
426 
427 #ifdef HAVE_MPI
428  int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
429  if (doRebalancing && numProcs > 1) {
430  GlobalOrdinal globalNumRowsAH = AH_->getRowMap()->getGlobalNumElements();
431  GlobalOrdinal globalNumRowsA22 = D0_Matrix_->getDomainMap()->getGlobalNumElements();
432  double ratio = parameterList_.get<double>("refmaxwell: ratio AH / A22 subcommunicators", MasterList::getDefault<double>("refmaxwell: ratio AH / A22 subcommunicators"));
433  numProcsAH = numProcs * globalNumRowsAH / (globalNumRowsAH + ratio*globalNumRowsA22);
434  numProcsA22 = numProcs * ratio * globalNumRowsA22 / (globalNumRowsAH + ratio*globalNumRowsA22);
435  if (numProcsAH + numProcsA22 < numProcs)
436  ++numProcsAH;
437  if (numProcsAH + numProcsA22 < numProcs)
438  ++numProcsA22;
439  numProcsAH = std::max(numProcsAH, 1);
440  numProcsA22 = std::max(numProcsA22, 1);
441  } else
442  doRebalancing = false;
443 
444  if (doRebalancing) { // rebalance AH
445  if (!reuse) {
446  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: Rebalance AH"));
447 
448  Level fineLevel, coarseLevel;
449  fineLevel.SetFactoryManager(null);
450  coarseLevel.SetFactoryManager(null);
451  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
452  fineLevel.SetLevelID(0);
453  coarseLevel.SetLevelID(1);
454  coarseLevel.Set("A",AH_);
455  coarseLevel.Set("P",P11_);
456  if (!implicitTranspose_)
457  coarseLevel.Set("R",R11_);
458  coarseLevel.Set("Coordinates",CoordsH_);
459  coarseLevel.Set("number of partitions", numProcsAH);
460  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
461 
462  coarseLevel.setlib(AH_->getDomainMap()->lib());
463  fineLevel.setlib(AH_->getDomainMap()->lib());
464  coarseLevel.setObjectLabel("RefMaxwell (1,1)");
465  fineLevel.setObjectLabel("RefMaxwell (1,1)");
466 
467  // auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
468  // ParameterList repartheurParams;
469  // repartheurParams.set("repartition: start level",0);
470  // repartheurParams.set("repartition: min rows per proc", precList11_.get<int>("repartition: min rows per proc", 1024));
471  // repartheurParams.set("repartition: target rows per proc", precList11_.get<int>("repartition: target rows per proc", 0));
472  // repartheurParams.set("repartition: max imbalance", precList11_.get<double>("repartition: max imbalance", 1.1));
473  // repartheurFactory->SetParameterList(repartheurParams);
474 
475  std::string partName = precList11_.get<std::string>("repartition: partitioner", "zoltan2");
476  RCP<Factory> partitioner;
477  if (partName == "zoltan") {
478 #ifdef HAVE_MUELU_ZOLTAN
479  partitioner = rcp(new ZoltanInterface());
480  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
481  // partitioner->SetFactory("number of partitions", repartheurFactory);
482 #else
483  throw Exceptions::RuntimeError("Zoltan interface is not available");
484 #endif
485  } else if (partName == "zoltan2") {
486 #ifdef HAVE_MUELU_ZOLTAN2
487  partitioner = rcp(new Zoltan2Interface());
488  ParameterList partParams;
489  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList11_.sublist("repartition: params", false)));
490  partParams.set("ParameterList", partpartParams);
491  partitioner->SetParameterList(partParams);
492  // partitioner->SetFactory("number of partitions", repartheurFactory);
493 #else
494  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
495 #endif
496  }
497 
498  auto repartFactory = rcp(new RepartitionFactory());
499  ParameterList repartParams;
500  repartParams.set("repartition: print partition distribution", precList11_.get<bool>("repartition: print partition distribution", false));
501  repartParams.set("repartition: remap parts", precList11_.get<bool>("repartition: remap parts", true));
502  repartFactory->SetParameterList(repartParams);
503  // repartFactory->SetFactory("number of partitions", repartheurFactory);
504  repartFactory->SetFactory("Partition", partitioner);
505 
506  auto newP = rcp(new RebalanceTransferFactory());
507  ParameterList newPparams;
508  newPparams.set("type", "Interpolation");
509  newPparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
510  newPparams.set("repartition: use subcommunicators", true);
511  newPparams.set("repartition: rebalance Nullspace", false);
512  newP->SetFactory("Coordinates", NoFactory::getRCP());
513  newP->SetParameterList(newPparams);
514  newP->SetFactory("Importer", repartFactory);
515 
516  // Rebalanced R
518  if (!implicitTranspose_) {
519  newR = rcp(new RebalanceTransferFactory());
520  ParameterList newRparams;
521  newRparams.set("type", "Restriction");
522  newRparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
523  newRparams.set("repartition: use subcommunicators", true);
524  newR->SetParameterList(newRparams);
525  newR->SetFactory("Importer", repartFactory);
526  }
527 
528  auto newA = rcp(new RebalanceAcFactory());
529  ParameterList rebAcParams;
530  rebAcParams.set("repartition: use subcommunicators", true);
531  newA->SetParameterList(rebAcParams);
532  newA->SetFactory("Importer", repartFactory);
533 
534  coarseLevel.Request("R", newR.get());
535  coarseLevel.Request("P", newP.get());
536  coarseLevel.Request("Importer", repartFactory.get());
537  coarseLevel.Request("A", newA.get());
538  coarseLevel.Request("Coordinates", newP.get());
539  repartFactory->Build(coarseLevel);
540 
541  if (!precList11_.get<bool>("repartition: rebalance P and R", false))
542  ImporterH_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
543  P11_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
544  if (!implicitTranspose_)
545  R11_ = coarseLevel.Get< RCP<Matrix> >("R", newR.get());
546  AH_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
547  CoordsH_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
548 
549  } else {
550  ParameterList XpetraList;
551  XpetraList.set("Restrict Communicator",true);
552  XpetraList.set("Timer Label","MueLu RefMaxwell::RebalanceAH");
553  RCP<const Map> targetMap = ImporterH_->getTargetMap();
554  AH_ = MatrixFactory::Build(AH_, *ImporterH_, *ImporterH_, targetMap, targetMap, rcp(&XpetraList,false));
555  }
556  if (!AH_.is_null())
557  AH_->setObjectLabel("RefMaxwell (1,1)");
558  }
559 #endif // HAVE_MPI
560 
561 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
562  // This should be taken out again as soon as
563  // CoalesceDropFactory_kokkos supports BlockSize > 1 and
564  // drop tol != 0.0
565  if (useKokkos_ && precList11_.isParameter("aggregation: drop tol") && precList11_.get<double>("aggregation: drop tol") != 0.0) {
566  GetOStream(Warnings0) << "RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
567  << "support BlockSize > 1 and drop tol != 0.0" << std::endl;
568  precList11_.set("aggregation: drop tol", 0.0);
569  }
570 #endif
571  if (!AH_.is_null()) {
572  int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
573  if (!reuse) {
574  ParameterList& userParamList = precList11_.sublist("user data");
575  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
576  HierarchyH_ = MueLu::CreateXpetraPreconditioner(AH_, precList11_);
577  } else {
578  RCP<MueLu::Level> level0 = HierarchyH_->GetLevel(0);
579  level0->Set("A", AH_);
580  HierarchyH_->SetupRe();
581  }
582  SetProcRankVerbose(oldRank);
583  }
584  VerboseObject::SetDefaultVerbLevel(verbMap[verbosityLevel]);
585 
586  }
587 
588  if(!reuse) {
589  GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
590 
591  D0_Matrix_->resumeFill();
592  // Scalar replaceWith = Teuchos::ScalarTraits<SC>::eps();
593  Scalar replaceWith = Teuchos::ScalarTraits<SC>::zero();
594 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
595  if (useKokkos_) {
596  Utilities_kokkos::ZeroDirichletRows(D0_Matrix_,BCrowsKokkos_,replaceWith);
597  Utilities_kokkos::ZeroDirichletCols(D0_Matrix_,BCcolsKokkos_,replaceWith);
598  } else {
599  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
600  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
601  }
602 #else
603  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
604  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
605 #endif
606  D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
607  }
608 
609  {
610  GetOStream(Runtime0) << "RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
611 
612  { // build fine grid operator for (2,2)-block, D0* SM D0 (aka TMT)
613  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: Build A22"));
614 
615  Level fineLevel, coarseLevel;
616  fineLevel.SetFactoryManager(null);
617  coarseLevel.SetFactoryManager(null);
618  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
619  fineLevel.SetLevelID(0);
620  coarseLevel.SetLevelID(1);
621  fineLevel.Set("A",SM_Matrix_);
622  coarseLevel.Set("P",D0_Matrix_);
623  coarseLevel.Set("Coordinates",Coords_);
624 
625  coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
626  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
627  coarseLevel.setObjectLabel("RefMaxwell (2,2)");
628  fineLevel.setObjectLabel("RefMaxwell (2,2)");
629 
630  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
631  ParameterList rapList = *(rapFact->GetValidParameterList());
632  rapList.set("transpose: use implicit", implicitTranspose_);
633  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
634  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
635  rapFact->SetParameterList(rapList);
636 
637  if (!A22_AP_reuse_data_.is_null()) {
638  coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
639  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", A22_AP_reuse_data_, rapFact.get());
640  }
641  if (!A22_RAP_reuse_data_.is_null()) {
642  coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
643  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", A22_RAP_reuse_data_, rapFact.get());
644  }
645 
646  RCP<TransPFactory> transPFactory;
647  if (!implicitTranspose_) {
648  transPFactory = rcp(new TransPFactory());
649  rapFact->SetFactory("R", transPFactory);
650  }
651 
652 #ifdef HAVE_MPI
653  if (doRebalancing) {
654 
655  if (!reuse) {
656 
657  coarseLevel.Set("number of partitions", numProcsA22);
658  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
659 
660  // auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
661  // ParameterList repartheurParams;
662  // repartheurParams.set("repartition: start level",0);
663  // repartheurParams.set("repartition: min rows per proc", precList22_.get<int>("repartition: min rows per proc", 1024));
664  // repartheurParams.set("repartition: target rows per proc", precList22_.get<int>("repartition: target rows per proc", 0));
665  // repartheurParams.set("repartition: max imbalance", precList22_.get<double>("repartition: max imbalance", 1.1));
666  // repartheurFactory->SetParameterList(repartheurParams);
667  // repartheurFactory->SetFactory("A", rapFact);
668 
669  std::string partName = precList22_.get<std::string>("repartition: partitioner", "zoltan2");
670  RCP<Factory> partitioner;
671  if (partName == "zoltan") {
672 #ifdef HAVE_MUELU_ZOLTAN
673  partitioner = rcp(new ZoltanInterface());
674  partitioner->SetFactory("A", rapFact);
675  // partitioner->SetFactory("number of partitions", repartheurFactory);
676  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
677 #else
678  throw Exceptions::RuntimeError("Zoltan interface is not available");
679 #endif
680  } else if (partName == "zoltan2") {
681 #ifdef HAVE_MUELU_ZOLTAN2
682  partitioner = rcp(new Zoltan2Interface());
683  ParameterList partParams;
684  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList22_.sublist("repartition: params", false)));
685  partParams.set("ParameterList", partpartParams);
686  partitioner->SetParameterList(partParams);
687  partitioner->SetFactory("A", rapFact);
688  // partitioner->SetFactory("number of partitions", repartheurFactory);
689 #else
690  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
691 #endif
692  }
693 
694  auto repartFactory = rcp(new RepartitionFactory());
695  ParameterList repartParams;
696  repartParams.set("repartition: print partition distribution", precList22_.get<bool>("repartition: print partition distribution", false));
697  repartParams.set("repartition: remap parts", precList22_.get<bool>("repartition: remap parts", true));
698  repartParams.set("repartition: remap accept partition", AH_.is_null());
699  repartFactory->SetParameterList(repartParams);
700  repartFactory->SetFactory("A", rapFact);
701  // repartFactory->SetFactory("number of partitions", repartheurFactory);
702  repartFactory->SetFactory("Partition", partitioner);
703 
704  auto newP = rcp(new RebalanceTransferFactory());
705  ParameterList newPparams;
706  newPparams.set("type", "Interpolation");
707  newPparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
708  newPparams.set("repartition: use subcommunicators", true);
709  newPparams.set("repartition: rebalance Nullspace", false);
710  newP->SetFactory("Coordinates", NoFactory::getRCP());
711  newP->SetParameterList(newPparams);
712  newP->SetFactory("Importer", repartFactory);
713 
715  if (!implicitTranspose_) {
716  // Rebalanced R
717  newR = rcp(new RebalanceTransferFactory());
718  ParameterList newRparams;
719  newRparams.set("type", "Restriction");
720  newRparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
721  newRparams.set("repartition: use subcommunicators", true);
722  newR->SetParameterList(newRparams);
723  newR->SetFactory("Importer", repartFactory);
724  newR->SetFactory("R", transPFactory);
725  }
726 
727  auto newA = rcp(new RebalanceAcFactory());
728  ParameterList rebAcParams;
729  rebAcParams.set("repartition: use subcommunicators", true);
730  newA->SetParameterList(rebAcParams);
731  newA->SetFactory("A", rapFact);
732  newA->SetFactory("Importer", repartFactory);
733 
734  if (!implicitTranspose_)
735  coarseLevel.Request("R", newR.get());
736  coarseLevel.Request("P", newP.get());
737  coarseLevel.Request("Importer", repartFactory.get());
738  coarseLevel.Request("A", newA.get());
739  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
740  if (!parameterList_.get<bool>("rap: triple product", false))
741  coarseLevel.Request("AP reuse data", rapFact.get());
742  coarseLevel.Request("RAP reuse data", rapFact.get());
743  }
744  coarseLevel.Request("Coordinates", newP.get());
745  rapFact->Build(fineLevel,coarseLevel);
746  repartFactory->Build(coarseLevel);
747 
748  if (!precList22_.get<bool>("repartition: rebalance P and R", false))
749  Importer22_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
750  D0_Matrix_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
751  if (!implicitTranspose_)
752  D0_T_Matrix_ = coarseLevel.Get< RCP<Matrix> >("R", newR.get());
753  A22_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
754  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
755  if (!parameterList_.get<bool>("rap: triple product", false))
756  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
757  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
758  }
759  Coords_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
760  } else {
761  coarseLevel.Request("A", rapFact.get());
762  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
763  coarseLevel.Request("AP reuse data", rapFact.get());
764  coarseLevel.Request("RAP reuse data", rapFact.get());
765  }
766  if (!implicitTranspose_)
767  coarseLevel.Request("R", transPFactory.get());
768 
769  A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
770  if (!implicitTranspose_)
771  D0_T_Matrix_ = coarseLevel.Get< RCP<Matrix> >("R", transPFactory.get());
772  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
773  if (!parameterList_.get<bool>("rap: triple product", false))
774  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
775  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
776  }
777 
778  ParameterList XpetraList;
779  XpetraList.set("Restrict Communicator",true);
780  XpetraList.set("Timer Label","MueLu RefMaxwell::RebalanceA22");
781  RCP<const Map> targetMap = Importer22_->getTargetMap();
782  A22_ = MatrixFactory::Build(A22_, *Importer22_, *Importer22_, targetMap, targetMap, rcp(&XpetraList,false));
783  }
784  } else
785 #endif // HAVE_MPI
786  {
787  coarseLevel.Request("A", rapFact.get());
788  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
789  coarseLevel.Request("AP reuse data", rapFact.get());
790  coarseLevel.Request("RAP reuse data", rapFact.get());
791  }
792  coarseLevel.Request("R", transPFactory.get());
793 
794  A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
795  if (!implicitTranspose_)
796  D0_T_Matrix_ = coarseLevel.Get< RCP<Matrix> >("R", transPFactory.get());
797  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
798  if (!parameterList_.get<bool>("rap: triple product", false))
799  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
800  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
801  }
802  }
803  }
804 
805  if (!A22_.is_null()) {
806  A22_->setObjectLabel("RefMaxwell (2,2)");
807  int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
808  if (!reuse) {
809  ParameterList& userParamList = precList22_.sublist("user data");
810  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
811  // If we detected no boundary conditions, the (2,2) problem is singular.
812  // Therefore, if we want to use a direct coarse solver, we need to fix up the nullspace.
813  std::string coarseType = "";
814  if (precList22_.isParameter("coarse: type")) {
815  coarseType = precList22_.get<std::string>("coarse: type");
816  // Transform string to "Abcde" notation
817  std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
818  std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
819  }
820  if (BCrowcount_ == 0 &&
821  (coarseType == "" ||
822  coarseType == "Klu" ||
823  coarseType == "Klu2") &&
824  (!precList22_.isSublist("coarse: params") ||
825  !precList22_.sublist("coarse: params").isParameter("fix nullspace")))
826  precList22_.sublist("coarse: params").set("fix nullspace",true);
827  Hierarchy22_ = MueLu::CreateXpetraPreconditioner(A22_, precList22_);
828  } else {
829  RCP<MueLu::Level> level0 = Hierarchy22_->GetLevel(0);
830  level0->Set("A", A22_);
831  Hierarchy22_->SetupRe();
832  }
833  SetProcRankVerbose(oldRank);
834  }
835  VerboseObject::SetDefaultVerbLevel(verbMap[verbosityLevel]);
836 
837  }
838 
839  {
840  if (parameterList_.isType<std::string>("smoother: type") &&
841  parameterList_.get<std::string>("smoother: type") == "hiptmair" &&
842  SM_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra &&
843  A22_->getDomainMap()->lib() == Xpetra::UseTpetra &&
844  D0_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra) {
845 #if defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || (defined(HAVE_MUELU_INST_DOUBLE_INT_INT)))
846  ParameterList hiptmairPreList, hiptmairPostList, smootherPreList, smootherPostList;
847 
848  if (smootherList_.isSublist("smoother: pre params"))
849  smootherPreList = smootherList_.sublist("smoother: pre params");
850  else if (smootherList_.isSublist("smoother: params"))
851  smootherPreList = smootherList_.sublist("smoother: params");
852  hiptmairPreList.set("hiptmair: smoother type 1",
853  smootherPreList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
854  hiptmairPreList.set("hiptmair: smoother type 2",
855  smootherPreList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
856  if(smootherPreList.isSublist("hiptmair: smoother list 1"))
857  hiptmairPreList.set("hiptmair: smoother list 1", smootherPreList.sublist("hiptmair: smoother list 1"));
858  if(smootherPreList.isSublist("hiptmair: smoother list 2"))
859  hiptmairPreList.set("hiptmair: smoother list 2", smootherPreList.sublist("hiptmair: smoother list 2"));
860  hiptmairPreList.set("hiptmair: pre or post",
861  smootherPreList.get<std::string>("hiptmair: pre or post", "pre"));
862  hiptmairPreList.set("hiptmair: zero starting solution",
863  smootherPreList.get<bool>("hiptmair: zero starting solution", true));
864 
865  if (smootherList_.isSublist("smoother: post params"))
866  smootherPostList = smootherList_.sublist("smoother: post params");
867  else if (smootherList_.isSublist("smoother: params"))
868  smootherPostList = smootherList_.sublist("smoother: params");
869  hiptmairPostList.set("hiptmair: smoother type 1",
870  smootherPostList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
871  hiptmairPostList.set("hiptmair: smoother type 2",
872  smootherPostList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
873  if(smootherPostList.isSublist("hiptmair: smoother list 1"))
874  hiptmairPostList.set("hiptmair: smoother list 1", smootherPostList.sublist("hiptmair: smoother list 1"));
875  if(smootherPostList.isSublist("hiptmair: smoother list 2"))
876  hiptmairPostList.set("hiptmair: smoother list 2", smootherPostList.sublist("hiptmair: smoother list 2"));
877  hiptmairPostList.set("hiptmair: pre or post",
878  smootherPostList.get<std::string>("hiptmair: pre or post", "post"));
879  hiptmairPostList.set("hiptmair: zero starting solution",
880  smootherPostList.get<bool>("hiptmair: zero starting solution", false));
881 
882  typedef Tpetra::RowMatrix<SC, LO, GO, NO> TROW;
883  RCP<const TROW > EdgeMatrix = Utilities::Op2NonConstTpetraRow(SM_Matrix_);
886 
887  hiptmairPreSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
888  hiptmairPreSmoother_ -> setParameters(hiptmairPreList);
889  hiptmairPreSmoother_ -> initialize();
890  hiptmairPreSmoother_ -> compute();
891  hiptmairPostSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
892  hiptmairPostSmoother_ -> setParameters(hiptmairPostList);
893  hiptmairPostSmoother_ -> initialize();
894  hiptmairPostSmoother_ -> compute();
895  useHiptmairSmoothing_ = true;
896 #else
897  throw(Xpetra::Exceptions::RuntimeError("MueLu must be compiled with Ifpack2 for Hiptmair smoothing."));
898 #endif // defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || defined(HAVE_MUELU_INST_DOUBLE_INT_INT))
899  } else {
900  if (parameterList_.isType<std::string>("smoother: pre type") && parameterList_.isType<std::string>("smoother: post type")) {
901  std::string preSmootherType = parameterList_.get<std::string>("smoother: pre type");
902  std::string postSmootherType = parameterList_.get<std::string>("smoother: post type");
903 
904  ParameterList preSmootherList, postSmootherList;
905  if (parameterList_.isSublist("smoother: pre params"))
906  preSmootherList = parameterList_.sublist("smoother: pre params");
907  if (parameterList_.isSublist("smoother: post params"))
908  postSmootherList = parameterList_.sublist("smoother: post params");
909 
910  Level level;
911  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
912  level.SetFactoryManager(factoryHandler);
913  level.SetLevelID(0);
914  level.setObjectLabel("RefMaxwell (1,1)");
915  level.Set("A",SM_Matrix_);
916  level.setlib(SM_Matrix_->getDomainMap()->lib());
917 
918  RCP<SmootherPrototype> preSmootherPrototype = rcp(new TrilinosSmoother(preSmootherType, preSmootherList));
919  RCP<SmootherFactory> preSmootherFact = rcp(new SmootherFactory(preSmootherPrototype));
920 
921  RCP<SmootherPrototype> postSmootherPrototype = rcp(new TrilinosSmoother(postSmootherType, postSmootherList));
922  RCP<SmootherFactory> postSmootherFact = rcp(new SmootherFactory(postSmootherPrototype));
923 
924  level.Request("PreSmoother",preSmootherFact.get());
925  preSmootherFact->Build(level);
926  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",preSmootherFact.get());
927 
928  level.Request("PostSmoother",postSmootherFact.get());
929  postSmootherFact->Build(level);
930  PostSmoother_ = level.Get<RCP<SmootherBase> >("PostSmoother",postSmootherFact.get());
931  } else {
932  std::string smootherType = parameterList_.get<std::string>("smoother: type", "CHEBYSHEV");
933  Level level;
934  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
935  level.SetFactoryManager(factoryHandler);
936  level.SetLevelID(0);
937  level.setObjectLabel("RefMaxwell (1,1)");
938  level.Set("A",SM_Matrix_);
939  level.setlib(SM_Matrix_->getDomainMap()->lib());
940  RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(smootherType, smootherList_));
941  RCP<SmootherFactory> SmootherFact = rcp(new SmootherFactory(smootherPrototype));
942  level.Request("PreSmoother",SmootherFact.get());
943  SmootherFact->Build(level);
944  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",SmootherFact.get());
945  PostSmoother_ = PreSmoother_;
946  }
947  useHiptmairSmoothing_ = false;
948  }
949  }
950 
951  // Allocate temporary MultiVectors for solve
952  P11res_ = MultiVectorFactory::Build(P11_->getDomainMap(), 1);
953  if (!ImporterH_.is_null()) {
954  P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), 1);
955  P11xTmp_ = MultiVectorFactory::Build(ImporterH_->getSourceMap(), 1);
956  P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), 1);
957  } else
958  P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), 1);
959  D0res_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), 1);
960  if (!Importer22_.is_null()) {
961  D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), 1);
962  D0xTmp_ = MultiVectorFactory::Build(Importer22_->getSourceMap(), 1);
963  D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), 1);
964  } else
965  D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), 1);
966  residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), 1);
967 
968  if (!ImporterH_.is_null() && parameterList_.isSublist("refmaxwell: ImporterH params")){
969  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: ImporterH params"));
970  ImporterH_->setDistributorParameters(importerParams);
971  }
972  if (!Importer22_.is_null() && parameterList_.isSublist("refmaxwell: Importer22 params")){
973  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: Importer22 params"));
974  Importer22_->setDistributorParameters(importerParams);
975  }
976 
977 #ifdef HAVE_MUELU_CUDA
978  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStop();
979 #endif
980 
981  describe(GetOStream(Runtime0));
982 
983  if (dump_matrices_) {
984  GetOStream(Runtime0) << "RefMaxwell::compute(): dumping data" << std::endl;
985  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("SM.mat"), *SM_Matrix_);
986  if(!Ms_Matrix_.is_null()) Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("Ms.mat"), *Ms_Matrix_);
987  if(!M1_Matrix_.is_null()) Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("M1.mat"), *M1_Matrix_);
988  if(!M0inv_Matrix_.is_null()) Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("M0inv.mat"), *M0inv_Matrix_);
989 #ifndef HAVE_MUELU_KOKKOS_REFACTOR
990  std::ofstream outBCrows("BCrows.mat");
991  std::copy(BCrows_.begin(), BCrows_.end(), std::ostream_iterator<LO>(outBCrows, "\n"));
992  std::ofstream outBCcols("BCcols.mat");
993  std::copy(BCcols_.begin(), BCcols_.end(), std::ostream_iterator<LO>(outBCcols, "\n"));
994 #else
995  if (useKokkos_) {
996  std::ofstream outBCrows("BCrows.mat");
997  auto BCrows = Kokkos::create_mirror_view (BCrowsKokkos_);
998  Kokkos::deep_copy(BCrows , BCrowsKokkos_);
999  for (size_t i = 0; i < BCrows.size(); i++)
1000  outBCrows << BCrows[i] << "\n";
1001 
1002  std::ofstream outBCcols("BCcols.mat");
1003  auto BCcols = Kokkos::create_mirror_view (BCcolsKokkos_);
1004  Kokkos::deep_copy(BCcols , BCcolsKokkos_);
1005  for (size_t i = 0; i < BCcols.size(); i++)
1006  outBCcols << BCcols[i] << "\n";
1007  } else {
1008  std::ofstream outBCrows("BCrows.mat");
1009  std::copy(BCrows_.begin(), BCrows_.end(), std::ostream_iterator<LO>(outBCrows, "\n"));
1010  std::ofstream outBCcols("BCcols.mat");
1011  std::copy(BCcols_.begin(), BCcols_.end(), std::ostream_iterator<LO>(outBCcols, "\n"));
1012  }
1013 #endif
1014  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("nullspace.mat"), *Nullspace_);
1015  if (Coords_ != null)
1016  Xpetra::IO<realType, LO, GlobalOrdinal, Node>::Write(std::string("coords.mat"), *Coords_);
1017  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("D0_nuked.mat"), *D0_Matrix_);
1018  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("A_nodal.mat"), *A_nodal_Matrix_);
1019  Xpetra::IO<SC, LO, GO, NO>::Write(std::string("P11.mat"), *P11_);
1020  if (!AH_.is_null())
1021  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("AH.mat"), *AH_);
1022  if (!A22_.is_null())
1023  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("A22.mat"), *A22_);
1024  }
1025 
1026  if (parameterList_.isSublist("matvec params"))
1027  {
1028  RCP<ParameterList> matvecParams = rcpFromRef(parameterList_.sublist("matvec params"));
1029 
1030  {
1031  RCP<const Import> xpImporter = SM_Matrix_->getCrsGraph()->getImporter();
1032  if (!xpImporter.is_null())
1033  xpImporter->setDistributorParameters(matvecParams);
1034  RCP<const Export> xpExporter = SM_Matrix_->getCrsGraph()->getExporter();
1035  if (!xpExporter.is_null())
1036  xpExporter->setDistributorParameters(matvecParams);
1037  }
1038  {
1039  RCP<const Import> xpImporter = D0_Matrix_->getCrsGraph()->getImporter();
1040  if (!xpImporter.is_null())
1041  xpImporter->setDistributorParameters(matvecParams);
1042  RCP<const Export> xpExporter = D0_Matrix_->getCrsGraph()->getExporter();
1043  if (!xpExporter.is_null())
1044  xpExporter->setDistributorParameters(matvecParams);
1045  }
1046  {
1047  RCP<const Import> xpImporter = D0_T_Matrix_->getCrsGraph()->getImporter();
1048  if (!xpImporter.is_null())
1049  xpImporter->setDistributorParameters(matvecParams);
1050  RCP<const Export> xpExporter = D0_T_Matrix_->getCrsGraph()->getExporter();
1051  if (!xpExporter.is_null())
1052  xpExporter->setDistributorParameters(matvecParams);
1053  }
1054  {
1055  RCP<const Import> xpImporter = R11_->getCrsGraph()->getImporter();
1056  if (!xpImporter.is_null())
1057  xpImporter->setDistributorParameters(matvecParams);
1058  RCP<const Export> xpExporter = R11_->getCrsGraph()->getExporter();
1059  if (!xpExporter.is_null())
1060  xpExporter->setDistributorParameters(matvecParams);
1061  }
1062  {
1063  RCP<const Import> xpImporter = P11_->getCrsGraph()->getImporter();
1064  if (!xpImporter.is_null())
1065  xpImporter->setDistributorParameters(matvecParams);
1066  RCP<const Export> xpExporter = P11_->getCrsGraph()->getExporter();
1067  if (!xpExporter.is_null())
1068  xpExporter->setDistributorParameters(matvecParams);
1069  }
1070  if (!ImporterH_.is_null())
1071  ImporterH_->setDistributorParameters(matvecParams);
1072  if (!Importer22_.is_null())
1073  Importer22_->setDistributorParameters(matvecParams);
1074  }
1075 
1076 
1077  VerboseObject::SetDefaultVerbLevel(oldVerbLevel);
1078  }
1079 
1080 
1081  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1083  // The P11 matrix maps node based aggregrates { A_j } to edges { e_i }.
1084  //
1085  // The old implementation used
1086  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i) * P(n_l, A_j)
1087  // yet the paper gives
1088  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i)
1089  // where phi_k is the k-th nullspace vector.
1090  //
1091  // The graph of D0 contains the incidence from nodes to edges.
1092  // The nodal prolongator P maps aggregates to nodes.
1093 
1094  const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1095  const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1096  const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1097  size_t dim = Nullspace_->getNumVectors();
1098  size_t numLocalRows = SM_Matrix_->getNodeNumRows();
1099 
1100  // build prolongator: algorithm 1 in the reference paper
1101  // First, build nodal unsmoothed prolongator using the matrix A_nodal
1102  RCP<Matrix> P_nodal;
1103  bool read_P_from_file = parameterList_.get("refmaxwell: read_P_from_file",false);
1104  if (read_P_from_file) {
1105  // This permits to read in an ML prolongator, so that we get the same hierarchy.
1106  // (ML and MueLu typically produce different aggregates.)
1107  std::string P_filename = parameterList_.get("refmaxwell: P_filename",std::string("P.mat"));
1108  P_nodal = Xpetra::IO<SC, LO, GO, NO>::Read(P_filename, A_nodal_Matrix_->getDomainMap());
1109  } else {
1110  Level fineLevel, coarseLevel;
1111  fineLevel.SetFactoryManager(null);
1112  coarseLevel.SetFactoryManager(null);
1113  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1114  fineLevel.SetLevelID(0);
1115  coarseLevel.SetLevelID(1);
1116  fineLevel.Set("A",A_nodal_Matrix_);
1117  fineLevel.Set("Coordinates",Coords_);
1118  fineLevel.Set("DofsPerNode",1);
1119  coarseLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1120  fineLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1121  coarseLevel.setObjectLabel("RefMaxwell (1,1)");
1122  fineLevel.setObjectLabel("RefMaxwell (1,1)");
1123 
1124  LocalOrdinal NSdim = 1;
1125  RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
1126  nullSpace->putScalar(SC_ONE);
1127  fineLevel.Set("Nullspace",nullSpace);
1128 
1129 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1130  RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact;
1131  if (useKokkos_) {
1132  amalgFact = rcp(new AmalgamationFactory_kokkos());
1133  dropFact = rcp(new CoalesceDropFactory_kokkos());
1134  UncoupledAggFact = rcp(new UncoupledAggregationFactory_kokkos());
1135  coarseMapFact = rcp(new CoarseMapFactory_kokkos());
1136  TentativePFact = rcp(new TentativePFactory_kokkos());
1137  Tfact = rcp(new CoordinatesTransferFactory_kokkos());
1138  } else {
1139  amalgFact = rcp(new AmalgamationFactory());
1140  dropFact = rcp(new CoalesceDropFactory());
1141  UncoupledAggFact = rcp(new UncoupledAggregationFactory());
1142  coarseMapFact = rcp(new CoarseMapFactory());
1143  TentativePFact = rcp(new TentativePFactory());
1144  Tfact = rcp(new CoordinatesTransferFactory());
1145  }
1146 #else
1150  RCP<CoarseMapFactory> coarseMapFact = rcp(new CoarseMapFactory());
1151  RCP<TentativePFactory> TentativePFact = rcp(new TentativePFactory());
1153 #endif
1154  dropFact->SetFactory("UnAmalgamationInfo", amalgFact);
1155  double dropTol = parameterList_.get("aggregation: drop tol",0.0);
1156  dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1157 
1158  UncoupledAggFact->SetFactory("Graph", dropFact);
1159 
1160  coarseMapFact->SetFactory("Aggregates", UncoupledAggFact);
1161 
1162  TentativePFact->SetFactory("Aggregates", UncoupledAggFact);
1163  TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact);
1164  TentativePFact->SetFactory("CoarseMap", coarseMapFact);
1165 
1166  Tfact->SetFactory("Aggregates", UncoupledAggFact);
1167  Tfact->SetFactory("CoarseMap", coarseMapFact);
1168 
1169  coarseLevel.Request("P",TentativePFact.get());
1170  coarseLevel.Request("Coordinates",Tfact.get());
1171 
1173  if (parameterList_.get("aggregation: export visualization data",false)) {
1174  aggExport = rcp(new AggregationExportFactory());
1175  ParameterList aggExportParams;
1176  aggExportParams.set("aggregation: output filename", "aggs.vtk");
1177  aggExportParams.set("aggregation: output file: agg style", "Jacks");
1178  aggExport->SetParameterList(aggExportParams);
1179 
1180  aggExport->SetFactory("Aggregates", UncoupledAggFact);
1181  aggExport->SetFactory("UnAmalgamationInfo", amalgFact);
1182  fineLevel.Request("Aggregates",UncoupledAggFact.get());
1183  fineLevel.Request("UnAmalgamationInfo",amalgFact.get());
1184  }
1185 
1186  coarseLevel.Get("P",P_nodal,TentativePFact.get());
1187  coarseLevel.Get("Coordinates",CoordsH_,Tfact.get());
1188 
1189  if (parameterList_.get("aggregation: export visualization data",false))
1190  aggExport->Build(fineLevel, coarseLevel);
1191  }
1192  if (dump_matrices_)
1193  Xpetra::IO<SC, LO, GO, NO>::Write(std::string("P_nodal.mat"), *P_nodal);
1194 
1195  RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
1196 
1197  // Import off-rank rows of P_nodal into P_nodal_imported
1198  RCP<CrsMatrix> P_nodal_imported;
1199  int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1200  if (numProcs > 1) {
1201  RCP<CrsMatrixWrap> P_nodal_temp;
1202  RCP<const Map> targetMap = D0Crs->getColMap();
1203  P_nodal_temp = rcp(new CrsMatrixWrap(targetMap, 0));
1204  RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1205  P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1206  P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
1207  rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
1208  P_nodal_imported = P_nodal_temp->getCrsMatrix();
1209  if (dump_matrices_)
1210  Xpetra::IO<SC, LO, GO, NO>::Write(std::string("P_nodal_imported.mat"), *P_nodal_temp);
1211  } else
1212  P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1213 
1214 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1215  if (useKokkos_) {
1216 
1217  using ATS = Kokkos::ArithTraits<SC>;
1219 
1220  typedef typename Matrix::local_matrix_type KCRS;
1221  typedef typename KCRS::device_type device_t;
1222  typedef typename KCRS::StaticCrsGraphType graph_t;
1223  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1224  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1225  typedef typename KCRS::values_type::non_const_type scalar_view_t;
1226 
1227  // Get data out of P_nodal_imported and D0.
1228  auto localP = P_nodal_imported->getLocalMatrix();
1229  auto localD0 = D0_Matrix_->getLocalMatrix();
1230 
1231  // Which algorithm should we use for the construction of the special prolongator?
1232  // Option "mat-mat":
1233  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1234  std::string defaultAlgo = "mat-mat";
1235  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1236 
1237  if (algo == "mat-mat") {
1238  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap(),0);
1239  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1240 
1241  // Get data out of D0*P.
1242  auto localD0P = D0_P_nodal->getLocalMatrix();
1243 
1244  // Create the matrix object
1245  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1246  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1247 
1248  lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1249  lno_nnz_view_t P11colind("P11_colind",dim*localD0P.graph.entries.size());
1250  scalar_view_t P11vals("P11_vals",dim*localD0P.graph.entries.size());
1251 
1252  // adjust rowpointer
1253  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1254  KOKKOS_LAMBDA(const size_t i) {
1255  P11rowptr(i) = dim*localD0P.graph.row_map(i);
1256  });
1257 
1258  // adjust column indices
1259  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1260  KOKKOS_LAMBDA(const size_t jj) {
1261  for (size_t k = 0; k < dim; k++) {
1262  P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1263  P11vals(dim*jj+k) = SC_ZERO;
1264  }
1265  });
1266 
1267  auto localNullspace = Nullspace_->template getLocalView<device_t>();
1268 
1269  // enter values
1270  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1271  // The matrix D0 has too many entries per row.
1272  // Therefore we need to check whether its entries are actually non-zero.
1273  // This is the case for the matrices built by MiniEM.
1274  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1275 
1277 
1278  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1279  KOKKOS_LAMBDA(const size_t i) {
1280  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1281  LO l = localD0.graph.entries(ll);
1282  SC p = localD0.values(ll);
1283  if (ATS::magnitude(p) < tol)
1284  continue;
1285  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1286  LO j = localP.graph.entries(jj);
1287  SC v = localP.values(jj);
1288  for (size_t k = 0; k < dim; k++) {
1289  LO jNew = dim*j+k;
1290  SC n = localNullspace(i,k);
1291  size_t m;
1292  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1293  if (P11colind(m) == jNew)
1294  break;
1295 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA)
1296  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1297 #endif
1298  P11vals(m) += half * v * n;
1299  }
1300  }
1301  }
1302  });
1303 
1304  } else {
1305  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1306  KOKKOS_LAMBDA(const size_t i) {
1307  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1308  LO l = localD0.graph.entries(ll);
1309  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1310  LO j = localP.graph.entries(jj);
1311  SC v = localP.values(jj);
1312  for (size_t k = 0; k < dim; k++) {
1313  LO jNew = dim*j+k;
1314  SC n = localNullspace(i,k);
1315  size_t m;
1316  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1317  if (P11colind(m) == jNew)
1318  break;
1319 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA)
1320  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1321 #endif
1322  P11vals(m) += half * v * n;
1323  }
1324  }
1325  }
1326  });
1327  }
1328 
1329  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0, Xpetra::StaticProfile));
1330  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1331  P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1332  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1333 
1334  } else
1335  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1336  } else {
1337 #endif // ifdef(HAVE_MUELU_KOKKOS_REFACTOR)
1338 
1339  // get nullspace vectors
1340  ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1341  ArrayRCP<ArrayView<const SC> > nullspace(dim);
1342  for(size_t i=0; i<dim; i++) {
1343  nullspaceRCP[i] = Nullspace_->getData(i);
1344  nullspace[i] = nullspaceRCP[i]();
1345  }
1346 
1347  // Get data out of P_nodal_imported and D0.
1348  ArrayRCP<const size_t> Prowptr_RCP, D0rowptr_RCP;
1349  ArrayRCP<const LO> Pcolind_RCP, D0colind_RCP;
1350  ArrayRCP<const SC> Pvals_RCP, D0vals_RCP;
1351  ArrayRCP<size_t> P11rowptr_RCP;
1352  ArrayRCP<LO> P11colind_RCP;
1353  ArrayRCP<SC> P11vals_RCP;
1354 
1355  P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1356  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1357 
1358  // For efficiency
1359  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1360  // slower than Teuchos::ArrayView::operator[].
1361  ArrayView<const size_t> Prowptr, D0rowptr;
1362  ArrayView<const LO> Pcolind, D0colind;
1363  ArrayView<const SC> Pvals, D0vals;
1364  Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1365  D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1366 
1367  // Which algorithm should we use for the construction of the special prolongator?
1368  // Option "mat-mat":
1369  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1370  // Option "gustavson":
1371  // Loop over D0, P and nullspace and allocate directly. (Gustavson-like)
1372  // More efficient, but only available for serial node.
1373  std::string defaultAlgo = "gustavson";
1374  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1375 
1376  if (algo == "mat-mat") {
1377  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap(),0);
1378  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1379 
1380  // Get data out of D0*P.
1381  ArrayRCP<const size_t> D0Prowptr_RCP;
1382  ArrayRCP<const LO> D0Pcolind_RCP;
1383  ArrayRCP<const SC> D0Pvals_RCP;
1384  rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1385 
1386  // For efficiency
1387  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1388  // slower than Teuchos::ArrayView::operator[].
1389  ArrayView<const size_t> D0Prowptr;
1390  ArrayView<const LO> D0Pcolind;
1391  D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1392 
1393  // Create the matrix object
1394  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1395  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1396  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0, Xpetra::StaticProfile));
1397  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1398  P11Crs->allocateAllValues(dim*D0Pcolind.size(), P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1399 
1400  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1401  ArrayView<LO> P11colind = P11colind_RCP();
1402  ArrayView<SC> P11vals = P11vals_RCP();
1403 
1404  // adjust rowpointer
1405  for (size_t i = 0; i < numLocalRows+1; i++) {
1406  P11rowptr[i] = dim*D0Prowptr[i];
1407  }
1408 
1409  // adjust column indices
1410  size_t nnz = 0;
1411  for (size_t jj = 0; jj < (size_t) D0Pcolind.size(); jj++)
1412  for (size_t k = 0; k < dim; k++) {
1413  P11colind[nnz] = dim*D0Pcolind[jj]+k;
1414  P11vals[nnz] = SC_ZERO;
1415  nnz++;
1416  }
1417 
1418  // enter values
1419  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1420  // The matrix D0 has too many entries per row.
1421  // Therefore we need to check whether its entries are actually non-zero.
1422  // This is the case for the matrices built by MiniEM.
1423  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1424 
1426  for (size_t i = 0; i < numLocalRows; i++) {
1427  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1428  LO l = D0colind[ll];
1429  SC p = D0vals[ll];
1431  continue;
1432  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1433  LO j = Pcolind[jj];
1434  SC v = Pvals[jj];
1435  for (size_t k = 0; k < dim; k++) {
1436  LO jNew = dim*j+k;
1437  SC n = nullspace[k][i];
1438  size_t m;
1439  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1440  if (P11colind[m] == jNew)
1441  break;
1442 #ifdef HAVE_MUELU_DEBUG
1443  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1444 #endif
1445  P11vals[m] += half * v * n;
1446  }
1447  }
1448  }
1449  }
1450  } else {
1451  // enter values
1452  for (size_t i = 0; i < numLocalRows; i++) {
1453  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1454  LO l = D0colind[ll];
1455  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1456  LO j = Pcolind[jj];
1457  SC v = Pvals[jj];
1458  for (size_t k = 0; k < dim; k++) {
1459  LO jNew = dim*j+k;
1460  SC n = nullspace[k][i];
1461  size_t m;
1462  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1463  if (P11colind[m] == jNew)
1464  break;
1465 #ifdef HAVE_MUELU_DEBUG
1466  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1467 #endif
1468  P11vals[m] += half * v * n;
1469  }
1470  }
1471  }
1472  }
1473  }
1474 
1475  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1476  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1477 
1478  } else if (algo == "gustavson") {
1479 
1480  LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1481  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1482  Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1483  // This is ad-hoc and should maybe be replaced with some better heuristics.
1484  size_t nnz_alloc = dim*D0vals_RCP.size();
1485 
1486  // Create the matrix object
1487  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1488  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1489  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0, Xpetra::StaticProfile));
1490  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1491  P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1492 
1493  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1494  ArrayView<LO> P11colind = P11colind_RCP();
1495  ArrayView<SC> P11vals = P11vals_RCP();
1496 
1497  size_t nnz;
1498  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1499  // The matrix D0 has too many entries per row.
1500  // Therefore we need to check whether its entries are actually non-zero.
1501  // This is the case for the matrices built by MiniEM.
1502  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1503 
1505  nnz = 0;
1506  size_t nnz_old = 0;
1507  for (size_t i = 0; i < numLocalRows; i++) {
1508  P11rowptr[i] = nnz;
1509  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1510  LO l = D0colind[ll];
1511  SC p = D0vals[ll];
1513  continue;
1514  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1515  LO j = Pcolind[jj];
1516  SC v = Pvals[jj];
1517  for (size_t k = 0; k < dim; k++) {
1518  LO jNew = dim*j+k;
1519  SC n = nullspace[k][i];
1520  // do we already have an entry for (i, jNew)?
1521  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1522  P11_status[jNew] = nnz;
1523  P11colind[nnz] = jNew;
1524  P11vals[nnz] = half * v * n;
1525  // or should it be
1526  // P11vals[nnz] = half * n;
1527  nnz++;
1528  } else {
1529  P11vals[P11_status[jNew]] += half * v * n;
1530  // or should it be
1531  // P11vals[P11_status[jNew]] += half * n;
1532  }
1533  }
1534  }
1535  }
1536  nnz_old = nnz;
1537  }
1538  P11rowptr[numLocalRows] = nnz;
1539  } else {
1540  nnz = 0;
1541  size_t nnz_old = 0;
1542  for (size_t i = 0; i < numLocalRows; i++) {
1543  P11rowptr[i] = nnz;
1544  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1545  LO l = D0colind[ll];
1546  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1547  LO j = Pcolind[jj];
1548  SC v = Pvals[jj];
1549  for (size_t k = 0; k < dim; k++) {
1550  LO jNew = dim*j+k;
1551  SC n = nullspace[k][i];
1552  // do we already have an entry for (i, jNew)?
1553  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1554  P11_status[jNew] = nnz;
1555  P11colind[nnz] = jNew;
1556  P11vals[nnz] = half * v * n;
1557  // or should it be
1558  // P11vals[nnz] = half * n;
1559  nnz++;
1560  } else {
1561  P11vals[P11_status[jNew]] += half * v * n;
1562  // or should it be
1563  // P11vals[P11_status[jNew]] += half * n;
1564  }
1565  }
1566  }
1567  }
1568  nnz_old = nnz;
1569  }
1570  P11rowptr[numLocalRows] = nnz;
1571  }
1572 
1573  if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1574  // Downward resize
1575  // - Cannot resize for Epetra, as it checks for same pointers
1576  // - Need to resize for Tpetra, as it checks ().size() == P11rowptr[numLocalRows]
1577  P11vals_RCP.resize(nnz);
1578  P11colind_RCP.resize(nnz);
1579  }
1580 
1581  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1582  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1583  } else
1584  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1585 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1586  }
1587 #endif
1588  }
1589 
1590  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1592  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: Build coarse (1,1) matrix"));
1593 
1594  // coarse matrix for P11* (M1 + D1* M2 D1) P11
1595  RCP<Matrix> Matrix1;
1596  {
1597  Level fineLevel, coarseLevel;
1598  fineLevel.SetFactoryManager(null);
1599  coarseLevel.SetFactoryManager(null);
1600  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1601  fineLevel.SetLevelID(0);
1602  coarseLevel.SetLevelID(1);
1603  fineLevel.Set("A",SM_Matrix_);
1604  coarseLevel.Set("P",P11_);
1605  if (!implicitTranspose_)
1606  coarseLevel.Set("R",R11_);
1607 
1608  coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
1609  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
1610  coarseLevel.setObjectLabel("RefMaxwell (1,1)");
1611  fineLevel.setObjectLabel("RefMaxwell (1,1)");
1612 
1613  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
1614  ParameterList rapList = *(rapFact->GetValidParameterList());
1615  rapList.set("transpose: use implicit", implicitTranspose_);
1616  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
1617  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
1618  rapFact->SetParameterList(rapList);
1619 
1620  if (precList11_.isType<std::string>("reuse: type") && precList11_.get<std::string>("reuse: type") != "none") {
1621  if (!parameterList_.get<bool>("rap: triple product", false))
1622  coarseLevel.Request("AP reuse data", rapFact.get());
1623  coarseLevel.Request("RAP reuse data", rapFact.get());
1624  }
1625 
1626  if (!AH_AP_reuse_data_.is_null()) {
1627  coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
1628  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", AH_AP_reuse_data_, rapFact.get());
1629  }
1630  if (!AH_RAP_reuse_data_.is_null()) {
1631  coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
1632  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", AH_RAP_reuse_data_, rapFact.get());
1633  }
1634 
1635  coarseLevel.Request("A", rapFact.get());
1636 
1637  Matrix1 = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
1638  if (precList11_.isType<std::string>("reuse: type") && precList11_.get<std::string>("reuse: type") != "none") {
1639  if (!parameterList_.get<bool>("rap: triple product", false))
1640  AH_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
1641  AH_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
1642  }
1643  }
1644 
1645  if (!AH_.is_null())
1646  AH_ = Teuchos::null;
1647 
1648  if(disable_addon_==true) {
1649  // if add-on is not chosen
1650  AH_=Matrix1;
1651  }
1652  else {
1653  if (Addon_Matrix_.is_null()) {
1654  Teuchos::TimeMonitor tmAddon(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: Build coarse addon matrix"));
1655  // catch a failure
1656  TEUCHOS_TEST_FOR_EXCEPTION(M0inv_Matrix_==Teuchos::null,std::invalid_argument,
1657  "MueLu::RefMaxwell::formCoarseMatrix(): Inverse of "
1658  "lumped mass matrix required for add-on (i.e. M0inv_Matrix is null)");
1659 
1660  // coarse matrix for add-on, i.e P11* (M1 D0 M0inv D0* M1) P11
1661  RCP<Matrix> Zaux = MatrixFactory::Build(M1_Matrix_->getRowMap(),0);
1662  RCP<Matrix> Z = MatrixFactory::Build(D0_Matrix_->getDomainMap(),0);
1663 
1664  // construct Zaux = M1 P11
1665  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M1_Matrix_,false,*P11_,false,*Zaux,true,true);
1666  // construct Z = D0* M1 P11 = D0* Zaux
1667  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*Zaux,false,*Z,true,true);
1668 
1669  // construct Z* M0inv Z
1670  RCP<Matrix> Matrix2 = MatrixFactory::Build(Z->getDomainMap(),0);
1671  if (M0inv_Matrix_->getGlobalMaxNumRowEntries()<=1) {
1672  // We assume that if M0inv has at most one entry per row then
1673  // these are all diagonal entries.
1674 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1675  RCP<Matrix> ZT;
1676  if (useKokkos_)
1677  ZT = Utilities_kokkos::Transpose(*Z);
1678  else
1679  ZT = Utilities::Transpose(*Z);
1680 #else
1682 #endif
1683  RCP<Vector> diag = VectorFactory::Build(M0inv_Matrix_->getRowMap());
1684  M0inv_Matrix_->getLocalDiagCopy(*diag);
1685  if (Z->getRowMap()->isSameAs(*(diag->getMap())))
1686  Z->leftScale(*diag);
1687  else {
1688  RCP<Import> importer = ImportFactory::Build(diag->getMap(),Z->getRowMap());
1689  RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
1690  diag2->doImport(*diag,*importer,Xpetra::INSERT);
1691  Z->leftScale(*diag2);
1692  }
1693  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*ZT,false,*Z,false,*Matrix2,true,true);
1694  } else if (parameterList_.get<bool>("rap: triple product", false) == false) {
1695  RCP<Matrix> C2 = MatrixFactory::Build(M0inv_Matrix_->getRowMap(),0);
1696  // construct C2 = M0inv Z
1697  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M0inv_Matrix_,false,*Z,false,*C2,true,true);
1698  // construct Matrix2 = Z* M0inv Z = Z* C2
1699  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*C2,false,*Matrix2,true,true);
1700  } else {
1701  // construct Matrix2 = Z* M0inv Z
1703  MultiplyRAP(*Z, true, *M0inv_Matrix_, false, *Z, false, *Matrix2, true, true);
1704  }
1705  // Should we keep the addon for next setup?
1706  if (precList11_.isType<std::string>("reuse: type") && precList11_.get<std::string>("reuse: type") != "none")
1707  Addon_Matrix_ = Matrix2;
1708 
1709  // add matrices together
1710  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*Matrix1,false,(Scalar)1.0,*Matrix2,false,(Scalar)1.0,AH_,GetOStream(Runtime0));
1711  AH_->fillComplete();
1712  } else {
1713  // add matrices together
1714  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*Matrix1,false,(Scalar)1.0,*Addon_Matrix_,false,(Scalar)1.0,AH_,GetOStream(Runtime0));
1715  AH_->fillComplete();
1716  }
1717  }
1718 
1719  // set fixed block size for vector nodal matrix
1720  size_t dim = Nullspace_->getNumVectors();
1721  AH_->SetFixedBlockSize(dim);
1722  AH_->setObjectLabel("RefMaxwell (1,1)");
1723 
1724  }
1725 
1726 
1727  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1729  bool reuse = !SM_Matrix_.is_null();
1730  SM_Matrix_ = SM_Matrix_new;
1731  if (ComputePrec)
1732  compute(reuse);
1733  }
1734 
1735 
1736  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1737  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverseAdditive(const MultiVector& RHS, MultiVector& X) const {
1738 
1740 
1741  { // compute residuals
1742 
1743  Teuchos::TimeMonitor tmRes(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: residual calculation"));
1744  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
1745  if (implicitTranspose_) {
1746  P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
1747  D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
1748  } else {
1749  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
1750  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
1751  }
1752  }
1753 
1754  // block diagonal preconditioner on 2x2 (V-cycle for diagonal blocks)
1755 
1756  if (!ImporterH_.is_null()) {
1757  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: import coarse (1,1)"));
1758  P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
1759  P11res_.swap(P11resTmp_);
1760  }
1761  if (!Importer22_.is_null()) {
1762  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: import (2,2)"));
1763  D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
1764  D0res_.swap(D0resTmp_);
1765  }
1766 
1767  // iterate on coarse (1, 1) block
1768  if (!AH_.is_null()) {
1769  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: solve coarse (1,1)"));
1770 
1771  RCP<const Map> origXMap = P11x_->getMap();
1772  RCP<const Map> origRhsMap = P11res_->getMap();
1773 
1774  // Replace maps with maps with a subcommunicator
1775  P11res_->replaceMap(AH_->getRangeMap());
1776  P11x_ ->replaceMap(AH_->getDomainMap());
1777  HierarchyH_->Iterate(*P11res_, *P11x_, 1, true);
1778  P11x_ ->replaceMap(origXMap);
1779  P11res_->replaceMap(origRhsMap);
1780  }
1781 
1782  // iterate on (2, 2) block
1783  if (!A22_.is_null()) {
1784  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: solve (2,2)"));
1785 
1786  RCP<const Map> origXMap = D0x_->getMap();
1787  RCP<const Map> origRhsMap = D0res_->getMap();
1788 
1789  // Replace maps with maps with a subcommunicator
1790  D0res_->replaceMap(A22_->getRangeMap());
1791  D0x_ ->replaceMap(A22_->getDomainMap());
1792  Hierarchy22_->Iterate(*D0res_, *D0x_, 1, true);
1793  D0x_ ->replaceMap(origXMap);
1794  D0res_->replaceMap(origRhsMap);
1795  }
1796 
1797  if (!Importer22_.is_null()) {
1798  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: export (2,2)"));
1799  D0xTmp_->doExport(*D0x_, *Importer22_, Xpetra::INSERT);
1800  D0x_.swap(D0xTmp_);
1801  }
1802 
1803  if (!ImporterH_.is_null()) {
1804  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: export coarse (1,1)"));
1805  P11xTmp_->doExport(*P11x_, *ImporterH_, Xpetra::INSERT);
1806  P11x_.swap(P11xTmp_);
1807  }
1808 
1809  { // update current solution
1810  Teuchos::TimeMonitor tmUp(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: update"));
1811  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
1812  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS,one,one);
1813  X.update(one, *residual_, one);
1814 
1815  if (!ImporterH_.is_null()) {
1816  P11res_.swap(P11resTmp_);
1817  P11x_.swap(P11xTmp_);
1818  }
1819  if (!Importer22_.is_null()) {
1820  D0res_.swap(D0resTmp_);
1821  D0x_.swap(D0xTmp_);
1822  }
1823 
1824  }
1825  }
1826 
1827 
1828  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1829  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse121(const MultiVector& RHS, MultiVector& X) const {
1830 
1831  // precondition (1,1)-block
1832  solveH(RHS,X);
1833  // precondition (2,2)-block
1834  solve22(RHS,X);
1835  // precondition (1,1)-block
1836  solveH(RHS,X);
1837 
1838  }
1839 
1840 
1841  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1842  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse212(const MultiVector& RHS, MultiVector& X) const {
1843 
1844  // precondition (2,2)-block
1845  solve22(RHS,X);
1846  // precondition (1,1)-block
1847  solveH(RHS,X);
1848  // precondition (2,2)-block
1849  solve22(RHS,X);
1850 
1851  }
1852 
1853  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1854  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solveH(const MultiVector& RHS, MultiVector& X) const {
1855 
1857 
1858  { // compute residual
1859  Teuchos::TimeMonitor tmRes(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: residual calculation"));
1860  Utilities::Residual(*SM_Matrix_, X, RHS,*residual_);
1861  if (implicitTranspose_)
1862  P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
1863  else
1864  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
1865  }
1866 
1867  { // solve coarse (1,1) block
1868  if (!ImporterH_.is_null()) {
1869  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: import coarse (1,1)"));
1870  P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
1871  P11res_.swap(P11resTmp_);
1872  }
1873  if (!AH_.is_null()) {
1874  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: solve coarse (1,1)"));
1875 
1876  RCP<const Map> origXMap = P11x_->getMap();
1877  RCP<const Map> origRhsMap = P11res_->getMap();
1878 
1879  // Replace maps with maps with a subcommunicator
1880  P11res_->replaceMap(AH_->getRangeMap());
1881  P11x_ ->replaceMap(AH_->getDomainMap());
1882  HierarchyH_->Iterate(*P11res_, *P11x_, 1, true);
1883  P11x_ ->replaceMap(origXMap);
1884  P11res_->replaceMap(origRhsMap);
1885  }
1886  if (!ImporterH_.is_null()) {
1887  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: export coarse (1,1)"));
1888  P11xTmp_->doExport(*P11x_, *ImporterH_, Xpetra::INSERT);
1889  P11x_.swap(P11xTmp_);
1890  }
1891  }
1892 
1893  { // update current solution
1894  Teuchos::TimeMonitor tmUp(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: update"));
1895  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
1896  X.update(one, *residual_, one);
1897  }
1898 
1899  }
1900 
1901 
1902  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1903  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solve22(const MultiVector& RHS, MultiVector& X) const {
1904 
1906 
1907  { // compute residual
1908  Teuchos::TimeMonitor tmRes(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: residual calculation"));
1909  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
1910  if (implicitTranspose_)
1911  D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
1912  else
1913  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
1914  }
1915 
1916  { // solve (2,2) block
1917  if (!Importer22_.is_null()) {
1918  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: import (2,2)"));
1919  D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
1920  D0res_.swap(D0resTmp_);
1921  }
1922  if (!A22_.is_null()) {
1923  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: solve (2,2)"));
1924 
1925  RCP<const Map> origXMap = D0x_->getMap();
1926  RCP<const Map> origRhsMap = D0res_->getMap();
1927 
1928  // Replace maps with maps with a subcommunicator
1929  D0res_->replaceMap(A22_->getRangeMap());
1930  D0x_ ->replaceMap(A22_->getDomainMap());
1931  Hierarchy22_->Iterate(*D0res_, *D0x_, 1, true);
1932  D0x_ ->replaceMap(origXMap);
1933  D0res_->replaceMap(origRhsMap);
1934  }
1935  if (!Importer22_.is_null()) {
1936  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: export (2,2)"));
1937  D0xTmp_->doExport(*D0x_, *Importer22_, Xpetra::INSERT);
1938  D0x_.swap(D0xTmp_);
1939  }
1940  }
1941 
1942  { //update current solution
1943  Teuchos::TimeMonitor tmUp(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: update"));
1944  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
1945  X.update(one, *residual_, one);
1946  }
1947 
1948  }
1949 
1950 
1951  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1952  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply (const MultiVector& RHS, MultiVector& X,
1953  Teuchos::ETransp /* mode */,
1954  Scalar /* alpha */,
1955  Scalar /* beta */) const {
1956 
1957  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: solve"));
1958 
1959  // make sure that we have enough temporary memory
1960  if (X.getNumVectors() != P11res_->getNumVectors()) {
1961  P11res_ = MultiVectorFactory::Build(P11_->getDomainMap(), X.getNumVectors());
1962  if (!ImporterH_.is_null()) {
1963  P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), X.getNumVectors());
1964  P11xTmp_ = MultiVectorFactory::Build(ImporterH_->getSourceMap(), X.getNumVectors());
1965  P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), X.getNumVectors());
1966  } else
1967  P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), X.getNumVectors());
1968  if (!Importer22_.is_null()) {
1969  D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), X.getNumVectors());
1970  D0xTmp_ = MultiVectorFactory::Build(Importer22_->getSourceMap(), X.getNumVectors());
1971  D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), X.getNumVectors());
1972  } else
1973  D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), X.getNumVectors());
1974  residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), X.getNumVectors());
1975 
1976  }
1977 
1978  { // apply pre-smoothing
1979 
1980  Teuchos::TimeMonitor tmSm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: smoothing"));
1981 
1982 #if defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || defined(HAVE_MUELU_INST_DOUBLE_INT_INT))
1983  if (useHiptmairSmoothing_) {
1984  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
1985  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
1986  hiptmairPreSmoother_->apply(tRHS, tX);
1987  }
1988  else
1989 #endif
1990  PreSmoother_->Apply(X, RHS, use_as_preconditioner_);
1991  }
1992 
1993  // do solve for the 2x2 block system
1994  if(mode_=="additive")
1995  applyInverseAdditive(RHS,X);
1996  else if(mode_=="121")
1997  applyInverse121(RHS,X);
1998  else if(mode_=="212")
1999  applyInverse212(RHS,X);
2000  else if(mode_=="1")
2001  solveH(RHS,X);
2002  else if(mode_=="2")
2003  solve22(RHS,X);
2004  else if(mode_=="none") {
2005  // do nothing
2006  }
2007  else
2008  applyInverseAdditive(RHS,X);
2009 
2010  { // apply post-smoothing
2011 
2012  Teuchos::TimeMonitor tmSm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: smoothing"));
2013 
2014 #if defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || defined(HAVE_MUELU_INST_DOUBLE_INT_INT))
2015  if (useHiptmairSmoothing_)
2016  {
2017  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2018  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2019  hiptmairPostSmoother_->apply(tRHS, tX);
2020  }
2021  else
2022 #endif
2023  PostSmoother_->Apply(X, RHS, false);
2024  }
2025 
2026  }
2027 
2028 
2029  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2031  return false;
2032  }
2033 
2034 
2035  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2038  const Teuchos::RCP<Matrix> & Ms_Matrix,
2039  const Teuchos::RCP<Matrix> & M0inv_Matrix,
2040  const Teuchos::RCP<Matrix> & M1_Matrix,
2041  const Teuchos::RCP<MultiVector> & Nullspace,
2042  const Teuchos::RCP<RealValuedMultiVector> & Coords,
2043  Teuchos::ParameterList& List)
2044  {
2045  // some pre-conditions
2046  TEUCHOS_ASSERT(D0_Matrix!=Teuchos::null);
2047  TEUCHOS_ASSERT(Ms_Matrix!=Teuchos::null);
2048  TEUCHOS_ASSERT(M1_Matrix!=Teuchos::null);
2049 
2050  HierarchyH_ = Teuchos::null;
2051  Hierarchy22_ = Teuchos::null;
2052  PreSmoother_ = Teuchos::null;
2053  PostSmoother_ = Teuchos::null;
2054  disable_addon_ = false;
2055  mode_ = "additive";
2056 
2057  // set parameters
2058  setParameters(List);
2059 
2060  D0_Matrix_ = D0_Matrix;
2061  M0inv_Matrix_ = M0inv_Matrix;
2062  Ms_Matrix_ = Ms_Matrix;
2063  M1_Matrix_ = M1_Matrix;
2064  Coords_ = Coords;
2065  Nullspace_ = Nullspace;
2066  }
2067 
2068  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2070  describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel /* verbLevel */) const {
2071 
2072  std::ostringstream oss;
2073 
2074  RCP<const Teuchos::Comm<int> > comm = SM_Matrix_->getDomainMap()->getComm();
2075 
2076 #ifdef HAVE_MPI
2077  int root;
2078  if (!A22_.is_null())
2079  root = comm->getRank();
2080  else
2081  root = -1;
2082 
2083  int actualRoot;
2084  reduceAll(*comm, Teuchos::REDUCE_MAX, root, Teuchos::ptr(&actualRoot));
2085  root = actualRoot;
2086 #endif
2087 
2088 
2089  oss << "\n--------------------------------------------------------------------------------\n" <<
2090  "--- RefMaxwell Summary ---\n"
2091  "--------------------------------------------------------------------------------" << std::endl;
2092  oss << std::endl;
2093 
2094  GlobalOrdinal numRows;
2095  GlobalOrdinal nnz;
2096 
2097  SM_Matrix_->getRowMap()->getComm()->barrier();
2098 
2099  numRows = SM_Matrix_->getGlobalNumRows();
2100  nnz = SM_Matrix_->getGlobalNumEntries();
2101 
2102  Xpetra::global_size_t tt = numRows;
2103  int rowspacer = 3; while (tt != 0) { tt /= 10; rowspacer++; }
2104  tt = nnz;
2105  int nnzspacer = 2; while (tt != 0) { tt /= 10; nnzspacer++; }
2106 
2107  oss << "block " << std::setw(rowspacer) << " rows " << std::setw(nnzspacer) << " nnz " << std::setw(9) << " nnz/row" << std::endl;
2108  oss << "(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2109 
2110  if (!A22_.is_null()) {
2111  // ToDo: make sure that this is printed correctly
2112  numRows = A22_->getGlobalNumRows();
2113  nnz = A22_->getGlobalNumEntries();
2114 
2115  oss << "(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2116  }
2117 
2118  oss << std::endl;
2119 
2120  std::string outstr = oss.str();
2121 
2122 #ifdef HAVE_MPI
2123  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2124  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
2125 
2126  int strLength = outstr.size();
2127  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
2128  if (comm->getRank() != root)
2129  outstr.resize(strLength);
2130  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
2131 #endif
2132 
2133  out << outstr;
2134 
2135  if (IsPrint(Statistics2)) {
2136  // Print the grid of processors
2137  std::ostringstream oss2;
2138 
2139  oss2 << "Sub-solver distribution over ranks" << std::endl;
2140  oss2 << "( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
2141 
2142  int numProcs = comm->getSize();
2143 #ifdef HAVE_MPI
2144  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2145  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
2146  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
2147 #endif
2148 
2149  char status = 0;
2150  if (!AH_.is_null())
2151  status += 1;
2152  if (!A22_.is_null())
2153  status += 2;
2154  std::vector<char> states(numProcs, 0);
2155 #ifdef HAVE_MPI
2156  MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
2157 #else
2158  states.push_back(status);
2159 #endif
2160 
2161  int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
2162  for (int proc = 0; proc < numProcs; proc += rowWidth) {
2163  for (int j = 0; j < rowWidth; j++)
2164  if (proc + j < numProcs)
2165  if (states[proc+j] == 0)
2166  oss2 << ".";
2167  else if (states[proc+j] == 1)
2168  oss2 << "1";
2169  else if (states[proc+j] == 2)
2170  oss2 << "2";
2171  else
2172  oss2 << "B";
2173  else
2174  oss2 << " ";
2175 
2176  oss2 << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
2177  }
2178  oss2 << std::endl;
2179  GetOStream(Statistics2) << oss2.str();
2180  }
2181 
2182 
2183  }
2184 
2185 } // namespace
2186 
2187 #define MUELU_REFMAXWELL_SHORT
2188 #endif //ifdef MUELU_REFMAXWELL_DEF_HPP
Important warning messages (one line)
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
This class specifies the default factory that should generate some data on a Level if the data does n...
#define MueLu_sumAll(rcpComm, in, out)
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
MueLu::DefaultLocalOrdinal LocalOrdinal
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level-&gt;Get&lt; RCP&lt;Matrix&gt; &gt;(&quot;A&quot;, factory) if factory == NULL =&gt; use default factory.
static Teuchos::ArrayRCP< const bool > DetectDirichletCols(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< const bool > &dirichletRows)
Detect Dirichlet columns based on Dirichlet rows.
void solve22(const MultiVector &RHS, MultiVector &X) const
apply solve to 2-2 block only
Factory for generating coarse level map. Used by TentativePFactory.
static RCP< const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2TpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > const vec)
Helper utility to pull out the underlying Tpetra objects from an Xpetra object.
static magnitudeType eps()
Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
T & get(const std::string &name, T def_value)
Class that encapsulates external library smoothers.
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static void MultiplyRAP(const Matrix &R, bool transposeR, const Matrix &A, bool transposeA, const Matrix &P, bool transposeP, Matrix &Ac, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::magnitude(0.), const bool count_twos_as_dirichlet=false)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Factory for building permutation matrix that can be be used to shuffle data (matrices, vectors) among processes.
size_type size() const
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
LocalOrdinal LO
One-liner description of what is happening.
T * get() const
std::string tolower(const std::string &str)
void solveH(const MultiVector &RHS, MultiVector &X) const
apply solve to 1-1 block only
void SetPreviousLevel(const RCP< Level > &previousLevel)
Definition: MueLu_Level.cpp:85
Interface to Zoltan library.This interface provides access to partitioning methods in Zoltan...
size_type size() const
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
Definition: MueLu_Level.cpp:92
void setParameters(Teuchos::ParameterList &list)
Set parameters.
void buildProlongator()
Setup the prolongator for the (1,1)-block.
static RCP< Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2NonConstTpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > vec)
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
Factory for building tentative prolongator.
Print even more statistics.
void setlib(Xpetra::UnderlyingLib lib2)
static RCP< Time > getNewTimer(const std::string &name)
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)
void resize(const size_type n, const T &val=T())
void Build(Level &currentLevel) const
Creates pre and post smoothers.
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &RHS)
void applyInverseAdditive(const MultiVector &RHS, MultiVector &X) const
apply additive algorithm for 2x2 solve
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
virtual void SetParameterList(const Teuchos::ParameterList &paramList)
Set parameters from a parameter list and return with default values.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
void resetMatrix(Teuchos::RCP< Matrix > SM_Matrix_new, bool ComputePrec=true)
Reset system matrix.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int >> &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
bool isSublist(const std::string &name) const
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LocalOrdinal, GlobalOrdinal, Node > > X)
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix...
Factory to export aggregation info or visualize aggregates using VTK.
Interface to Zoltan2 library.This interface provides access to partitioning methods in Zoltan2...
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
virtual void setObjectLabel(const std::string &objectLabel)
AmalgamationFactory for subblocks of strided map based amalgamation data.
void applyInverse212(const MultiVector &RHS, MultiVector &X) const
apply 2-1-2 algorithm for 2x2 solve
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
void applyInverse121(const MultiVector &RHS, MultiVector &X) const
apply 1-2-1 algorithm for 2x2 solve
void compute(bool reuse=false)
Setup the preconditioner.
Applies permutation to grid transfer operators.
size_t global_size_t
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_HIGH) const
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
void SetParameter(const std::string &name, const ParameterEntry &entry)
Set a parameter directly as a ParameterEntry.
void initialize(const Teuchos::RCP< Matrix > &D0_Matrix, const Teuchos::RCP< Matrix > &Ms_Matrix, const Teuchos::RCP< Matrix > &M0inv_Matrix, const Teuchos::RCP< Matrix > &M1_Matrix, const Teuchos::RCP< MultiVector > &Nullspace, const Teuchos::RCP< RealValuedMultiVector > &Coords, Teuchos::ParameterList &List)
Factory for creating a graph base on a given matrix.
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
void Build(Level &currentLevel) const
Build an object with this factory.
void SetLevelID(int levelID)
Set level number.
Definition: MueLu_Level.cpp:78
static Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Read(const std::string &fileName, Xpetra::UnderlyingLib lib, const RCP< const Teuchos::Comm< int > > &comm, bool binary=false)
virtual void SetFactory(const std::string &varName, const RCP< const FactoryBase > &factory)=0
Configuration.
Scalar SC
bool isType(const std::string &name) const
Class for transferring coordinates from a finer level to a coarser one.
Factory for building restriction operators.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletCols(const Xpetra::Matrix< SC, LO, GO, NO > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)
Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
Factory for building coarse matrices.
void formCoarseMatrix()
Compute P11^{T}*A*P11 efficiently.
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
static void ZeroDirichletCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
Factory for building coarse matrices.
static RCP< Tpetra::RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op2NonConstTpetraRow(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op)
Factory for building uncoupled aggregates.
static std::string translate(Teuchos::ParameterList &paramList, const std::string &defaultVals="")
: Translate ML parameters to MueLu parameter XML string
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
Factory for building a thresholded operator.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
static const RCP< const NoFactory > getRCP()
Static Get() functions.
bool is_null() const