MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_RefMaxwell_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_REFMAXWELL_DEF_HPP
11 #define MUELU_REFMAXWELL_DEF_HPP
12 
13 #include <sstream>
14 
15 #include "MueLu_ConfigDefs.hpp"
16 
17 #include "Xpetra_Map.hpp"
18 #include "Xpetra_MatrixMatrix.hpp"
21 #include "Xpetra_MatrixUtils.hpp"
22 
24 
25 #include "MueLu_AmalgamationFactory.hpp"
26 #include "MueLu_RAPFactory.hpp"
27 #include "MueLu_SmootherFactory.hpp"
28 
29 #include "MueLu_CoalesceDropFactory.hpp"
30 #include "MueLu_CoarseMapFactory.hpp"
31 #include "MueLu_CoordinatesTransferFactory.hpp"
32 #include "MueLu_UncoupledAggregationFactory.hpp"
33 #include "MueLu_TentativePFactory.hpp"
34 #include "MueLu_SaPFactory.hpp"
35 #include "MueLu_AggregationExportFactory.hpp"
36 #include "MueLu_Utilities.hpp"
37 #include "MueLu_Maxwell_Utils.hpp"
38 
39 #include "MueLu_CoalesceDropFactory_kokkos.hpp"
40 #include "MueLu_TentativePFactory_kokkos.hpp"
41 #include <Kokkos_Core.hpp>
42 #include <KokkosSparse_CrsMatrix.hpp>
43 
44 #include "MueLu_ZoltanInterface.hpp"
45 #include "MueLu_Zoltan2Interface.hpp"
46 #include "MueLu_RepartitionHeuristicFactory.hpp"
47 #include "MueLu_RepartitionFactory.hpp"
48 #include "MueLu_RebalanceAcFactory.hpp"
49 #include "MueLu_RebalanceTransferFactory.hpp"
50 
51 #include "MueLu_VerbosityLevel.hpp"
52 
55 
56 #ifdef HAVE_MUELU_CUDA
57 #include "cuda_profiler_api.h"
58 #endif
59 
60 // Stratimikos
61 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
62 #include <Xpetra_ThyraLinearOp.hpp>
63 #endif
64 
65 namespace MueLu {
66 
67 template <typename T>
68 T pop(Teuchos::ParameterList &pl, std::string const &name_in) {
69  T result = pl.get<T>(name_in);
70  pl.remove(name_in, true);
71  return result;
72 }
73 
74 template <typename T>
75 T pop(Teuchos::ParameterList &pl, std::string const &name_in, T def_value) {
76  T result = pl.get<T>(name_in, def_value);
77  pl.remove(name_in, false);
78  return result;
79 }
80 
81 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
84  return Teuchos::rcp_dynamic_cast<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node> >(matrix, true)->getCrsMatrix();
85 }
86 
87 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
89  return SM_Matrix_->getDomainMap();
90 }
91 
92 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94  return SM_Matrix_->getRangeMap();
95 }
96 
97 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
101  bool useKokkosDefault = !Node::is_serial;
102 
103  RCP<ParameterList> params = rcp(new ParameterList("RefMaxwell"));
104 
105  params->set<RCP<Matrix> >("Dk_1", Teuchos::null);
106  params->set<RCP<Matrix> >("Dk_2", Teuchos::null);
107  params->set<RCP<Matrix> >("D0", Teuchos::null);
108 
109  params->set<RCP<Matrix> >("M1_beta", Teuchos::null);
110  params->set<RCP<Matrix> >("M1_alpha", Teuchos::null);
111  // for backwards compatibility
112  params->set<RCP<Matrix> >("Ms", Teuchos::null);
113 
114  params->set<RCP<Matrix> >("Mk_one", Teuchos::null);
115  params->set<RCP<Matrix> >("Mk_1_one", Teuchos::null);
116  // for backwards compatibility
117  params->set<RCP<Matrix> >("M1", Teuchos::null);
118 
119  params->set<RCP<Matrix> >("invMk_1_invBeta", Teuchos::null);
120  params->set<RCP<Matrix> >("invMk_2_invAlpha", Teuchos::null);
121  // for backwards compatibility
122  params->set<RCP<Matrix> >("M0inv", Teuchos::null);
123 
124  params->set<RCP<MultiVector> >("Nullspace", Teuchos::null);
125  params->set<RCP<RealValuedMultiVector> >("Coordinates", Teuchos::null);
126 
127  auto spaceValidator = rcp(new Teuchos::EnhancedNumberValidator<int>(1, 2));
128  params->set("refmaxwell: space number", 1, "", spaceValidator);
129  params->set("verbosity", MasterList::getDefault<std::string>("verbosity"));
130  params->set("use kokkos refactor", useKokkosDefault);
131  params->set("half precision", false);
132  params->set("parameterlist: syntax", MasterList::getDefault<std::string>("parameterlist: syntax"));
133  params->set("output filename", MasterList::getDefault<std::string>("output filename"));
134  params->set("print initial parameters", MasterList::getDefault<bool>("print initial parameters"));
135  params->set("refmaxwell: disable addon", MasterList::getDefault<bool>("refmaxwell: disable addon"));
136  params->set("refmaxwell: disable addon 22", true);
137  params->set("refmaxwell: mode", MasterList::getDefault<std::string>("refmaxwell: mode"));
138  params->set("refmaxwell: use as preconditioner", MasterList::getDefault<bool>("refmaxwell: use as preconditioner"));
139  params->set("refmaxwell: dump matrices", MasterList::getDefault<bool>("refmaxwell: dump matrices"));
140  params->set("refmaxwell: enable reuse", MasterList::getDefault<bool>("refmaxwell: enable reuse"));
141  params->set("refmaxwell: skip first (1,1) level", MasterList::getDefault<bool>("refmaxwell: skip first (1,1) level"));
142  params->set("refmaxwell: skip first (2,2) level", false);
143  params->set("multigrid algorithm", "Unsmoothed");
144  params->set("transpose: use implicit", MasterList::getDefault<bool>("transpose: use implicit"));
145  params->set("rap: triple product", MasterList::getDefault<bool>("rap: triple product"));
146  params->set("rap: fix zero diagonals", true);
147  params->set("rap: fix zero diagonals threshold", MasterList::getDefault<double>("rap: fix zero diagonals threshold"));
148  params->set("fuse prolongation and update", MasterList::getDefault<bool>("fuse prolongation and update"));
149  params->set("refmaxwell: subsolves on subcommunicators", MasterList::getDefault<bool>("refmaxwell: subsolves on subcommunicators"));
150  params->set("refmaxwell: subsolves striding", 1);
151  params->set("refmaxwell: row sum drop tol (1,1)", MasterList::getDefault<double>("aggregation: row sum drop tol"));
152  params->set("sync timers", false);
153  params->set("refmaxwell: num iters coarse 11", 1);
154  params->set("refmaxwell: num iters 22", 1);
155  params->set("refmaxwell: apply BCs to Anodal", false);
156  params->set("refmaxwell: apply BCs to coarse 11", true);
157  params->set("refmaxwell: apply BCs to 22", true);
158  params->set("refmaxwell: max coarse size", 1);
159 
160  ParameterList &precList11 = params->sublist("refmaxwell: 11list");
161  precList11.disableRecursiveValidation();
162  ParameterList &precList22 = params->sublist("refmaxwell: 22list");
163  precList22.disableRecursiveValidation();
164 
165  params->set("smoother: type", "CHEBYSHEV");
166  ParameterList &smootherList = params->sublist("smoother: params");
167  smootherList.disableRecursiveValidation();
168  params->set("smoother: pre type", "NONE");
169  ParameterList &preSmootherList = params->sublist("smoother: pre params");
170  preSmootherList.disableRecursiveValidation();
171  params->set("smoother: post type", "NONE");
172  ParameterList &postSmootherList = params->sublist("smoother: post params");
173  postSmootherList.disableRecursiveValidation();
174 
175  ParameterList &matvecParams = params->sublist("matvec params");
176  matvecParams.disableRecursiveValidation();
177 
178  params->set("multigrid algorithm", "unsmoothed");
179  params->set("aggregation: type", MasterList::getDefault<std::string>("aggregation: type"));
180  params->set("aggregation: drop tol", MasterList::getDefault<double>("aggregation: drop tol"));
181  params->set("aggregation: drop scheme", MasterList::getDefault<std::string>("aggregation: drop scheme"));
182  params->set("aggregation: distance laplacian algo", MasterList::getDefault<std::string>("aggregation: distance laplacian algo"));
183  params->set("aggregation: min agg size", MasterList::getDefault<int>("aggregation: min agg size"));
184  params->set("aggregation: max agg size", MasterList::getDefault<int>("aggregation: max agg size"));
185  params->set("aggregation: match ML phase1", MasterList::getDefault<bool>("aggregation: match ML phase1"));
186  params->set("aggregation: match ML phase2a", MasterList::getDefault<bool>("aggregation: match ML phase2a"));
187  params->set("aggregation: match ML phase2b", MasterList::getDefault<bool>("aggregation: match ML phase2b"));
188  params->set("aggregation: export visualization data", MasterList::getDefault<bool>("aggregation: export visualization data"));
189 
190  return params;
191 }
192 
193 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
195  if (list.isType<std::string>("parameterlist: syntax") && list.get<std::string>("parameterlist: syntax") == "ml") {
196  Teuchos::ParameterList newList;
197  {
198  Teuchos::ParameterList newList2 = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list, "refmaxwell"));
199  RCP<Teuchos::ParameterList> validateParameters = getValidParamterList();
200  for (auto it = newList2.begin(); it != newList2.end(); ++it) {
201  const std::string &entry_name = it->first;
202  if (validateParameters->isParameter(entry_name)) {
203  ParameterEntry theEntry = newList2.getEntry(entry_name);
204  newList.setEntry(entry_name, theEntry);
205  }
206  }
207  }
208 
209  if (list.isSublist("refmaxwell: 11list") && list.sublist("refmaxwell: 11list").isSublist("edge matrix free: coarse"))
210  newList.sublist("refmaxwell: 11list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse"), "SA"));
211  if (list.isSublist("refmaxwell: 22list"))
212  newList.sublist("refmaxwell: 22list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 22list"), "SA"));
213  list = newList;
214  }
215 
216  parameterList_ = list;
217  parameterList_.validateParametersAndSetDefaults(*getValidParamterList());
218  std::string verbosityLevel = parameterList_.get<std::string>("verbosity");
220  std::string outputFilename = parameterList_.get<std::string>("output filename");
221  if (outputFilename != "")
222  VerboseObject::SetMueLuOFileStream(outputFilename);
223  if (parameterList_.isType<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"))
224  VerboseObject::SetMueLuOStream(parameterList_.get<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"));
225 
226  if (parameterList_.get<bool>("print initial parameters"))
227  GetOStream(static_cast<MsgType>(Runtime1), 0) << parameterList_ << std::endl;
228  disable_addon_ = parameterList_.get<bool>("refmaxwell: disable addon");
229  disable_addon_22_ = parameterList_.get<bool>("refmaxwell: disable addon 22");
230  mode_ = parameterList_.get<std::string>("refmaxwell: mode");
231  use_as_preconditioner_ = parameterList_.get<bool>("refmaxwell: use as preconditioner");
232  dump_matrices_ = parameterList_.get<bool>("refmaxwell: dump matrices");
233  enable_reuse_ = parameterList_.get<bool>("refmaxwell: enable reuse");
234  implicitTranspose_ = parameterList_.get<bool>("transpose: use implicit");
235  fuseProlongationAndUpdate_ = parameterList_.get<bool>("fuse prolongation and update");
236  skipFirst11Level_ = parameterList_.get<bool>("refmaxwell: skip first (1,1) level");
237  skipFirst22Level_ = parameterList_.get<bool>("refmaxwell: skip first (2,2) level");
238  if (spaceNumber_ == 1)
239  skipFirst22Level_ = false;
240  syncTimers_ = parameterList_.get<bool>("sync timers");
241  useKokkos_ = parameterList_.get<bool>("use kokkos refactor");
242  numItersCoarse11_ = parameterList_.get<int>("refmaxwell: num iters coarse 11");
243  numIters22_ = parameterList_.get<int>("refmaxwell: num iters 22");
244  applyBCsToAnodal_ = parameterList_.get<bool>("refmaxwell: apply BCs to Anodal");
245  applyBCsToCoarse11_ = parameterList_.get<bool>("refmaxwell: apply BCs to coarse 11");
246  applyBCsTo22_ = parameterList_.get<bool>("refmaxwell: apply BCs to 22");
247 
248  precList11_ = parameterList_.sublist("refmaxwell: 11list");
249  if (!precList11_.isType<std::string>("Preconditioner Type") &&
250  !precList11_.isType<std::string>("smoother: type") &&
251  !precList11_.isType<std::string>("smoother: pre type") &&
252  !precList11_.isType<std::string>("smoother: post type")) {
253  precList11_.set("smoother: type", "CHEBYSHEV");
254  precList11_.sublist("smoother: params").set("chebyshev: degree", 2);
255  precList11_.sublist("smoother: params").set("chebyshev: ratio eigenvalue", 5.4);
256  precList11_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations", 30);
257  }
258 
259  precList22_ = parameterList_.sublist("refmaxwell: 22list");
260  if (!precList22_.isType<std::string>("Preconditioner Type") &&
261  !precList22_.isType<std::string>("smoother: type") &&
262  !precList22_.isType<std::string>("smoother: pre type") &&
263  !precList22_.isType<std::string>("smoother: post type")) {
264  precList22_.set("smoother: type", "CHEBYSHEV");
265  precList22_.sublist("smoother: params").set("chebyshev: degree", 2);
266  precList22_.sublist("smoother: params").set("chebyshev: ratio eigenvalue", 7.0);
267  precList22_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations", 30);
268  }
269 
270  if (!parameterList_.isType<std::string>("smoother: type") && !parameterList_.isType<std::string>("smoother: pre type") && !parameterList_.isType<std::string>("smoother: post type")) {
271  list.set("smoother: type", "CHEBYSHEV");
272  list.sublist("smoother: params").set("chebyshev: degree", 2);
273  list.sublist("smoother: params").set("chebyshev: ratio eigenvalue", 20.0);
274  list.sublist("smoother: params").set("chebyshev: eigenvalue max iterations", 30);
275  }
276 
277  if (enable_reuse_ &&
278  !precList11_.isType<std::string>("Preconditioner Type") &&
279  !precList11_.isParameter("reuse: type"))
280  precList11_.set("reuse: type", "full");
281  if (enable_reuse_ &&
282  !precList22_.isType<std::string>("Preconditioner Type") &&
283  !precList22_.isParameter("reuse: type"))
284  precList22_.set("reuse: type", "full");
285 
286  // This should be taken out again as soon as
287  // CoalesceDropFactory_kokkos supports BlockSize > 1 and
288  // drop tol != 0.0
289  if (useKokkos_ && precList11_.isParameter("aggregation: drop tol") && precList11_.get<double>("aggregation: drop tol") != 0.0) {
290  GetOStream(Warnings0) << solverName_ + "::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
291  << "support BlockSize > 1 and drop tol != 0.0" << std::endl;
292  precList11_.set("aggregation: drop tol", 0.0);
293  }
294 }
295 
296 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
298  using memory_space = typename Node::device_type::memory_space;
299 
300 #ifdef HAVE_MUELU_CUDA
301  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStart();
302 #endif
303 
304  std::string timerLabel;
305  if (reuse)
306  timerLabel = "compute (reuse)";
307  else
308  timerLabel = "compute";
309  RCP<Teuchos::TimeMonitor> tmCompute = getTimer(timerLabel);
310 
312  // COMMENTED OUT SINCE WE SHOULD NOT NEED THIS ANYMORE.
313  // Remove explicit zeros from matrices
314  // Maxwell_Utils<SC,LO,GO,NO>::removeExplicitZeros(parameterList_,D0_,SM_Matrix_,Mk_one_,M1_beta_);
315  // if (!Dk_1_.is_null())
316  // Dk_1_ = Maxwell_Utils<SC,LO,GO,NO>::removeExplicitZeros(Dk_1_, 1e-10, false);
317 
318  if (IsPrint(Statistics2)) {
319  RCP<ParameterList> params = rcp(new ParameterList());
320  params->set("printLoadBalancingInfo", true);
321  params->set("printCommInfo", true);
322  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*SM_Matrix_, "SM_Matrix", params);
323  }
324 
326  // Detect Dirichlet boundary conditions
327  if (!reuse) {
328  magnitudeType rowSumTol = parameterList_.get<double>("refmaxwell: row sum drop tol (1,1)");
330  BCrows11_, BCcols22_, BCdomain22_,
331  globalNumberBoundaryUnknowns11_,
332  globalNumberBoundaryUnknowns22_,
333  onlyBoundary11_, onlyBoundary22_);
334  if (spaceNumber_ == 2) {
335  Kokkos::View<bool *, memory_space> BCcolsEdge = Kokkos::View<bool *, memory_space>(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), Dk_1_->getColMap()->getLocalNumElements());
336  Kokkos::View<bool *, memory_space> BCdomainEdge = Kokkos::View<bool *, memory_space>(Kokkos::ViewAllocateWithoutInitializing("dirichletDomains"), Dk_1_->getDomainMap()->getLocalNumElements());
337  Utilities::DetectDirichletColsAndDomains(*Dk_1_, BCrows11_, BCcolsEdge, BCdomainEdge);
338 
339  Kokkos::View<bool *, memory_space> BCcolsNode = Kokkos::View<bool *, memory_space>(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), D0_->getColMap()->getLocalNumElements());
340  Kokkos::View<bool *, memory_space> BCdomainNode = Kokkos::View<bool *, memory_space>(Kokkos::ViewAllocateWithoutInitializing("dirichletDomains"), D0_->getDomainMap()->getLocalNumElements());
341  Utilities::DetectDirichletColsAndDomains(*D0_, BCdomainEdge, BCcolsNode, BCdomainNode);
342  BCdomain22_ = BCdomainNode;
343  }
344  if (IsPrint(Statistics2)) {
345  GetOStream(Statistics2) << solverName_ + "::compute(): Detected " << globalNumberBoundaryUnknowns11_ << " BC rows and " << globalNumberBoundaryUnknowns22_ << " BC columns." << std::endl;
346  }
347  dump(BCrows11_, "BCrows11.m");
348  dump(BCcols22_, "BCcols22.m");
349  dump(BCdomain22_, "BCdomain22.m");
350  }
351 
352  if (onlyBoundary11_) {
353  // All unknowns of the (1,1) block have been detected as boundary unknowns.
354  // Do not attempt to construct sub-hierarchies, but just set up a single level preconditioner.
355  GetOStream(Warnings0) << "All unknowns of the (1,1) block have been detected as boundary unknowns!" << std::endl;
356  mode_ = "none";
357  setFineLevelSmoother11();
358  return;
359  }
360 
362 
363  dim_ = NodalCoords_->getNumVectors();
364 
366  // build special prolongators
367  if (!reuse) {
369  // build nullspace for (1,1)-block (if necessary)
370  if (Nullspace11_ != null) { // no need to do anything - nullspace is built
371  TEUCHOS_ASSERT(Nullspace11_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
372  } else if (NodalCoords_ != null) {
373  Nullspace11_ = buildNullspace(spaceNumber_, BCrows11_, skipFirst11Level_);
374  } else {
375  GetOStream(Errors) << solverName_ + "::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
376  }
377 
378  // build special prolongator for (1,1)-block
379  {
380  RCP<Matrix> A11_nodal;
381  if (skipFirst11Level_) {
382  // Form A11_nodal = D0^T * M1_beta * D0 (aka TMT_agg)
383  std::string label("D0^T*M1_beta*D0");
384  A11_nodal = Maxwell_Utils<SC, LO, GO, NO>::PtAPWrapper(M1_beta_, D0_, parameterList_, label);
385 
386  if (applyBCsToAnodal_) {
387  // Apply boundary conditions to A11_nodal
388  Utilities::ApplyOAZToMatrixRows(A11_nodal, BCdomain22_);
389  }
390  A11_nodal->setObjectLabel(solverName_ + " (1,1) A_nodal");
391  dump(A11_nodal, "A11_nodal.m");
392  }
393  // release it because we won't need it anymore
394  M1_beta_ = Teuchos::null;
395 
396  // build special prolongator
397  buildProlongator(spaceNumber_, A11_nodal, Nullspace11_, P11_, NullspaceCoarse11_, CoordsCoarse11_);
398 
399  dump(P11_, "P11.m");
400  }
401 
403  // build nullspace for (2,2)-block (if necessary)
404  if (Nullspace22_ != null) {
405  TEUCHOS_ASSERT(Nullspace22_->getMap()->isCompatible(*(Dk_1_->getDomainMap())));
406  } else if (NodalCoords_ != null)
407  Nullspace22_ = buildNullspace(spaceNumber_ - 1, BCdomain22_, skipFirst22Level_);
408  else {
409  GetOStream(Errors) << solverName_ + "::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
410  }
411 
412  // build special prolongator for (2,2)-block
413  {
414  RCP<Matrix> A22_nodal;
415  if (skipFirst22Level_) {
416  // Form A22_nodal = D0^T * M1_alpha * D0
417  std::string label("D0^T*M1_alpha*D0");
418  A22_nodal = Maxwell_Utils<SC, LO, GO, NO>::PtAPWrapper(M1_alpha_, D0_, parameterList_, label);
419 
420  if (applyBCsToAnodal_) {
421  // Apply boundary conditions to A22_nodal
422  Utilities::ApplyOAZToMatrixRows(A22_nodal, BCdomain22_);
423  }
424  A22_nodal->setObjectLabel(solverName_ + " (2,2) A_nodal");
425  dump(A22_nodal, "A22_nodal.m");
426  }
427  // release it because we won't need it anymore
428  M1_alpha_ = Teuchos::null;
429 
430  // build special prolongator
431  buildProlongator(spaceNumber_ - 1, A22_nodal, Nullspace22_, P22_, CoarseNullspace22_, Coords22_);
432 
433  dump(P22_, "P22.m");
434  }
435  }
436 
438  // build coarse grid operator for (1,1)-block
439  buildCoarse11Matrix();
440 
442  // determine the communicator sizes for (1,1)- and (2,2)-blocks
443  bool doRebalancing;
444  int rebalanceStriding, numProcsCoarseA11, numProcsA22;
445  if (!reuse)
446  this->determineSubHierarchyCommSizes(doRebalancing, rebalanceStriding, numProcsCoarseA11, numProcsA22);
447  else
448  doRebalancing = false;
449 
450  // rebalance the coarse A11 matrix, as well as P11, CoordsCoarse11 and Addon11
451  if (!reuse && doRebalancing)
452  rebalanceCoarse11Matrix(rebalanceStriding, numProcsCoarseA11);
453  if (!coarseA11_.is_null()) {
454  dump(coarseA11_, "coarseA11.m");
455  if (!reuse) {
456  dumpCoords(CoordsCoarse11_, "CoordsCoarse11.m");
457  dump(NullspaceCoarse11_, "NullspaceCoarse11.m");
458  }
459  }
460 
461  if (!reuse) {
462  if (!implicitTranspose_) {
463  R11_ = Utilities::Transpose(*P11_);
464  dump(R11_, "R11.m");
465  }
466  }
468  // build multigrid for coarse (1,1)-block
469  if (!coarseA11_.is_null()) {
471  std::string label("coarseA11");
472  setupSubSolve(HierarchyCoarse11_, thyraPrecOpH_, coarseA11_, NullspaceCoarse11_, CoordsCoarse11_, precList11_, label, reuse);
473  VerboseObject::SetDefaultVerbLevel(verbosityLevel);
474  }
475 
477  // Apply BCs to columns of Dk_1
478  if (!reuse && applyBCsTo22_) {
479  GetOStream(Runtime0) << solverName_ + "::compute(): nuking BC columns of Dk_1" << std::endl;
480 
481  Dk_1_->resumeFill();
482  Scalar replaceWith = (Dk_1_->getRowMap()->lib() == Xpetra::UseEpetra) ? Teuchos::ScalarTraits<SC>::eps() : Teuchos::ScalarTraits<SC>::zero();
483  Utilities::ZeroDirichletCols(Dk_1_, BCcols22_, replaceWith);
484  Dk_1_->fillComplete(Dk_1_->getDomainMap(), Dk_1_->getRangeMap());
485  }
486 
488  // Build A22 = Dk_1^T SM Dk_1 and hierarchy for A22
489  if (!onlyBoundary22_) {
490  GetOStream(Runtime0) << solverName_ + "::compute(): building MG for (2,2)-block" << std::endl;
491 
492  // Build A22 = Dk_1^T * SM * Dk_1 and rebalance it, as well as Dk_1_ and P22_ and Coords22_
493  build22Matrix(reuse, doRebalancing, rebalanceStriding, numProcsA22);
494 
495  if (!P22_.is_null()) {
496  std::string label("P22^T*A22*P22");
497  coarseA22_ = Maxwell_Utils<SC, LO, GO, NO>::PtAPWrapper(A22_, P22_, parameterList_, label);
498  coarseA22_->SetFixedBlockSize(A22_->GetFixedBlockSize());
499  coarseA22_->setObjectLabel(solverName_ + " coarse (2, 2)");
500  dump(coarseA22_, "coarseA22.m");
501  }
502 
503  if (!reuse && !implicitTranspose_) {
504  Dk_1_T_ = Utilities::Transpose(*Dk_1_);
505  if (!P22_.is_null())
506  R22_ = Utilities::Transpose(*P22_);
507  }
508 
509  if (!A22_.is_null()) {
511  std::string label("A22");
512  if (!P22_.is_null()) {
513  precList22_.sublist("level 1 user data").set("A", coarseA22_);
514  precList22_.sublist("level 1 user data").set("P", P22_);
515  if (!implicitTranspose_)
516  precList22_.sublist("level 1 user data").set("R", R22_);
517  precList22_.sublist("level 1 user data").set("Nullspace", CoarseNullspace22_);
518  precList22_.sublist("level 1 user data").set("Coordinates", Coords22_);
519  // A22 is singular, we want to coarsen at least once.
520  // So we make sure coarseA22 is not just ignored.
521  int maxCoarseSize = precList22_.get("coarse: max size", MasterList::getDefault<int>("coarse: max size"));
522  int numRows = Teuchos::as<int>(coarseA22_->getGlobalNumRows());
523  if (maxCoarseSize > numRows)
524  precList22_.set("coarse: max size", numRows);
525  int maxLevels = precList22_.get("max levels", MasterList::getDefault<int>("max levels"));
526  if (maxLevels < 2)
527  precList22_.set("max levels", 2);
528  setupSubSolve(Hierarchy22_, thyraPrecOp22_, A22_, Teuchos::null, Teuchos::null, precList22_, label, reuse, /*isSingular=*/globalNumberBoundaryUnknowns11_ == 0);
529  } else
530  setupSubSolve(Hierarchy22_, thyraPrecOp22_, A22_, CoarseNullspace22_, Coords22_, precList22_, label, reuse, /*isSingular=*/globalNumberBoundaryUnknowns11_ == 0);
531 
532  VerboseObject::SetDefaultVerbLevel(verbosityLevel);
533  }
534  }
535 
537  // Apply BCs to rows of Dk_1
538  if (!reuse && !onlyBoundary22_ && applyBCsTo22_) {
539  GetOStream(Runtime0) << solverName_ + "::compute(): nuking BC rows of Dk_1" << std::endl;
540 
541  Dk_1_->resumeFill();
542  Scalar replaceWith = (Dk_1_->getRowMap()->lib() == Xpetra::UseEpetra) ? Teuchos::ScalarTraits<SC>::eps() : Teuchos::ScalarTraits<SC>::zero();
543  Utilities::ZeroDirichletRows(Dk_1_, BCrows11_, replaceWith);
544  Dk_1_->fillComplete(Dk_1_->getDomainMap(), Dk_1_->getRangeMap());
545  dump(Dk_1_, "Dk_1_nuked.m");
546  }
547 
549  // Set up the smoother on the finest level
550  setFineLevelSmoother11();
551 
552  if (!reuse) {
553  if (!ImporterCoarse11_.is_null()) {
554  RCP<const Import> ImporterP11 = ImportFactory::Build(ImporterCoarse11_->getTargetMap(), P11_->getColMap());
555  rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix()->replaceDomainMapAndImporter(ImporterCoarse11_->getTargetMap(), ImporterP11);
556  }
557 
558  if (!Importer22_.is_null()) {
559  if (enable_reuse_) {
560  DorigDomainMap_ = Dk_1_->getDomainMap();
561  DorigImporter_ = rcp_dynamic_cast<CrsMatrixWrap>(Dk_1_)->getCrsMatrix()->getCrsGraph()->getImporter();
562  }
563  RCP<const Import> ImporterD = ImportFactory::Build(Importer22_->getTargetMap(), Dk_1_->getColMap());
564  rcp_dynamic_cast<CrsMatrixWrap>(Dk_1_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), ImporterD);
565  }
566 
567 #ifdef HAVE_MUELU_TPETRA
568  if ((!Dk_1_T_.is_null()) &&
569  (!R11_.is_null()) &&
570  (!rcp_dynamic_cast<CrsMatrixWrap>(Dk_1_T_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
571  (!rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
572  (Dk_1_T_->getColMap()->lib() == Xpetra::UseTpetra) &&
573  (R11_->getColMap()->lib() == Xpetra::UseTpetra))
574  Dk_1_T_R11_colMapsMatch_ = Dk_1_T_->getColMap()->isSameAs(*R11_->getColMap());
575  else
576 #endif
577  Dk_1_T_R11_colMapsMatch_ = false;
578  if (Dk_1_T_R11_colMapsMatch_)
579  GetOStream(Runtime0) << solverName_ + "::compute(): Dk_1_T and R11 have matching colMaps" << std::endl;
580 
581  // Allocate MultiVectors for solve
582  allocateMemory(1);
583 
584  // apply matvec params
585  if (parameterList_.isSublist("matvec params")) {
586  RCP<ParameterList> matvecParams = rcpFromRef(parameterList_.sublist("matvec params"));
587  Maxwell_Utils<SC, LO, GO, NO>::setMatvecParams(*SM_Matrix_, matvecParams);
590  if (!Dk_1_T_.is_null()) Maxwell_Utils<SC, LO, GO, NO>::setMatvecParams(*Dk_1_T_, matvecParams);
591  if (!R11_.is_null()) Maxwell_Utils<SC, LO, GO, NO>::setMatvecParams(*R11_, matvecParams);
592  if (!ImporterCoarse11_.is_null()) ImporterCoarse11_->setDistributorParameters(matvecParams);
593  if (!Importer22_.is_null()) Importer22_->setDistributorParameters(matvecParams);
594  }
595  if (!ImporterCoarse11_.is_null() && parameterList_.isSublist("refmaxwell: ImporterCoarse11 params")) {
596  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: ImporterCoarse11 params"));
597  ImporterCoarse11_->setDistributorParameters(importerParams);
598  }
599  if (!Importer22_.is_null() && parameterList_.isSublist("refmaxwell: Importer22 params")) {
600  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: Importer22 params"));
601  Importer22_->setDistributorParameters(importerParams);
602  }
603  }
604 
605  describe(GetOStream(Runtime0));
606 
607 #ifdef HAVE_MUELU_CUDA
608  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStop();
609 #endif
610 }
611 
612 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
614  determineSubHierarchyCommSizes(bool &doRebalancing, int &rebalanceStriding, int &numProcsCoarseA11, int &numProcsA22) {
615  doRebalancing = parameterList_.get<bool>("refmaxwell: subsolves on subcommunicators");
616  rebalanceStriding = parameterList_.get<int>("refmaxwell: subsolves striding", -1);
617  int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
618  if (numProcs == 1) {
619  doRebalancing = false;
620  return;
621  }
622 
623 #ifdef HAVE_MPI
624  if (doRebalancing) {
625  {
626  // decide on number of ranks for coarse (1, 1) problem
627 
628  Level level;
629  level.SetFactoryManager(null);
630  level.SetLevelID(0);
631  level.Set("A", coarseA11_);
632 
633  auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
634  ParameterList repartheurParams;
635  repartheurParams.set("repartition: start level", 0);
636  // Setting min == target on purpose.
637  int defaultTargetRows = 10000;
638  repartheurParams.set("repartition: min rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
639  repartheurParams.set("repartition: target rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
640  repartheurParams.set("repartition: min rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
641  repartheurParams.set("repartition: target rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
642  repartheurParams.set("repartition: max imbalance", precList11_.get<double>("repartition: max imbalance", 1.1));
643  repartheurFactory->SetParameterList(repartheurParams);
644 
645  level.Request("number of partitions", repartheurFactory.get());
646  repartheurFactory->Build(level);
647  numProcsCoarseA11 = level.Get<int>("number of partitions", repartheurFactory.get());
648  numProcsCoarseA11 = std::min(numProcsCoarseA11, numProcs);
649  }
650 
651  {
652  // decide on number of ranks for (2, 2) problem
653 
654  Level level;
655  level.SetFactoryManager(null);
656  level.SetLevelID(0);
657 
658  level.Set("Map", Dk_1_->getDomainMap());
659 
660  auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
661  ParameterList repartheurParams;
662  repartheurParams.set("repartition: start level", 0);
663  repartheurParams.set("repartition: use map", true);
664  // Setting min == target on purpose.
665  int defaultTargetRows = 10000;
666  repartheurParams.set("repartition: min rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
667  repartheurParams.set("repartition: target rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
668  repartheurParams.set("repartition: min rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
669  repartheurParams.set("repartition: target rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
670  // repartheurParams.set("repartition: max imbalance", precList22_.get<double>("repartition: max imbalance", 1.1));
671  repartheurFactory->SetParameterList(repartheurParams);
672 
673  level.Request("number of partitions", repartheurFactory.get());
674  repartheurFactory->Build(level);
675  numProcsA22 = level.Get<int>("number of partitions", repartheurFactory.get());
676  numProcsA22 = std::min(numProcsA22, numProcs);
677  }
678 
679  if (rebalanceStriding >= 1) {
680  TEUCHOS_ASSERT(rebalanceStriding * numProcsCoarseA11 <= numProcs);
681  TEUCHOS_ASSERT(rebalanceStriding * numProcsA22 <= numProcs);
682  if (rebalanceStriding * (numProcsCoarseA11 + numProcsA22) > numProcs) {
683  GetOStream(Warnings0) << solverName_ + "::compute(): Disabling striding = " << rebalanceStriding << ", since coarseA11 needs " << numProcsCoarseA11
684  << " procs and A22 needs " << numProcsA22 << " procs." << std::endl;
685  rebalanceStriding = -1;
686  }
687  int lclBadMatrixDistribution = (coarseA11_->getLocalNumEntries() == 0) || (Dk_1_->getDomainMap()->getLocalNumElements() == 0);
688  int gblBadMatrixDistribution = false;
689  MueLu_maxAll(SM_Matrix_->getDomainMap()->getComm(), lclBadMatrixDistribution, gblBadMatrixDistribution);
690  if (gblBadMatrixDistribution) {
691  GetOStream(Warnings0) << solverName_ + "::compute(): Disabling striding = " << rebalanceStriding << ", since coarseA11 has no entries on at least one rank or Dk_1's domain map has no entries on at least one rank." << std::endl;
692  rebalanceStriding = -1;
693  }
694  }
695 
696  if ((numProcsCoarseA11 < 0) || (numProcsA22 < 0) || (numProcsCoarseA11 + numProcsA22 > numProcs)) {
697  GetOStream(Warnings0) << solverName_ + "::compute(): Disabling rebalancing of subsolves, since partition heuristic resulted "
698  << "in undesirable number of partitions: " << numProcsCoarseA11 << ", " << numProcsA22 << std::endl;
699  doRebalancing = false;
700  }
701  }
702 #else
703  doRebalancing = false;
704 #endif // HAVE_MPI
705 }
706 
707 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
709  buildAddon(const int spaceNumber) {
710  if (spaceNumber == 0)
711  return Teuchos::null;
712 
713  std::string timerLabel;
714  if (spaceNumber == spaceNumber_) {
715  if (skipFirst11Level_)
716  timerLabel = "Build coarse addon matrix 11";
717  else
718  timerLabel = "Build addon matrix 11";
719  } else
720  timerLabel = "Build addon matrix 22";
721 
722  RCP<Teuchos::TimeMonitor> tmAddon = getTimer(timerLabel);
723 
724  RCP<Matrix> addon;
725  RCP<Matrix> Z;
726  RCP<Matrix> lumpedInverse;
727  if (spaceNumber == spaceNumber_) {
728  // catch a failure
729  TEUCHOS_TEST_FOR_EXCEPTION(invMk_1_invBeta_ == Teuchos::null, std::invalid_argument,
730  solverName_ +
731  "::buildCoarse11Matrix(): Inverse of "
732  "lumped mass matrix required for add-on (i.e. invMk_1_invBeta_ is null)");
733  lumpedInverse = invMk_1_invBeta_;
734 
735  if (skipFirst11Level_) {
736  // construct Zaux = M1 P11
737  RCP<Matrix> Zaux;
738  Zaux = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Mk_one_, false, *P11_, false, Zaux, GetOStream(Runtime0), true, true);
739  // construct Z = D* M1 P11 = D^T Zaux
740  Z = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Dk_1_, true, *Zaux, false, Z, GetOStream(Runtime0), true, true);
741  } else {
742  // construct Z = D* M1 P11 = D^T Zaux
743  Z = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Dk_1_, true, *Mk_one_, false, Z, GetOStream(Runtime0), true, true);
744  }
745 
746  } else if (spaceNumber == spaceNumber_ - 1) {
747  // catch a failure
748  TEUCHOS_TEST_FOR_EXCEPTION(invMk_2_invAlpha_ == Teuchos::null, std::invalid_argument,
749  solverName_ +
750  "::buildCoarse11Matrix(): Inverse of "
751  "lumped mass matrix required for add-on (i.e. invMk_2_invAlpha_ is null)");
752  lumpedInverse = invMk_2_invAlpha_;
753 
754  // construct Z = Dk_2^T Mk_1_one
755  Z = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Dk_2_, true, *Mk_1_one_, false, Z, GetOStream(Runtime0), true, true);
756  }
757 
758  // construct Z^T lumpedInverse Z
759  if (lumpedInverse->getGlobalMaxNumRowEntries() <= 1) {
760  // We assume that if lumpedInverse has at most one entry per row then
761  // these are all diagonal entries.
762  RCP<Vector> diag = VectorFactory::Build(lumpedInverse->getRowMap());
763  lumpedInverse->getLocalDiagCopy(*diag);
764  {
765  ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
766  for (size_t j = 0; j < diag->getMap()->getLocalNumElements(); j++) {
767  diagVals[j] = Teuchos::ScalarTraits<Scalar>::squareroot(diagVals[j]);
768  }
769  }
770  if (Z->getRowMap()->isSameAs(*(diag->getMap())))
771  Z->leftScale(*diag);
772  else {
773  RCP<Import> importer = ImportFactory::Build(diag->getMap(), Z->getRowMap());
774  RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
775  diag2->doImport(*diag, *importer, Xpetra::INSERT);
776  Z->leftScale(*diag2);
777  }
778  addon = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Z, true, *Z, false, addon, GetOStream(Runtime0), true, true);
779  } else if (parameterList_.get<bool>("rap: triple product", false) == false) {
780  RCP<Matrix> C2;
781  // construct C2 = lumpedInverse Z
782  C2 = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*lumpedInverse, false, *Z, false, C2, GetOStream(Runtime0), true, true);
783  // construct Matrix2 = Z* M0inv Z = Z* C2
784  addon = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Z, true, *C2, false, addon, GetOStream(Runtime0), true, true);
785  } else {
786  addon = MatrixFactory::Build(Z->getDomainMap());
787  // construct Matrix2 = Z^T lumpedInverse Z
789  MultiplyRAP(*Z, true, *lumpedInverse, false, *Z, false, *addon, true, true);
790  }
791  return addon;
792 }
793 
794 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
796  RCP<Teuchos::TimeMonitor> tm = getTimer("Build coarse (1,1) matrix");
797 
799 
800  // coarse matrix for P11* (M1 + D1* M2 D1) P11
801  RCP<Matrix> temp;
802  temp = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*SM_Matrix_, false, *P11_, false, temp, GetOStream(Runtime0), true, true);
803  if (ImporterCoarse11_.is_null())
804  coarseA11_ = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*P11_, true, *temp, false, coarseA11_, GetOStream(Runtime0), true, true);
805  else {
806  RCP<Matrix> temp2;
807  temp2 = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*P11_, true, *temp, false, temp2, GetOStream(Runtime0), true, true);
808 
809  RCP<const Map> map = ImporterCoarse11_->getTargetMap()->removeEmptyProcesses();
810  temp2->removeEmptyProcessesInPlace(map);
811  if (!temp2.is_null() && temp2->getRowMap().is_null())
812  temp2 = Teuchos::null;
813  coarseA11_ = temp2;
814  }
815 
816  if (!disable_addon_) {
817  RCP<Matrix> addon;
818 
819  if (!coarseA11_.is_null() && Addon11_.is_null()) {
820  addon = buildAddon(spaceNumber_);
821  // Should we keep the addon for next setup?
822  if (enable_reuse_)
823  Addon11_ = addon;
824  } else
825  addon = Addon11_;
826 
827  if (!coarseA11_.is_null()) {
828  // add matrices together
829  RCP<Matrix> newCoarseA11;
830  Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*coarseA11_, false, one, *addon, false, one, newCoarseA11, GetOStream(Runtime0));
831  newCoarseA11->fillComplete();
832  coarseA11_ = newCoarseA11;
833  }
834  }
835 
836  if (!coarseA11_.is_null() && !skipFirst11Level_) {
837  ArrayRCP<bool> coarseA11BCrows;
838  coarseA11BCrows.resize(coarseA11_->getRowMap()->getLocalNumElements());
839  for (size_t i = 0; i < BCdomain22_.size(); i++)
840  for (size_t k = 0; k < dim_; k++)
841  coarseA11BCrows[i * dim_ + k] = BCdomain22_(i);
842  magnitudeType rowSumTol = parameterList_.get<double>("refmaxwell: row sum drop tol (1,1)");
843  if (rowSumTol > 0.)
844  Utilities::ApplyRowSumCriterion(*coarseA11_, rowSumTol, coarseA11BCrows);
845  if (applyBCsToCoarse11_)
846  Utilities::ApplyOAZToMatrixRows(coarseA11_, coarseA11BCrows);
847  }
848 
849  if (!coarseA11_.is_null()) {
850  // If we already applied BCs to A_nodal, we likely do not need
851  // to fix up coarseA11.
852  // If we did not apply BCs to A_nodal, we now need to correct
853  // the zero diagonals of coarseA11, since we did nuke the nullspace.
854 
855  bool fixZeroDiagonal = !applyBCsToAnodal_;
856  if (precList11_.isParameter("rap: fix zero diagonals"))
857  fixZeroDiagonal = precList11_.get<bool>("rap: fix zero diagonals");
858 
859  if (fixZeroDiagonal) {
860  magnitudeType threshold = 1e-16;
861  Scalar replacement = 1.0;
862  if (precList11_.isType<magnitudeType>("rap: fix zero diagonals threshold"))
863  threshold = precList11_.get<magnitudeType>("rap: fix zero diagonals threshold");
864  else if (precList11_.isType<double>("rap: fix zero diagonals threshold"))
865  threshold = Teuchos::as<magnitudeType>(precList11_.get<double>("rap: fix zero diagonals threshold"));
866  if (precList11_.isType<double>("rap: fix zero diagonals replacement"))
867  replacement = Teuchos::as<Scalar>(precList11_.get<double>("rap: fix zero diagonals replacement"));
868  Xpetra::MatrixUtils<SC, LO, GO, NO>::CheckRepairMainDiagonal(coarseA11_, true, GetOStream(Warnings1), threshold, replacement);
869  }
870 
871  // Set block size
872  coarseA11_->SetFixedBlockSize(dim_);
873  if (skipFirst11Level_)
874  coarseA11_->setObjectLabel(solverName_ + " coarse (1,1)");
875  else
876  coarseA11_->setObjectLabel(solverName_ + " (1,1)");
877  }
878 }
879 
880 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
882  rebalanceCoarse11Matrix(const int rebalanceStriding, const int numProcsCoarseA11) {
883 #ifdef HAVE_MPI
884  // rebalance coarseA11
885  RCP<Teuchos::TimeMonitor> tm = getTimer("Rebalance coarseA11");
886 
887  Level fineLevel, coarseLevel;
888  fineLevel.SetFactoryManager(null);
889  coarseLevel.SetFactoryManager(null);
890  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
891  fineLevel.SetLevelID(0);
892  coarseLevel.SetLevelID(1);
893  coarseLevel.Set("A", coarseA11_);
894  coarseLevel.Set("P", P11_);
895  coarseLevel.Set("Coordinates", CoordsCoarse11_);
896  if (!NullspaceCoarse11_.is_null())
897  coarseLevel.Set("Nullspace", NullspaceCoarse11_);
898  coarseLevel.Set("number of partitions", numProcsCoarseA11);
899  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
900 
901  coarseLevel.setlib(coarseA11_->getDomainMap()->lib());
902  fineLevel.setlib(coarseA11_->getDomainMap()->lib());
903  coarseLevel.setObjectLabel(solverName_ + " coarse (1,1)");
904  fineLevel.setObjectLabel(solverName_ + " coarse (1,1)");
905 
906  std::string partName = precList11_.get<std::string>("repartition: partitioner", "zoltan2");
907  RCP<Factory> partitioner;
908  if (partName == "zoltan") {
909 #ifdef HAVE_MUELU_ZOLTAN
910  partitioner = rcp(new ZoltanInterface());
911  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
912  // partitioner->SetFactory("number of partitions", repartheurFactory);
913 #else
914  throw Exceptions::RuntimeError("Zoltan interface is not available");
915 #endif
916  } else if (partName == "zoltan2") {
917 #ifdef HAVE_MUELU_ZOLTAN2
918  partitioner = rcp(new Zoltan2Interface());
919  ParameterList partParams;
920  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList11_.sublist("repartition: params", false)));
921  partParams.set("ParameterList", partpartParams);
922  partitioner->SetParameterList(partParams);
923  // partitioner->SetFactory("number of partitions", repartheurFactory);
924 #else
925  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
926 #endif
927  }
928 
929  auto repartFactory = rcp(new RepartitionFactory());
930  ParameterList repartParams;
931  repartParams.set("repartition: print partition distribution", precList11_.get<bool>("repartition: print partition distribution", false));
932  repartParams.set("repartition: remap parts", precList11_.get<bool>("repartition: remap parts", true));
933  if (rebalanceStriding >= 1) {
934  bool acceptPart = (SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
935  if (SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsCoarseA11 * rebalanceStriding)
936  acceptPart = false;
937  repartParams.set("repartition: remap accept partition", acceptPart);
938  }
939  repartFactory->SetParameterList(repartParams);
940  // repartFactory->SetFactory("number of partitions", repartheurFactory);
941  repartFactory->SetFactory("Partition", partitioner);
942 
943  auto newP = rcp(new RebalanceTransferFactory());
944  ParameterList newPparams;
945  newPparams.set("type", "Interpolation");
946  newPparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
947  newPparams.set("repartition: use subcommunicators", true);
948  newPparams.set("repartition: rebalance Nullspace", !NullspaceCoarse11_.is_null());
949  newP->SetFactory("Coordinates", NoFactory::getRCP());
950  if (!NullspaceCoarse11_.is_null())
951  newP->SetFactory("Nullspace", NoFactory::getRCP());
952  newP->SetParameterList(newPparams);
953  newP->SetFactory("Importer", repartFactory);
954 
955  auto newA = rcp(new RebalanceAcFactory());
956  ParameterList rebAcParams;
957  rebAcParams.set("repartition: use subcommunicators", true);
958  newA->SetParameterList(rebAcParams);
959  newA->SetFactory("Importer", repartFactory);
960 
961  coarseLevel.Request("P", newP.get());
962  coarseLevel.Request("Importer", repartFactory.get());
963  coarseLevel.Request("A", newA.get());
964  coarseLevel.Request("Coordinates", newP.get());
965  if (!NullspaceCoarse11_.is_null())
966  coarseLevel.Request("Nullspace", newP.get());
967  repartFactory->Build(coarseLevel);
968 
969  if (!precList11_.get<bool>("repartition: rebalance P and R", false))
970  ImporterCoarse11_ = coarseLevel.Get<RCP<const Import> >("Importer", repartFactory.get());
971  P11_ = coarseLevel.Get<RCP<Matrix> >("P", newP.get());
972  coarseA11_ = coarseLevel.Get<RCP<Matrix> >("A", newA.get());
973  CoordsCoarse11_ = coarseLevel.Get<RCP<RealValuedMultiVector> >("Coordinates", newP.get());
974  if (!NullspaceCoarse11_.is_null())
975  NullspaceCoarse11_ = coarseLevel.Get<RCP<MultiVector> >("Nullspace", newP.get());
976 
977  if (!coarseA11_.is_null()) {
978  // Set block size
979  coarseA11_->SetFixedBlockSize(dim_);
980  if (skipFirst11Level_)
981  coarseA11_->setObjectLabel(solverName_ + " coarse (1,1)");
982  else
983  coarseA11_->setObjectLabel(solverName_ + " (1,1)");
984  }
985 
986  coarseA11_AP_reuse_data_ = Teuchos::null;
987  coarseA11_RAP_reuse_data_ = Teuchos::null;
988 
989  if (!disable_addon_ && enable_reuse_) {
990  // Rebalance the addon for next setup
991  RCP<const Import> ImporterCoarse11 = coarseLevel.Get<RCP<const Import> >("Importer", repartFactory.get());
992  RCP<const Map> targetMap = ImporterCoarse11->getTargetMap();
993  ParameterList XpetraList;
994  XpetraList.set("Restrict Communicator", true);
995  Addon11_ = MatrixFactory::Build(Addon11_, *ImporterCoarse11, *ImporterCoarse11, targetMap, targetMap, rcp(&XpetraList, false));
996  }
997 #endif
998 }
999 
1000 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1001 void RefMaxwell<Scalar, LocalOrdinal, GlobalOrdinal, Node>::build22Matrix(const bool reuse, const bool doRebalancing, const int rebalanceStriding, const int numProcsA22) {
1002  if (!reuse) { // build fine grid operator for (2,2)-block, Dk_1^T SM Dk_1 (aka TMT)
1003  RCP<Teuchos::TimeMonitor> tm = getTimer("Build A22");
1004 
1005  Level fineLevel, coarseLevel;
1006  fineLevel.SetFactoryManager(null);
1007  coarseLevel.SetFactoryManager(null);
1008  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1009  fineLevel.SetLevelID(0);
1010  coarseLevel.SetLevelID(1);
1011  fineLevel.Set("A", SM_Matrix_);
1012  coarseLevel.Set("P", Dk_1_);
1013  coarseLevel.Set("Coordinates", Coords22_);
1014 
1015  coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
1016  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
1017  coarseLevel.setObjectLabel(solverName_ + " (2,2)");
1018  fineLevel.setObjectLabel(solverName_ + " (2,2)");
1019 
1020  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
1021  ParameterList rapList = *(rapFact->GetValidParameterList());
1022  rapList.set("transpose: use implicit", true);
1023  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
1024  rapList.set("rap: fix zero diagonals threshold", parameterList_.get<double>("rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
1025  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
1026  rapFact->SetParameterList(rapList);
1027 
1028  if (!A22_AP_reuse_data_.is_null()) {
1029  coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
1030  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", A22_AP_reuse_data_, rapFact.get());
1031  }
1032  if (!A22_RAP_reuse_data_.is_null()) {
1033  coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
1034  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", A22_RAP_reuse_data_, rapFact.get());
1035  }
1036 
1037 #ifdef HAVE_MPI
1038  if (doRebalancing) {
1039  coarseLevel.Set("number of partitions", numProcsA22);
1040  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
1041 
1042  std::string partName = precList22_.get<std::string>("repartition: partitioner", "zoltan2");
1043  RCP<Factory> partitioner;
1044  if (partName == "zoltan") {
1045 #ifdef HAVE_MUELU_ZOLTAN
1046  partitioner = rcp(new ZoltanInterface());
1047  partitioner->SetFactory("A", rapFact);
1048  // partitioner->SetFactory("number of partitions", repartheurFactory);
1049  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
1050 #else
1051  throw Exceptions::RuntimeError("Zoltan interface is not available");
1052 #endif
1053  } else if (partName == "zoltan2") {
1054 #ifdef HAVE_MUELU_ZOLTAN2
1055  partitioner = rcp(new Zoltan2Interface());
1056  ParameterList partParams;
1057  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList22_.sublist("repartition: params", false)));
1058  partParams.set("ParameterList", partpartParams);
1059  partitioner->SetParameterList(partParams);
1060  partitioner->SetFactory("A", rapFact);
1061  // partitioner->SetFactory("number of partitions", repartheurFactory);
1062 #else
1063  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
1064 #endif
1065  }
1066 
1067  auto repartFactory = rcp(new RepartitionFactory());
1068  ParameterList repartParams;
1069  repartParams.set("repartition: print partition distribution", precList22_.get<bool>("repartition: print partition distribution", false));
1070  repartParams.set("repartition: remap parts", precList22_.get<bool>("repartition: remap parts", true));
1071  if (rebalanceStriding >= 1) {
1072  bool acceptPart = ((SM_Matrix_->getDomainMap()->getComm()->getSize() - 1 - SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
1073  if (SM_Matrix_->getDomainMap()->getComm()->getSize() - 1 - SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22 * rebalanceStriding)
1074  acceptPart = false;
1075  if (acceptPart)
1076  TEUCHOS_ASSERT(coarseA11_.is_null());
1077  repartParams.set("repartition: remap accept partition", acceptPart);
1078  } else
1079  repartParams.set("repartition: remap accept partition", coarseA11_.is_null());
1080  repartFactory->SetParameterList(repartParams);
1081  repartFactory->SetFactory("A", rapFact);
1082  // repartFactory->SetFactory("number of partitions", repartheurFactory);
1083  repartFactory->SetFactory("Partition", partitioner);
1084 
1085  auto newP = rcp(new RebalanceTransferFactory());
1086  ParameterList newPparams;
1087  newPparams.set("type", "Interpolation");
1088  newPparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
1089  newPparams.set("repartition: use subcommunicators", true);
1090  newPparams.set("repartition: rebalance Nullspace", false);
1091  newP->SetFactory("Coordinates", NoFactory::getRCP());
1092  newP->SetParameterList(newPparams);
1093  newP->SetFactory("Importer", repartFactory);
1094 
1095  auto newA = rcp(new RebalanceAcFactory());
1096  ParameterList rebAcParams;
1097  rebAcParams.set("repartition: use subcommunicators", true);
1098  newA->SetParameterList(rebAcParams);
1099  newA->SetFactory("A", rapFact);
1100  newA->SetFactory("Importer", repartFactory);
1101 
1102  coarseLevel.Request("P", newP.get());
1103  coarseLevel.Request("Importer", repartFactory.get());
1104  coarseLevel.Request("A", newA.get());
1105  coarseLevel.Request("Coordinates", newP.get());
1106  rapFact->Build(fineLevel, coarseLevel);
1107  repartFactory->Build(coarseLevel);
1108 
1109  if (!precList22_.get<bool>("repartition: rebalance P and R", false))
1110  Importer22_ = coarseLevel.Get<RCP<const Import> >("Importer", repartFactory.get());
1111  Dk_1_ = coarseLevel.Get<RCP<Matrix> >("P", newP.get());
1112  A22_ = coarseLevel.Get<RCP<Matrix> >("A", newA.get());
1113  Coords22_ = coarseLevel.Get<RCP<RealValuedMultiVector> >("Coordinates", newP.get());
1114 
1115  if (!P22_.is_null()) {
1116  // Todo
1117  }
1118 
1119  } else
1120 #endif // HAVE_MPI
1121  {
1122  coarseLevel.Request("A", rapFact.get());
1123  if (enable_reuse_) {
1124  coarseLevel.Request("AP reuse data", rapFact.get());
1125  coarseLevel.Request("RAP reuse data", rapFact.get());
1126  }
1127 
1128  A22_ = coarseLevel.Get<RCP<Matrix> >("A", rapFact.get());
1129 
1130  if (enable_reuse_) {
1131  if (coarseLevel.IsAvailable("AP reuse data", rapFact.get()))
1132  A22_AP_reuse_data_ = coarseLevel.Get<RCP<ParameterList> >("AP reuse data", rapFact.get());
1133  if (coarseLevel.IsAvailable("RAP reuse data", rapFact.get()))
1134  A22_RAP_reuse_data_ = coarseLevel.Get<RCP<ParameterList> >("RAP reuse data", rapFact.get());
1135  }
1136  }
1137  } else {
1138  RCP<Teuchos::TimeMonitor> tm = getTimer("Build A22");
1139  if (Importer22_.is_null()) {
1140  RCP<Matrix> temp;
1141  temp = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*SM_Matrix_, false, *Dk_1_, false, temp, GetOStream(Runtime0), true, true);
1142  if (!implicitTranspose_)
1143  A22_ = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Dk_1_T_, false, *temp, false, A22_, GetOStream(Runtime0), true, true);
1144  else
1145  A22_ = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Dk_1_, true, *temp, false, A22_, GetOStream(Runtime0), true, true);
1146  } else {
1147  // we replaced domain map and importer on D, reverse that
1148  RCP<const Import> Dimporter = rcp_dynamic_cast<CrsMatrixWrap>(Dk_1_)->getCrsMatrix()->getCrsGraph()->getImporter();
1149  rcp_dynamic_cast<CrsMatrixWrap>(Dk_1_)->getCrsMatrix()->replaceDomainMapAndImporter(DorigDomainMap_, DorigImporter_);
1150 
1151  RCP<Matrix> temp, temp2;
1152  temp = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*SM_Matrix_, false, *Dk_1_, false, temp, GetOStream(Runtime0), true, true);
1153  if (!implicitTranspose_)
1154  temp2 = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Dk_1_T_, false, *temp, false, temp2, GetOStream(Runtime0), true, true);
1155  else
1156  temp2 = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Dk_1_, true, *temp, false, temp2, GetOStream(Runtime0), true, true);
1157 
1158  // and back again
1159  rcp_dynamic_cast<CrsMatrixWrap>(Dk_1_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), Dimporter);
1160 
1161  ParameterList XpetraList;
1162  XpetraList.set("Restrict Communicator", true);
1163  XpetraList.set("Timer Label", "MueLu::RebalanceA22");
1164  RCP<const Map> targetMap = Importer22_->getTargetMap();
1165  A22_ = MatrixFactory::Build(temp2, *Importer22_, *Importer22_, targetMap, targetMap, rcp(&XpetraList, false));
1166  }
1167  }
1168 
1169  if (not A22_.is_null() and not disable_addon_22_ and spaceNumber_ > 1) {
1171 
1172  RCP<Matrix> addon22 = buildAddon(spaceNumber_ - 1);
1173 
1174  // add matrices together
1175  RCP<Matrix> newA22;
1176  Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*A22_, false, one, *addon22, false, one, newA22, GetOStream(Runtime0));
1177  newA22->fillComplete();
1178  A22_ = newA22;
1179  }
1180 
1181  if (!A22_.is_null()) {
1182  dump(A22_, "A22.m");
1183  A22_->setObjectLabel(solverName_ + " (2,2)");
1184  // Set block size
1185  if (spaceNumber_ - 1 == 0)
1186  A22_->SetFixedBlockSize(1);
1187  else
1188  A22_->SetFixedBlockSize(dim_);
1189  }
1190 }
1191 
1192 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1194  Level level;
1195  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
1196  level.SetFactoryManager(factoryHandler);
1197  level.SetLevelID(0);
1198  level.setObjectLabel(solverName_ + " (1,1)");
1199  level.Set("A", SM_Matrix_);
1200  level.setlib(SM_Matrix_->getDomainMap()->lib());
1201  // For Hiptmair
1202  level.Set("NodeMatrix", A22_);
1203  level.Set("D0", Dk_1_);
1204 
1205  if ((parameterList_.get<std::string>("smoother: pre type") != "NONE") && (parameterList_.get<std::string>("smoother: post type") != "NONE")) {
1206  std::string preSmootherType = parameterList_.get<std::string>("smoother: pre type");
1207  std::string postSmootherType = parameterList_.get<std::string>("smoother: post type");
1208 
1209  ParameterList preSmootherList, postSmootherList;
1210  if (parameterList_.isSublist("smoother: pre params"))
1211  preSmootherList = parameterList_.sublist("smoother: pre params");
1212  if (parameterList_.isSublist("smoother: post params"))
1213  postSmootherList = parameterList_.sublist("smoother: post params");
1214 
1215  RCP<SmootherPrototype> preSmootherPrototype = rcp(new TrilinosSmoother(preSmootherType, preSmootherList));
1216  RCP<SmootherPrototype> postSmootherPrototype = rcp(new TrilinosSmoother(postSmootherType, postSmootherList));
1217  RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(preSmootherPrototype, postSmootherPrototype));
1218 
1219  level.Request("PreSmoother", smootherFact.get());
1220  level.Request("PostSmoother", smootherFact.get());
1221  if (enable_reuse_) {
1222  ParameterList smootherFactoryParams;
1223  smootherFactoryParams.set("keep smoother data", true);
1224  smootherFact->SetParameterList(smootherFactoryParams);
1225  level.Request("PreSmoother data", smootherFact.get());
1226  level.Request("PostSmoother data", smootherFact.get());
1227  if (!PreSmootherData11_.is_null())
1228  level.Set("PreSmoother data", PreSmootherData11_, smootherFact.get());
1229  if (!PostSmootherData11_.is_null())
1230  level.Set("PostSmoother data", PostSmootherData11_, smootherFact.get());
1231  }
1232  smootherFact->Build(level);
1233  PreSmoother11_ = level.Get<RCP<SmootherBase> >("PreSmoother", smootherFact.get());
1234  PostSmoother11_ = level.Get<RCP<SmootherBase> >("PostSmoother", smootherFact.get());
1235  if (enable_reuse_) {
1236  PreSmootherData11_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data", smootherFact.get());
1237  PostSmootherData11_ = level.Get<RCP<SmootherPrototype> >("PostSmoother data", smootherFact.get());
1238  }
1239  } else {
1240  std::string smootherType = parameterList_.get<std::string>("smoother: type");
1241 
1242  ParameterList smootherList;
1243  if (parameterList_.isSublist("smoother: params"))
1244  smootherList = parameterList_.sublist("smoother: params");
1245 
1246  RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(smootherType, smootherList));
1247  RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(smootherPrototype));
1248  level.Request("PreSmoother", smootherFact.get());
1249  if (enable_reuse_) {
1250  ParameterList smootherFactoryParams;
1251  smootherFactoryParams.set("keep smoother data", true);
1252  smootherFact->SetParameterList(smootherFactoryParams);
1253  level.Request("PreSmoother data", smootherFact.get());
1254  if (!PreSmootherData11_.is_null())
1255  level.Set("PreSmoother data", PreSmootherData11_, smootherFact.get());
1256  }
1257  smootherFact->Build(level);
1258  PreSmoother11_ = level.Get<RCP<SmootherBase> >("PreSmoother", smootherFact.get());
1259  PostSmoother11_ = PreSmoother11_;
1260  if (enable_reuse_)
1261  PreSmootherData11_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data", smootherFact.get());
1262  }
1263 }
1264 
1265 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1267  RCP<Teuchos::TimeMonitor> tmAlloc = getTimer("Allocate MVs");
1268 
1269  // 11 block
1270  if (!R11_.is_null())
1271  P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), numVectors);
1272  else
1273  P11res_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1274  P11res_->setObjectLabel("P11res");
1275 
1276  if (Dk_1_T_R11_colMapsMatch_) {
1277  DTR11Tmp_ = MultiVectorFactory::Build(R11_->getColMap(), numVectors);
1278  DTR11Tmp_->setObjectLabel("DTR11Tmp");
1279  }
1280  if (!ImporterCoarse11_.is_null()) {
1281  P11resTmp_ = MultiVectorFactory::Build(ImporterCoarse11_->getTargetMap(), numVectors);
1282  P11resTmp_->setObjectLabel("P11resTmp");
1283  P11x_ = MultiVectorFactory::Build(ImporterCoarse11_->getTargetMap(), numVectors);
1284  } else
1285  P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1286  P11x_->setObjectLabel("P11x");
1287 
1288  // 22 block
1289  if (!Dk_1_T_.is_null())
1290  Dres_ = MultiVectorFactory::Build(Dk_1_T_->getRangeMap(), numVectors);
1291  else
1292  Dres_ = MultiVectorFactory::Build(Dk_1_->getDomainMap(), numVectors);
1293  Dres_->setObjectLabel("Dres");
1294 
1295  if (!Importer22_.is_null()) {
1296  DresTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1297  DresTmp_->setObjectLabel("DresTmp");
1298  Dx_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1299  } else if (!onlyBoundary22_)
1300  Dx_ = MultiVectorFactory::Build(A22_->getDomainMap(), numVectors);
1301  if (!Dx_.is_null())
1302  Dx_->setObjectLabel("Dx");
1303 
1304  if (!coarseA11_.is_null()) {
1305  if (!ImporterCoarse11_.is_null() && !implicitTranspose_)
1306  P11resSubComm_ = MultiVectorFactory::Build(P11resTmp_, Teuchos::View);
1307  else
1308  P11resSubComm_ = MultiVectorFactory::Build(P11res_, Teuchos::View);
1309  P11resSubComm_->replaceMap(coarseA11_->getRangeMap());
1310  P11resSubComm_->setObjectLabel("P11resSubComm");
1311 
1312  P11xSubComm_ = MultiVectorFactory::Build(P11x_, Teuchos::View);
1313  P11xSubComm_->replaceMap(coarseA11_->getDomainMap());
1314  P11xSubComm_->setObjectLabel("P11xSubComm");
1315  }
1316 
1317  if (!A22_.is_null()) {
1318  if (!Importer22_.is_null() && !implicitTranspose_)
1319  DresSubComm_ = MultiVectorFactory::Build(DresTmp_, Teuchos::View);
1320  else
1321  DresSubComm_ = MultiVectorFactory::Build(Dres_, Teuchos::View);
1322  DresSubComm_->replaceMap(A22_->getRangeMap());
1323  DresSubComm_->setObjectLabel("DresSubComm");
1324 
1325  DxSubComm_ = MultiVectorFactory::Build(Dx_, Teuchos::View);
1326  DxSubComm_->replaceMap(A22_->getDomainMap());
1327  DxSubComm_->setObjectLabel("DxSubComm");
1328  }
1329 
1330  residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), numVectors);
1331  residual_->setObjectLabel("residual");
1332 }
1333 
1334 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1336  if (dump_matrices_ && !A.is_null()) {
1337  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1339  }
1340 }
1341 
1342 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1344  if (dump_matrices_ && !X.is_null()) {
1345  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1347  }
1348 }
1349 
1350 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1352  if (dump_matrices_ && !X.is_null()) {
1353  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1355  }
1356 }
1357 
1358 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1360  if (dump_matrices_) {
1361  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1362  std::ofstream out(name);
1363  for (size_t i = 0; i < Teuchos::as<size_t>(v.size()); i++)
1364  out << v[i] << "\n";
1365  }
1366 }
1367 
1368 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1369 void RefMaxwell<Scalar, LocalOrdinal, GlobalOrdinal, Node>::dump(const Kokkos::View<bool *, typename Node::device_type> &v, std::string name) const {
1370  if (dump_matrices_) {
1371  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1372  std::ofstream out(name);
1373  auto vH = Kokkos::create_mirror_view(v);
1374  Kokkos::deep_copy(vH, v);
1375  out << "%%MatrixMarket matrix array real general\n"
1376  << vH.extent(0) << " 1\n";
1377  for (size_t i = 0; i < vH.size(); i++)
1378  out << vH[i] << "\n";
1379  }
1380 }
1381 
1382 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1384  if (IsPrint(Timings)) {
1385  if (!syncTimers_)
1386  return Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer("MueLu " + solverName_ + ": " + name)));
1387  else {
1388  if (comm.is_null())
1389  return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer("MueLu " + solverName_ + ": " + name), SM_Matrix_->getRowMap()->getComm().ptr()));
1390  else
1391  return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer("MueLu " + solverName_ + ": " + name), comm.ptr()));
1392  }
1393  } else
1394  return Teuchos::null;
1395 }
1396 
1397 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1399  buildNullspace(const int spaceNumber, const Kokkos::View<bool *, typename Node::device_type> &bcs, const bool applyBCs) {
1400  std::string spaceLabel;
1401  if (spaceNumber == 0)
1402  spaceLabel = "nodal";
1403  else if (spaceNumber == 1)
1404  spaceLabel = "edge";
1405  else if (spaceNumber == 2)
1406  spaceLabel = "face";
1407  else
1408  TEUCHOS_ASSERT(false);
1409 
1411  if (spaceNumber > 0) {
1412  tm = getTimer("nullspace " + spaceLabel);
1413  GetOStream(Runtime0) << solverName_ + "::compute(): building " + spaceLabel + " nullspace" << std::endl;
1414  }
1415 
1416  if (spaceNumber == 0) {
1417  return Teuchos::null;
1418 
1419  } else if (spaceNumber == 1) {
1420  RCP<MultiVector> CoordsSC;
1421  CoordsSC = Utilities::RealValuedToScalarMultiVector(NodalCoords_);
1422  RCP<MultiVector> Nullspace = MultiVectorFactory::Build(D0_->getRowMap(), NodalCoords_->getNumVectors());
1423  D0_->apply(*CoordsSC, *Nullspace);
1424 
1425  bool normalize = parameterList_.get<bool>("refmaxwell: normalize nullspace", MasterList::getDefault<bool>("refmaxwell: normalize nullspace"));
1426 
1427  coordinateType minLen, maxLen, meanLen;
1428  if (IsPrint(Statistics2) || normalize) {
1429  // compute edge lengths
1430  ArrayRCP<ArrayRCP<const Scalar> > localNullspace(dim_);
1431  for (size_t i = 0; i < dim_; i++)
1432  localNullspace[i] = Nullspace->getData(i);
1436  for (size_t j = 0; j < Nullspace->getMap()->getLocalNumElements(); j++) {
1438  for (size_t i = 0; i < dim_; i++)
1439  lenSC += localNullspace[i][j] * localNullspace[i][j];
1441  localMinLen = std::min(localMinLen, len);
1442  localMaxLen = std::max(localMaxLen, len);
1443  localMeanLen += len;
1444  }
1445 
1446  RCP<const Teuchos::Comm<int> > comm = Nullspace->getMap()->getComm();
1447  MueLu_minAll(comm, localMinLen, minLen);
1448  MueLu_sumAll(comm, localMeanLen, meanLen);
1449  MueLu_maxAll(comm, localMaxLen, maxLen);
1450  meanLen /= Nullspace->getMap()->getGlobalNumElements();
1451  }
1452 
1453  if (IsPrint(Statistics2)) {
1454  GetOStream(Statistics2) << "Edge length (min/mean/max): " << minLen << " / " << meanLen << " / " << maxLen << std::endl;
1455  }
1456 
1457  if (normalize) {
1458  // normalize the nullspace
1459  GetOStream(Runtime0) << solverName_ + "::compute(): normalizing nullspace" << std::endl;
1460 
1462 
1463  Array<Scalar> normsSC(NodalCoords_->getNumVectors(), one / Teuchos::as<Scalar>(meanLen));
1464  Nullspace->scale(normsSC());
1465  }
1466 
1467  if (applyBCs) {
1468  // Nuke the BC edges in nullspace
1469  Utilities::ZeroDirichletRows(Nullspace, bcs);
1470  }
1471  dump(Nullspace, "nullspaceEdge.m");
1472 
1473  return Nullspace;
1474 
1475  } else if (spaceNumber == 2) {
1476  using ATS = Kokkos::ArithTraits<Scalar>;
1477  using impl_Scalar = typename ATS::val_type;
1478  using impl_ATS = Kokkos::ArithTraits<impl_Scalar>;
1479  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1480 
1481  RCP<Matrix> facesToNodes;
1482  {
1485 
1486  // dump(edgesToNodes, "edgesToNodes.m");
1487 
1490  facesToEdges = Maxwell_Utils<SC, LO, GO, NO>::removeExplicitZeros(facesToEdges, 1e-3, false);
1491 
1492  // dump(facesToEdges, "facesToEdges.m");
1493 
1494  facesToNodes = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*facesToEdges, false, *edgesToNodes, false, facesToNodes, GetOStream(Runtime0), true, true);
1496  facesToNodes = Maxwell_Utils<SC, LO, GO, NO>::removeExplicitZeros(facesToNodes, 1e-3, false);
1497  }
1498 
1499  // dump(facesToNodes, "facesToNodes.m");
1500 
1501  RCP<RealValuedMultiVector> ghostedNodalCoordinates;
1502  auto importer = facesToNodes->getCrsGraph()->getImporter();
1503  if (!importer.is_null()) {
1504  ghostedNodalCoordinates = Xpetra::MultiVectorFactory<coordinateType, LocalOrdinal, GlobalOrdinal, Node>::Build(importer->getTargetMap(), dim_);
1505  ghostedNodalCoordinates->doImport(*NodalCoords_, *importer, Xpetra::INSERT);
1506  } else
1507  ghostedNodalCoordinates = NodalCoords_;
1508 
1510  {
1511  auto facesToNodesLocal = facesToNodes->getLocalMatrixDevice();
1512  auto localNodalCoordinates = ghostedNodalCoordinates->getDeviceLocalView(Xpetra::Access::ReadOnly);
1513  auto localFaceNullspace = Nullspace->getDeviceLocalView(Xpetra::Access::ReadWrite);
1514 
1515  // enter values
1516  Kokkos::parallel_for(
1517  solverName_ + "::buildFaceProjection_nullspace",
1518  range_type(0, Nullspace->getMap()->getLocalNumElements()),
1519  KOKKOS_LAMBDA(const size_t f) {
1520  size_t n0 = facesToNodesLocal.graph.entries(facesToNodesLocal.graph.row_map(f));
1521  size_t n1 = facesToNodesLocal.graph.entries(facesToNodesLocal.graph.row_map(f) + 1);
1522  size_t n2 = facesToNodesLocal.graph.entries(facesToNodesLocal.graph.row_map(f) + 2);
1523  impl_Scalar elementNullspace00 = localNodalCoordinates(n1, 0) - localNodalCoordinates(n0, 0);
1524  impl_Scalar elementNullspace10 = localNodalCoordinates(n2, 0) - localNodalCoordinates(n0, 0);
1525  impl_Scalar elementNullspace01 = localNodalCoordinates(n1, 1) - localNodalCoordinates(n0, 1);
1526  impl_Scalar elementNullspace11 = localNodalCoordinates(n2, 1) - localNodalCoordinates(n0, 1);
1527  impl_Scalar elementNullspace02 = localNodalCoordinates(n1, 2) - localNodalCoordinates(n0, 2);
1528  impl_Scalar elementNullspace12 = localNodalCoordinates(n2, 2) - localNodalCoordinates(n0, 2);
1529 
1530  localFaceNullspace(f, 0) = impl_ATS::magnitude(elementNullspace01 * elementNullspace12 - elementNullspace02 * elementNullspace11) / 6.0;
1531  localFaceNullspace(f, 1) = impl_ATS::magnitude(elementNullspace02 * elementNullspace10 - elementNullspace00 * elementNullspace12) / 6.0;
1532  localFaceNullspace(f, 2) = impl_ATS::magnitude(elementNullspace00 * elementNullspace11 - elementNullspace01 * elementNullspace10) / 6.0;
1533  });
1534  }
1535 
1536  if (applyBCs) {
1537  // Nuke the BC faces in nullspace
1538  Utilities::ZeroDirichletRows(Nullspace, bcs);
1539  }
1540 
1541  dump(Nullspace, "nullspaceFace.m");
1542 
1543  return Nullspace;
1544 
1545  } else
1546  TEUCHOS_ASSERT(false);
1547 }
1548 
1549 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1552  using ATS = Kokkos::ArithTraits<Scalar>;
1553  using impl_Scalar = typename ATS::val_type;
1554  using impl_ATS = Kokkos::ArithTraits<impl_Scalar>;
1555  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1556 
1557  typedef typename Matrix::local_matrix_type KCRS;
1558  typedef typename KCRS::StaticCrsGraphType graph_t;
1559  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1560  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1561  typedef typename KCRS::values_type::non_const_type scalar_view_t;
1562 
1563  const impl_Scalar impl_SC_ONE = impl_ATS::one();
1564  const impl_Scalar impl_SC_ZERO = impl_ATS::zero();
1565  const impl_Scalar impl_half = impl_SC_ONE / (impl_SC_ONE + impl_SC_ONE);
1566 
1567  std::string spaceLabel;
1568  if (spaceNumber == 0)
1569  spaceLabel = "nodal";
1570  else if (spaceNumber == 1)
1571  spaceLabel = "edge";
1572  else if (spaceNumber == 2)
1573  spaceLabel = "face";
1574  else
1575  TEUCHOS_ASSERT(false);
1576 
1578  if (spaceNumber > 0) {
1579  tm = getTimer("projection " + spaceLabel);
1580  GetOStream(Runtime0) << solverName_ + "::compute(): building " + spaceLabel + " projection" << std::endl;
1581  }
1582 
1583  RCP<Matrix> incidence;
1584  if (spaceNumber == 0) {
1585  // identity projection
1586  return Teuchos::null;
1587 
1588  } else if (spaceNumber == 1) {
1589  // D0 is incidence from nodes to edges
1590  incidence = D0_;
1591 
1592  } else if (spaceNumber == 2) {
1593  // get incidence from nodes to faces by multiplying D0 and D1
1594 
1595  TEUCHOS_ASSERT(spaceNumber_ == 2);
1596 
1597  RCP<Matrix> facesToNodes;
1598  {
1600  Maxwell_Utils<SC, LO, GO, NO>::thresholdedAbs(edgesToNodes, 1e-10);
1601 
1602  dump(edgesToNodes, "edgesToNodes.m");
1603 
1605  Maxwell_Utils<SC, LO, GO, NO>::thresholdedAbs(facesToEdges, 1e-10);
1606  // facesToEdges = Maxwell_Utils<SC,LO,GO,NO>::removeExplicitZeros(facesToEdges, 1e-2, false);
1607 
1608  dump(facesToEdges, "facesToEdges.m");
1609 
1610  facesToNodes = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*facesToEdges, false, *edgesToNodes, false, facesToNodes, GetOStream(Runtime0), true, true);
1611  Maxwell_Utils<SC, LO, GO, NO>::thresholdedAbs(facesToNodes, 1e-10);
1612  facesToNodes = Maxwell_Utils<SC, LO, GO, NO>::removeExplicitZeros(facesToNodes, 1e-2, false);
1613  }
1614 
1615  dump(facesToNodes, "facesToNodes.m");
1616 
1617  incidence = facesToNodes;
1618 
1619  } else
1620  TEUCHOS_ASSERT(false);
1621 
1622  size_t dim = dim_;
1623 
1624  // Create maps
1625  RCP<const Map> rowMap = incidence->getRowMap();
1626  RCP<const Map> blockColMap = MapFactory::Build(incidence->getColMap(), dim);
1627  RCP<const Map> blockDomainMap = MapFactory::Build(incidence->getDomainMap(), dim);
1628 
1629  auto localIncidence = incidence->getLocalMatrixDevice();
1630  size_t numLocalRows = rowMap->getLocalNumElements();
1631  size_t numLocalColumns = dim * incidence->getColMap()->getLocalNumElements();
1632  size_t nnzEstimate = dim * localIncidence.graph.entries.size();
1633  lno_view_t rowptr(Kokkos::ViewAllocateWithoutInitializing("projection_rowptr_" + spaceLabel), numLocalRows + 1);
1634  lno_nnz_view_t colind(Kokkos::ViewAllocateWithoutInitializing("projection_colind_" + spaceLabel), nnzEstimate);
1635  scalar_view_t vals("projection_vals_" + spaceLabel, nnzEstimate);
1636 
1637  // set rowpointer
1638  Kokkos::parallel_for(
1639  solverName_ + "::buildProjection_adjustRowptr_" + spaceLabel,
1640  range_type(0, numLocalRows + 1),
1641  KOKKOS_LAMBDA(const size_t i) {
1642  rowptr(i) = dim * localIncidence.graph.row_map(i);
1643  });
1644 
1645  auto localNullspace = Nullspace->getDeviceLocalView(Xpetra::Access::ReadOnly);
1646 
1647  // set column indices and values
1648  magnitudeType tol = 1e-5;
1649  Kokkos::parallel_for(
1650  solverName_ + "::buildProjection_enterValues_" + spaceLabel,
1651  range_type(0, numLocalRows),
1652  KOKKOS_LAMBDA(const size_t f) {
1653  for (size_t jj = localIncidence.graph.row_map(f); jj < localIncidence.graph.row_map(f + 1); jj++) {
1654  for (size_t k = 0; k < dim; k++) {
1655  colind(dim * jj + k) = dim * localIncidence.graph.entries(jj) + k;
1656  if (impl_ATS::magnitude(localIncidence.values(jj)) > tol)
1657  vals(dim * jj + k) = impl_half * localNullspace(f, k);
1658  else
1659  vals(dim * jj + k) = impl_SC_ZERO;
1660  }
1661  }
1662  });
1663 
1664  // Create matrix
1665  typename CrsMatrix::local_matrix_type lclProjection("local projection " + spaceLabel,
1666  numLocalRows, numLocalColumns, nnzEstimate,
1667  vals, rowptr, colind);
1668  RCP<Matrix> projection = MatrixFactory::Build(lclProjection,
1669  rowMap, blockColMap,
1670  blockDomainMap, rowMap);
1671 
1672  return projection;
1673 }
1674 
1675 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1677  Teuchos::RCP<Matrix> &P_nodal,
1678  Teuchos::RCP<MultiVector> &Nullspace_nodal,
1679  Teuchos::RCP<RealValuedMultiVector> &CoarseCoords_nodal) const {
1680  RCP<Teuchos::TimeMonitor> tm = getTimer("nodal prolongator");
1681  GetOStream(Runtime0) << solverName_ + "::compute(): building nodal prolongator" << std::endl;
1682 
1683  // build prolongator: algorithm 1 in the reference paper
1684  // First, build nodal unsmoothed prolongator using the matrix A_nodal
1685 
1686  const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1687 
1688  {
1689  Level fineLevel, coarseLevel;
1690  fineLevel.SetFactoryManager(null);
1691  coarseLevel.SetFactoryManager(null);
1692  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1693  fineLevel.SetLevelID(0);
1694  coarseLevel.SetLevelID(1);
1695  fineLevel.Set("A", A_nodal);
1696  fineLevel.Set("Coordinates", NodalCoords_);
1697  fineLevel.Set("DofsPerNode", 1);
1698  coarseLevel.setlib(A_nodal->getDomainMap()->lib());
1699  fineLevel.setlib(A_nodal->getDomainMap()->lib());
1700  coarseLevel.setObjectLabel(A_nodal->getObjectLabel());
1701  fineLevel.setObjectLabel(A_nodal->getObjectLabel());
1702 
1703  LocalOrdinal NSdim = 1;
1704  RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal->getRowMap(), NSdim);
1705  nullSpace->putScalar(SC_ONE);
1706  fineLevel.Set("Nullspace", nullSpace);
1707 
1708  std::string algo = parameterList_.get<std::string>("multigrid algorithm");
1709 
1710  RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1711  amalgFact = rcp(new AmalgamationFactory());
1712  coarseMapFact = rcp(new CoarseMapFactory());
1713  Tfact = rcp(new CoordinatesTransferFactory());
1714  UncoupledAggFact = rcp(new UncoupledAggregationFactory());
1715  if (useKokkos_) {
1716  dropFact = rcp(new CoalesceDropFactory_kokkos());
1717  TentativePFact = rcp(new TentativePFactory_kokkos());
1718  } else {
1719  dropFact = rcp(new CoalesceDropFactory());
1720  TentativePFact = rcp(new TentativePFactory());
1721  }
1722  if (algo == "sa")
1723  SaPFact = rcp(new SaPFactory());
1724  dropFact->SetFactory("UnAmalgamationInfo", amalgFact);
1725 
1726  double dropTol = parameterList_.get<double>("aggregation: drop tol");
1727  std::string dropScheme = parameterList_.get<std::string>("aggregation: drop scheme");
1728  std::string distLaplAlgo = parameterList_.get<std::string>("aggregation: distance laplacian algo");
1729  dropFact->SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(dropTol));
1730  dropFact->SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(dropScheme));
1731  if (!useKokkos_)
1732  dropFact->SetParameter("aggregation: distance laplacian algo", Teuchos::ParameterEntry(distLaplAlgo));
1733 
1734  UncoupledAggFact->SetFactory("Graph", dropFact);
1735  int minAggSize = parameterList_.get<int>("aggregation: min agg size");
1736  UncoupledAggFact->SetParameter("aggregation: min agg size", Teuchos::ParameterEntry(minAggSize));
1737  int maxAggSize = parameterList_.get<int>("aggregation: max agg size");
1738  UncoupledAggFact->SetParameter("aggregation: max agg size", Teuchos::ParameterEntry(maxAggSize));
1739  bool matchMLbehavior1 = parameterList_.get<bool>("aggregation: match ML phase1");
1740  UncoupledAggFact->SetParameter("aggregation: match ML phase1", Teuchos::ParameterEntry(matchMLbehavior1));
1741  bool matchMLbehavior2a = parameterList_.get<bool>("aggregation: match ML phase2a");
1742  UncoupledAggFact->SetParameter("aggregation: match ML phase2a", Teuchos::ParameterEntry(matchMLbehavior2a));
1743  bool matchMLbehavior2b = parameterList_.get<bool>("aggregation: match ML phase2b");
1744  UncoupledAggFact->SetParameter("aggregation: match ML phase2b", Teuchos::ParameterEntry(matchMLbehavior2b));
1745 
1746  coarseMapFact->SetFactory("Aggregates", UncoupledAggFact);
1747 
1748  TentativePFact->SetFactory("Aggregates", UncoupledAggFact);
1749  TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact);
1750  TentativePFact->SetFactory("CoarseMap", coarseMapFact);
1751 
1752  Tfact->SetFactory("Aggregates", UncoupledAggFact);
1753  Tfact->SetFactory("CoarseMap", coarseMapFact);
1754 
1755  if (algo == "sa") {
1756  SaPFact->SetFactory("P", TentativePFact);
1757  coarseLevel.Request("P", SaPFact.get());
1758  } else
1759  coarseLevel.Request("P", TentativePFact.get());
1760  coarseLevel.Request("Nullspace", TentativePFact.get());
1761  coarseLevel.Request("Coordinates", Tfact.get());
1762 
1764  bool exportVizData = parameterList_.get<bool>("aggregation: export visualization data");
1765  if (exportVizData) {
1766  aggExport = rcp(new AggregationExportFactory());
1767  ParameterList aggExportParams;
1768  aggExportParams.set("aggregation: output filename", "aggs.vtk");
1769  aggExportParams.set("aggregation: output file: agg style", "Jacks");
1770  aggExport->SetParameterList(aggExportParams);
1771 
1772  aggExport->SetFactory("Aggregates", UncoupledAggFact);
1773  aggExport->SetFactory("UnAmalgamationInfo", amalgFact);
1774  fineLevel.Request("Aggregates", UncoupledAggFact.get());
1775  fineLevel.Request("UnAmalgamationInfo", amalgFact.get());
1776  }
1777 
1778  if (algo == "sa")
1779  coarseLevel.Get("P", P_nodal, SaPFact.get());
1780  else
1781  coarseLevel.Get("P", P_nodal, TentativePFact.get());
1782  coarseLevel.Get("Nullspace", Nullspace_nodal, TentativePFact.get());
1783  coarseLevel.Get("Coordinates", CoarseCoords_nodal, Tfact.get());
1784 
1785  if (exportVizData)
1786  aggExport->Build(fineLevel, coarseLevel);
1787  }
1788 }
1789 
1790 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1793  RCP<Teuchos::TimeMonitor> tm = getTimer("vectorial nodal prolongator");
1794  GetOStream(Runtime0) << solverName_ + "::compute(): building vectorial nodal prolongator" << std::endl;
1795 
1796  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1797 
1798  typedef typename Matrix::local_matrix_type KCRS;
1799  typedef typename KCRS::StaticCrsGraphType graph_t;
1800  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1801  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1802  typedef typename KCRS::values_type::non_const_type scalar_view_t;
1803 
1804  size_t dim = dim_;
1805 
1806  // Create the matrix object
1807  RCP<Map> blockRowMap = MapFactory::Build(P_nodal->getRowMap(), dim);
1808  RCP<Map> blockColMap = MapFactory::Build(P_nodal->getColMap(), dim);
1809  RCP<Map> blockDomainMap = MapFactory::Build(P_nodal->getDomainMap(), dim);
1810 
1811  // Get data out of P_nodal.
1812  auto localP_nodal = P_nodal->getLocalMatrixDevice();
1813 
1814  size_t numLocalRows = blockRowMap->getLocalNumElements();
1815  size_t numLocalColumns = blockColMap->getLocalNumElements();
1816  size_t nnzEstimate = dim * localP_nodal.graph.entries.size();
1817  lno_view_t rowptr(Kokkos::ViewAllocateWithoutInitializing("vectorPNodal_rowptr"), numLocalRows + 1);
1818  lno_nnz_view_t colind(Kokkos::ViewAllocateWithoutInitializing("vectorPNodal_colind"), nnzEstimate);
1819  scalar_view_t vals(Kokkos::ViewAllocateWithoutInitializing("vectorPNodal_vals"), nnzEstimate);
1820 
1821  // fill rowpointer
1822  Kokkos::parallel_for(
1823  solverName_ + "::buildVectorNodalProlongator_adjustRowptr",
1824  range_type(0, localP_nodal.numRows() + 1),
1825  KOKKOS_LAMBDA(const LocalOrdinal i) {
1826  if (i < localP_nodal.numRows()) {
1827  for (size_t k = 0; k < dim; k++) {
1828  rowptr(dim * i + k) = dim * localP_nodal.graph.row_map(i) + k;
1829  }
1830  } else
1831  rowptr(dim * localP_nodal.numRows()) = dim * localP_nodal.graph.row_map(i);
1832  });
1833 
1834  // fill column indices and values
1835  Kokkos::parallel_for(
1836  solverName_ + "::buildVectorNodalProlongator_adjustColind",
1837  range_type(0, localP_nodal.graph.entries.size()),
1838  KOKKOS_LAMBDA(const size_t jj) {
1839  for (size_t k = 0; k < dim; k++) {
1840  colind(dim * jj + k) = dim * localP_nodal.graph.entries(jj) + k;
1841  // vals(dim*jj+k) = localP_nodal.values(jj);
1842  vals(dim * jj + k) = 1.;
1843  }
1844  });
1845 
1846  typename CrsMatrix::local_matrix_type lclVectorNodalP("local vector nodal prolongator",
1847  numLocalRows, numLocalColumns, nnzEstimate,
1848  vals, rowptr, colind);
1849  RCP<Matrix> vectorNodalP = MatrixFactory::Build(lclVectorNodalP,
1850  blockRowMap, blockColMap,
1851  blockDomainMap, blockRowMap);
1852 
1853  return vectorNodalP;
1854 }
1855 
1856 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1858  buildProlongator(const int spaceNumber,
1859  const Teuchos::RCP<Matrix> &A_nodal,
1860  const Teuchos::RCP<MultiVector> &Nullspace,
1861  Teuchos::RCP<Matrix> &Prolongator,
1862  Teuchos::RCP<MultiVector> &coarseNullspace,
1863  Teuchos::RCP<RealValuedMultiVector> &coarseNodalCoords) const {
1864  using ATS = Kokkos::ArithTraits<Scalar>;
1865  using impl_Scalar = typename ATS::val_type;
1866  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1867 
1868  std::string typeStr;
1869  switch (spaceNumber) {
1870  case 0:
1871  typeStr = "node";
1872  TEUCHOS_ASSERT(A_nodal.is_null());
1873  break;
1874  case 1:
1875  typeStr = "edge";
1876  break;
1877  case 2:
1878  typeStr = "face";
1879  break;
1880  default:
1881  TEUCHOS_ASSERT(false);
1882  }
1883 
1884  const bool skipFirstLevel = !A_nodal.is_null();
1885 
1887  if (spaceNumber > 0) {
1888  tm = getTimer("special prolongator " + typeStr);
1889  GetOStream(Runtime0) << solverName_ + "::compute(): building special " + typeStr + " prolongator" << std::endl;
1890  }
1891 
1892  RCP<Matrix> projection = buildProjection(spaceNumber, Nullspace);
1893  dump(projection, typeStr + "Projection.m");
1894 
1895  if (skipFirstLevel) {
1896  RCP<Matrix> P_nodal;
1897  RCP<MultiVector> coarseNodalNullspace;
1898 
1899  buildNodalProlongator(A_nodal, P_nodal, coarseNodalNullspace, coarseNodalCoords);
1900 
1901  dump(P_nodal, "P_nodal_" + typeStr + ".m");
1902  dump(coarseNodalNullspace, "coarseNullspace_nodal_" + typeStr + ".m");
1903 
1904  RCP<Matrix> vectorP_nodal = buildVectorNodalProlongator(P_nodal);
1905 
1906  dump(vectorP_nodal, "vectorP_nodal_" + typeStr + ".m");
1907 
1908  Prolongator = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*projection, false, *vectorP_nodal, false, Prolongator, GetOStream(Runtime0), true, true);
1909 
1910  // This is how ML computes P22 for Darcy.
1911  // The difference is the scaling by nonzeros. I don't think that that is actually needed.
1912  //
1913  // if (spaceNumber==2) {
1914 
1915  // RCP<Matrix> facesToNodes, aggsToFaces;
1916  // {
1917  // RCP<Matrix> edgesToNodes = Xpetra::MatrixFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::BuildCopy(D0_);
1918  // Maxwell_Utils<SC,LO,GO,NO>::thresholdedAbs(edgesToNodes, 1e-10);
1919 
1920  // dump(edgesToNodes, "edgesToNodes.m");
1921 
1922  // RCP<Matrix> facesToEdges = Xpetra::MatrixFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::BuildCopy(Dk_1_);
1923  // Maxwell_Utils<SC,LO,GO,NO>::thresholdedAbs(facesToEdges, 1e-10);
1924  // // facesToEdges = Maxwell_Utils<SC,LO,GO,NO>::removeExplicitZeros(facesToEdges, 1e-2, false);
1925 
1926  // dump(facesToEdges, "facesToEdges.m");
1927 
1928  // facesToNodes = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*facesToEdges,false,*edgesToNodes,false,facesToNodes,GetOStream(Runtime0),true,true);
1929  // Maxwell_Utils<SC,LO,GO,NO>::thresholdedAbs(facesToNodes, 1e-10);
1930  // facesToNodes = Maxwell_Utils<SC,LO,GO,NO>::removeExplicitZeros(facesToNodes, 1e-2, false);
1931  // }
1932  // aggsToFaces = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*facesToNodes,false,*P_nodal,false,aggsToFaces,GetOStream(Runtime0),true,true);
1933 
1934  // auto localP = Prolongator->getLocalMatrixDevice();
1935  // auto localAggsToFaces = aggsToFaces->getLocalMatrixDevice();
1936  // auto localNullspace = Nullspace->getDeviceLocalView(Xpetra::Access::ReadOnly);
1937 
1938  // size_t dim = dim_;
1939  // Kokkos::parallel_for(solverName_+"::buildVectorNodalProlongator_adjustRowptr",
1940  // range_type(0,localP.numRows()),
1941  // KOKKOS_LAMBDA(const LocalOrdinal i) {
1942  // LocalOrdinal nonzeros = localAggsToFaces.graph.row_map(i+1)-localAggsToFaces.graph.row_map(i);
1943  // for (LocalOrdinal jj = localAggsToFaces.graph.row_map(i); jj < localAggsToFaces.graph.row_map(i+1); jj++ ) {
1944  // LocalOrdinal j = localAggsToFaces.graph.entries(jj);
1945  // for (LocalOrdinal k = 0; k<dim; k++)
1946  // for (LocalOrdinal kk = localP.graph.row_map(i); kk < localP.graph.row_map(i+1); kk++)
1947  // if (localP.graph.entries(kk) == (dim * j+k)) {
1948  // localP.values(kk) = localNullspace(i, k) / nonzeros;
1949  // break;
1950  // }
1951  // }
1952  // });
1953  // }
1954  //
1955 
1956  size_t dim = dim_;
1957  coarseNullspace = MultiVectorFactory::Build(vectorP_nodal->getDomainMap(), dim);
1958 
1959  auto localNullspace_nodal = coarseNodalNullspace->getDeviceLocalView(Xpetra::Access::ReadOnly);
1960  auto localNullspace_coarse = coarseNullspace->getDeviceLocalView(Xpetra::Access::ReadWrite);
1961  Kokkos::parallel_for(
1962  solverName_ + "::buildProlongator_nullspace_" + typeStr,
1963  range_type(0, coarseNodalNullspace->getLocalLength()),
1964  KOKKOS_LAMBDA(const size_t i) {
1965  impl_Scalar val = localNullspace_nodal(i, 0);
1966  for (size_t j = 0; j < dim; j++)
1967  localNullspace_coarse(dim * i + j, j) = val;
1968  });
1969 
1970  } else {
1971  Prolongator = projection;
1972  coarseNodalCoords = NodalCoords_;
1973 
1974  if (spaceNumber == 0) {
1975  // nothing, just use the default constant vector
1976  } else if (spaceNumber >= 1) {
1977  size_t dim = dim_;
1978  coarseNullspace = MultiVectorFactory::Build(projection->getDomainMap(), dim);
1979  auto localNullspace_coarse = coarseNullspace->getDeviceLocalView(Xpetra::Access::ReadWrite);
1980  Kokkos::parallel_for(
1981  solverName_ + "::buildProlongator_nullspace_" + typeStr,
1982  range_type(0, coarseNullspace->getLocalLength() / dim),
1983  KOKKOS_LAMBDA(const size_t i) {
1984  for (size_t j = 0; j < dim; j++)
1985  localNullspace_coarse(dim * i + j, j) = 1.0;
1986  });
1987  }
1988  }
1989 }
1990 
1991 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1993  Teuchos::RCP<Operator> &thyraPrecOp,
1994  const Teuchos::RCP<Matrix> &A,
1995  const Teuchos::RCP<MultiVector> &Nullspace,
1997  Teuchos::ParameterList &params,
1998  std::string &label,
1999  const bool reuse,
2000  const bool isSingular) {
2001  int oldRank = SetProcRankVerbose(A->getDomainMap()->getComm()->getRank());
2002  if (IsPrint(Statistics2)) {
2003  RCP<ParameterList> pl = rcp(new ParameterList());
2004  pl->set("printLoadBalancingInfo", true);
2005  pl->set("printCommInfo", true);
2006  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*A, label, pl);
2007  }
2008 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2009  if (params.isType<std::string>("Preconditioner Type")) {
2010  TEUCHOS_ASSERT(!reuse);
2011  // build a Stratimikos preconditioner
2012  if (params.get<std::string>("Preconditioner Type") == "MueLu") {
2013  ParameterList &userParamList = params.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
2014  if (!Nullspace.is_null())
2015  userParamList.set<RCP<MultiVector> >("Nullspace", Nullspace);
2016  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords);
2017  }
2018  thyraPrecOp = rcp(new XpetraThyraLinearOp<Scalar, LocalOrdinal, GlobalOrdinal, Node>(coarseA11_, rcp(&params, false)));
2019  } else
2020 #endif
2021  {
2022  // build a MueLu hierarchy
2023 
2024  if (!reuse) {
2025  ParameterList &userParamList = params.sublist("user data");
2026  if (!Coords.is_null())
2027  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords);
2028  if (!Nullspace.is_null())
2029  userParamList.set<RCP<MultiVector> >("Nullspace", Nullspace);
2030 
2031  if (isSingular) {
2032  std::string coarseType = "";
2033  if (params.isParameter("coarse: type")) {
2034  coarseType = params.get<std::string>("coarse: type");
2035  // Transform string to "Abcde" notation
2036  std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
2037  std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
2038  }
2039  if ((coarseType == "" ||
2040  coarseType == "Klu" ||
2041  coarseType == "Klu2" ||
2042  coarseType == "Superlu" ||
2043  coarseType == "Superlu_dist" ||
2044  coarseType == "Superludist" ||
2045  coarseType == "Basker" ||
2046  coarseType == "Cusolver" ||
2047  coarseType == "Tacho") &&
2048  (!params.isSublist("coarse: params") ||
2049  !params.sublist("coarse: params").isParameter("fix nullspace")))
2050  params.sublist("coarse: params").set("fix nullspace", true);
2051  }
2052 
2053  hierarchy = MueLu::CreateXpetraPreconditioner(A, params);
2054  } else {
2055  RCP<MueLu::Level> level0 = hierarchy->GetLevel(0);
2056  level0->Set("A", A);
2057  hierarchy->SetupRe();
2058  }
2059  }
2060  SetProcRankVerbose(oldRank);
2061 }
2062 
2063 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2065  bool reuse = !SM_Matrix_.is_null();
2066  SM_Matrix_ = SM_Matrix_new;
2067  dump(SM_Matrix_, "SM.m");
2068  if (ComputePrec)
2069  compute(reuse);
2070 }
2071 
2072 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2073 void RefMaxwell<Scalar, LocalOrdinal, GlobalOrdinal, Node>::applyInverseAdditive(const MultiVector &RHS, MultiVector &X) const {
2074  // residual(SM_Matrix_, X, RHS, residual_)
2075  //
2076  // P11res_ = P11_^T*residual_ or P11res_ = R11_*residual_
2077  //
2078  // Dres_ = Dk_1_^T*residual or Dres_ = Dk_1_T_*residual
2079  //
2080  // if ImporterCoarse11_ is not null
2081  // ImporterCoarse11: P11res_ -> P11resTmp_
2082  // if Importer22_ is not null
2083  // Importer22: Dres_ -> DresTmp_
2084  //
2085  // if coarseA11 is not null
2086  //
2087  // Hierarchy11(P11resSubComm, P11xSubComm) P11resSubComm aliases P11res or P11resTmp
2088  // P11xSubComm aliases P11x
2089  //
2090  // if A22 is not null
2091  //
2092  // Hierarchy22(DresSubComm, DxSubComm) DresSubComm aliases Dres or DresTmp
2093  // DxSubComm aliases Dx
2094  //
2095  // if ImporterCoarse11_ is not null
2096  // ImporterCoarse11: P11xTmp_ -> P11x
2097  // if Importer22_ is not null
2098  // Importer22: DxTmp_ -> Dx_
2099  //
2100  // if fuse
2101  // X += P11*P11x
2102  // X += P11*Dx
2103  // else
2104  // residual = P11*P11x
2105  // residual += Dk_1*Dx
2106  // X += residual
2107 
2109 
2110  { // compute residual
2111 
2112  RCP<Teuchos::TimeMonitor> tmRes = getTimer("residual calculation");
2113  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2114  }
2115 
2116  { // restrict residual to sub-hierarchies
2117 
2118  if (implicitTranspose_) {
2119  {
2120  RCP<Teuchos::TimeMonitor> tmRes = getTimer("restriction coarse (1,1) (implicit)");
2121  P11_->apply(*residual_, *P11res_, Teuchos::TRANS);
2122  }
2123  if (!onlyBoundary22_) {
2124  RCP<Teuchos::TimeMonitor> tmD = getTimer("restriction (2,2) (implicit)");
2125  Dk_1_->apply(*residual_, *Dres_, Teuchos::TRANS);
2126  }
2127  } else {
2128  if (Dk_1_T_R11_colMapsMatch_) {
2129  // Column maps of D_T and R11 match, and we're running Tpetra
2130  {
2131  RCP<Teuchos::TimeMonitor> tmD = getTimer("restrictions import");
2132  DTR11Tmp_->doImport(*residual_, *rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter(), Xpetra::INSERT);
2133  }
2134  if (!onlyBoundary22_) {
2135  RCP<Teuchos::TimeMonitor> tmD = getTimer("restriction (2,2) (explicit)");
2136  rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(Dk_1_T_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*DTR11Tmp_), toTpetra(*Dres_), Teuchos::NO_TRANS);
2137  }
2138  {
2139  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("restriction coarse (1,1) (explicit)");
2140  rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*DTR11Tmp_), toTpetra(*P11res_), Teuchos::NO_TRANS);
2141  }
2142  } else {
2143  {
2144  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("restriction coarse (1,1) (explicit)");
2145  R11_->apply(*residual_, *P11res_, Teuchos::NO_TRANS);
2146  }
2147  if (!onlyBoundary22_) {
2148  RCP<Teuchos::TimeMonitor> tmD = getTimer("restriction (2,2) (explicit)");
2149  Dk_1_T_->apply(*residual_, *Dres_, Teuchos::NO_TRANS);
2150  }
2151  }
2152  }
2153  }
2154 
2155  {
2156  RCP<Teuchos::TimeMonitor> tmSubSolves = getTimer("subsolves");
2157 
2158  // block diagonal preconditioner on 2x2 (V-cycle for diagonal blocks)
2159 
2160  if (!ImporterCoarse11_.is_null() && !implicitTranspose_) {
2161  RCP<Teuchos::TimeMonitor> tmH = getTimer("import coarse (1,1)");
2162  P11resTmp_->beginImport(*P11res_, *ImporterCoarse11_, Xpetra::INSERT);
2163  }
2164  if (!onlyBoundary22_ && !Importer22_.is_null() && !implicitTranspose_) {
2165  RCP<Teuchos::TimeMonitor> tm22 = getTimer("import (2,2)");
2166  DresTmp_->beginImport(*Dres_, *Importer22_, Xpetra::INSERT);
2167  }
2168 
2169  // iterate on coarse (1, 1) block
2170  if (!coarseA11_.is_null()) {
2171  if (!ImporterCoarse11_.is_null() && !implicitTranspose_)
2172  P11resTmp_->endImport(*P11res_, *ImporterCoarse11_, Xpetra::INSERT);
2173 
2174  RCP<Teuchos::TimeMonitor> tmH = getTimer("solve coarse (1,1)", coarseA11_->getRowMap()->getComm());
2175 
2176 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2177  if (!thyraPrecOpH_.is_null()) {
2179  thyraPrecOpH_->apply(*P11resSubComm_, *P11xSubComm_, Teuchos::NO_TRANS, one, zero);
2180  } else
2181 #endif
2182  HierarchyCoarse11_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersCoarse11_, true);
2183  }
2184 
2185  // iterate on (2, 2) block
2186  if (!A22_.is_null()) {
2187  if (!onlyBoundary22_ && !Importer22_.is_null() && !implicitTranspose_)
2188  DresTmp_->endImport(*Dres_, *Importer22_, Xpetra::INSERT);
2189 
2190  RCP<Teuchos::TimeMonitor> tm22 = getTimer("solve (2,2)", A22_->getRowMap()->getComm());
2191 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2192  if (!thyraPrecOp22_.is_null()) {
2194  thyraPrecOp22_->apply(*DresSubComm_, *DxSubComm_, Teuchos::NO_TRANS, one, zero);
2195  } else
2196 #endif
2197  Hierarchy22_->Iterate(*DresSubComm_, *DxSubComm_, numIters22_, true);
2198  }
2199 
2200  if (coarseA11_.is_null() && !ImporterCoarse11_.is_null() && !implicitTranspose_)
2201  P11resTmp_->endImport(*P11res_, *ImporterCoarse11_, Xpetra::INSERT);
2202  if (A22_.is_null() && !onlyBoundary22_ && !Importer22_.is_null() && !implicitTranspose_)
2203  DresTmp_->endImport(*Dres_, *Importer22_, Xpetra::INSERT);
2204  }
2205 
2206  if (fuseProlongationAndUpdate_) {
2207  { // prolongate (1,1) block
2208  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("prolongation coarse (1,1) (fused)");
2209  P11_->apply(*P11x_, X, Teuchos::NO_TRANS, one, one);
2210  }
2211 
2212  if (!onlyBoundary22_) { // prolongate (2,2) block
2213  RCP<Teuchos::TimeMonitor> tmD = getTimer("prolongation (2,2) (fused)");
2214  Dk_1_->apply(*Dx_, X, Teuchos::NO_TRANS, one, one);
2215  }
2216  } else {
2217  { // prolongate (1,1) block
2218  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("prolongation coarse (1,1) (unfused)");
2219  P11_->apply(*P11x_, *residual_, Teuchos::NO_TRANS);
2220  }
2221 
2222  if (!onlyBoundary22_) { // prolongate (2,2) block
2223  RCP<Teuchos::TimeMonitor> tmD = getTimer("prolongation (2,2) (unfused)");
2224  Dk_1_->apply(*Dx_, *residual_, Teuchos::NO_TRANS, one, one);
2225  }
2226 
2227  { // update current solution
2228  RCP<Teuchos::TimeMonitor> tmUpdate = getTimer("update");
2229  X.update(one, *residual_, one);
2230  }
2231  }
2232 }
2233 
2234 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2235 void RefMaxwell<Scalar, LocalOrdinal, GlobalOrdinal, Node>::solveH(const MultiVector &RHS, MultiVector &X) const {
2237 
2238  { // compute residual
2239  RCP<Teuchos::TimeMonitor> tmRes = getTimer("residual calculation");
2240  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2241  if (implicitTranspose_)
2242  P11_->apply(*residual_, *P11res_, Teuchos::TRANS);
2243  else
2244  R11_->apply(*residual_, *P11res_, Teuchos::NO_TRANS);
2245  }
2246 
2247  { // solve coarse (1,1) block
2248  if (!ImporterCoarse11_.is_null() && !implicitTranspose_) {
2249  RCP<Teuchos::TimeMonitor> tmH = getTimer("import coarse (1,1)");
2250  P11resTmp_->doImport(*P11res_, *ImporterCoarse11_, Xpetra::INSERT);
2251  }
2252  if (!coarseA11_.is_null()) {
2253  RCP<Teuchos::TimeMonitor> tmH = getTimer("solve coarse (1,1)", coarseA11_->getRowMap()->getComm());
2254  HierarchyCoarse11_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersCoarse11_, true);
2255  }
2256  }
2257 
2258  { // update current solution
2259  RCP<Teuchos::TimeMonitor> tmUp = getTimer("update");
2260  P11_->apply(*P11x_, *residual_, Teuchos::NO_TRANS);
2261  X.update(one, *residual_, one);
2262  }
2263 }
2264 
2265 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2266 void RefMaxwell<Scalar, LocalOrdinal, GlobalOrdinal, Node>::solve22(const MultiVector &RHS, MultiVector &X) const {
2267  if (onlyBoundary22_)
2268  return;
2269 
2271 
2272  { // compute residual
2273  RCP<Teuchos::TimeMonitor> tmRes = getTimer("residual calculation");
2274  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2275  if (implicitTranspose_)
2276  Dk_1_->apply(*residual_, *Dres_, Teuchos::TRANS);
2277  else
2278  Dk_1_T_->apply(*residual_, *Dres_, Teuchos::NO_TRANS);
2279  }
2280 
2281  { // solve (2,2) block
2282  if (!Importer22_.is_null() && !implicitTranspose_) {
2283  RCP<Teuchos::TimeMonitor> tm22 = getTimer("import (2,2)");
2284  DresTmp_->doImport(*Dres_, *Importer22_, Xpetra::INSERT);
2285  }
2286  if (!A22_.is_null()) {
2287  RCP<Teuchos::TimeMonitor> tm22 = getTimer("solve (2,2)", A22_->getRowMap()->getComm());
2288  Hierarchy22_->Iterate(*DresSubComm_, *DxSubComm_, numIters22_, true);
2289  }
2290  }
2291 
2292  { // update current solution
2293  RCP<Teuchos::TimeMonitor> tmUp = getTimer("update");
2294  Dk_1_->apply(*Dx_, *residual_, Teuchos::NO_TRANS);
2295  X.update(one, *residual_, one);
2296  }
2297 }
2298 
2299 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2300 void RefMaxwell<Scalar, LocalOrdinal, GlobalOrdinal, Node>::apply(const MultiVector &RHS, MultiVector &X,
2301  Teuchos::ETransp /* mode */,
2302  Scalar /* alpha */,
2303  Scalar /* beta */) const {
2304  RCP<Teuchos::TimeMonitor> tm = getTimer("solve");
2305 
2306  // make sure that we have enough temporary memory
2307  if (!onlyBoundary11_ && X.getNumVectors() != P11res_->getNumVectors())
2308  allocateMemory(X.getNumVectors());
2309 
2310  { // apply pre-smoothing
2311 
2312  RCP<Teuchos::TimeMonitor> tmSm = getTimer("smoothing");
2313 
2314  PreSmoother11_->Apply(X, RHS, use_as_preconditioner_);
2315  }
2316 
2317  // do solve for the 2x2 block system
2318  if (mode_ == "additive")
2319  applyInverseAdditive(RHS, X);
2320  else if (mode_ == "121") {
2321  solveH(RHS, X);
2322  solve22(RHS, X);
2323  solveH(RHS, X);
2324  } else if (mode_ == "212") {
2325  solve22(RHS, X);
2326  solveH(RHS, X);
2327  solve22(RHS, X);
2328  } else if (mode_ == "1")
2329  solveH(RHS, X);
2330  else if (mode_ == "2")
2331  solve22(RHS, X);
2332  else if (mode_ == "7") {
2333  solveH(RHS, X);
2334  { // apply pre-smoothing
2335 
2336  RCP<Teuchos::TimeMonitor> tmSm = getTimer("smoothing");
2337 
2338  PreSmoother11_->Apply(X, RHS, false);
2339  }
2340  solve22(RHS, X);
2341  { // apply post-smoothing
2342 
2343  RCP<Teuchos::TimeMonitor> tmSm = getTimer("smoothing");
2344 
2345  PostSmoother11_->Apply(X, RHS, false);
2346  }
2347  solveH(RHS, X);
2348  } else if (mode_ == "none") {
2349  // do nothing
2350  } else
2351  applyInverseAdditive(RHS, X);
2352 
2353  { // apply post-smoothing
2354 
2355  RCP<Teuchos::TimeMonitor> tmSm = getTimer("smoothing");
2356 
2357  PostSmoother11_->Apply(X, RHS, false);
2358  }
2359 }
2360 
2361 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2363  return false;
2364 }
2365 
2366 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2369  Teuchos::ParameterList &List,
2370  bool ComputePrec) {
2371  int spaceNumber = List.get<int>("refmaxwell: space number", 1);
2372 
2373  RCP<Matrix> Dk_1, Dk_2, D0;
2374  RCP<Matrix> M1_beta, M1_alpha;
2375  RCP<Matrix> Mk_one, Mk_1_one;
2376  RCP<Matrix> invMk_1_invBeta, invMk_2_invAlpha;
2377  RCP<MultiVector> Nullspace11, Nullspace22;
2378  RCP<RealValuedMultiVector> NodalCoords;
2379 
2380  Dk_1 = pop(List, "Dk_1", Dk_1);
2381  Dk_2 = pop<RCP<Matrix> >(List, "Dk_2", Dk_2);
2382  D0 = pop<RCP<Matrix> >(List, "D0", D0);
2383 
2384  M1_beta = pop<RCP<Matrix> >(List, "M1_beta", M1_beta);
2385  M1_alpha = pop<RCP<Matrix> >(List, "M1_alpha", M1_alpha);
2386 
2387  Mk_one = pop<RCP<Matrix> >(List, "Mk_one", Mk_one);
2388  Mk_1_one = pop<RCP<Matrix> >(List, "Mk_1_one", Mk_1_one);
2389 
2390  invMk_1_invBeta = pop<RCP<Matrix> >(List, "invMk_1_invBeta", invMk_1_invBeta);
2391  invMk_2_invAlpha = pop<RCP<Matrix> >(List, "invMk_2_invAlpha", invMk_2_invAlpha);
2392 
2393  Nullspace11 = pop<RCP<MultiVector> >(List, "Nullspace11", Nullspace11);
2394  Nullspace22 = pop<RCP<MultiVector> >(List, "Nullspace22", Nullspace22);
2395  NodalCoords = pop<RCP<RealValuedMultiVector> >(List, "Coordinates", NodalCoords);
2396 
2397  // old parameter names
2398  if (List.isType<RCP<Matrix> >("Ms")) {
2399  if (M1_beta.is_null())
2400  M1_beta = pop<RCP<Matrix> >(List, "Ms");
2401  else
2402  TEUCHOS_ASSERT(false);
2403  }
2404  if (List.isType<RCP<Matrix> >("M1")) {
2405  if (Mk_one.is_null())
2406  Mk_one = pop<RCP<Matrix> >(List, "M1");
2407  else
2408  TEUCHOS_ASSERT(false);
2409  }
2410  if (List.isType<RCP<Matrix> >("M0inv")) {
2411  if (invMk_1_invBeta.is_null())
2412  invMk_1_invBeta = pop<RCP<Matrix> >(List, "M0inv");
2413  else
2414  TEUCHOS_ASSERT(false);
2415  }
2416  if (List.isType<RCP<MultiVector> >("Nullspace")) {
2417  if (Nullspace11.is_null())
2418  Nullspace11 = pop<RCP<MultiVector> >(List, "Nullspace");
2419  else
2420  TEUCHOS_ASSERT(false);
2421  }
2422 
2423  if (spaceNumber == 1) {
2424  if (Dk_1.is_null())
2425  Dk_1 = D0;
2426  else if (D0.is_null())
2427  D0 = Dk_1;
2428  if (M1_beta.is_null())
2429  M1_beta = Mk_one;
2430  } else if (spaceNumber == 2) {
2431  if (Dk_2.is_null())
2432  Dk_2 = D0;
2433  else if (D0.is_null())
2434  D0 = Dk_2;
2435  }
2436 
2437  initialize(spaceNumber,
2438  Dk_1, Dk_2, D0,
2439  M1_beta, M1_alpha,
2440  Mk_one, Mk_1_one,
2441  invMk_1_invBeta, invMk_2_invAlpha,
2442  Nullspace11, Nullspace22,
2443  NodalCoords,
2444  List);
2445 
2446  if (SM_Matrix != Teuchos::null)
2447  resetMatrix(SM_Matrix, ComputePrec);
2448 }
2449 
2450 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2453  const Teuchos::RCP<Matrix> &Ms_Matrix,
2454  const Teuchos::RCP<Matrix> &M0inv_Matrix,
2455  const Teuchos::RCP<Matrix> &M1_Matrix,
2456  const Teuchos::RCP<MultiVector> &Nullspace11,
2457  const Teuchos::RCP<RealValuedMultiVector> &NodalCoords,
2458  Teuchos::ParameterList &List) {
2459  initialize(1,
2460  D0_Matrix, Teuchos::null, D0_Matrix,
2461  Ms_Matrix, Teuchos::null,
2462  M1_Matrix, Teuchos::null,
2463  M0inv_Matrix, Teuchos::null,
2464  Nullspace11, Teuchos::null,
2465  NodalCoords,
2466  List);
2467 }
2468 
2469 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2471  initialize(const int k,
2472  const Teuchos::RCP<Matrix> &Dk_1,
2473  const Teuchos::RCP<Matrix> &Dk_2,
2474  const Teuchos::RCP<Matrix> &D0,
2475  const Teuchos::RCP<Matrix> &M1_beta,
2476  const Teuchos::RCP<Matrix> &M1_alpha,
2477  const Teuchos::RCP<Matrix> &Mk_one,
2478  const Teuchos::RCP<Matrix> &Mk_1_one,
2479  const Teuchos::RCP<Matrix> &invMk_1_invBeta,
2480  const Teuchos::RCP<Matrix> &invMk_2_invAlpha,
2481  const Teuchos::RCP<MultiVector> &Nullspace11,
2482  const Teuchos::RCP<MultiVector> &Nullspace22,
2483  const Teuchos::RCP<RealValuedMultiVector> &NodalCoords,
2484  Teuchos::ParameterList &List) {
2485  spaceNumber_ = k;
2486  if (spaceNumber_ == 1)
2487  solverName_ = "RefMaxwell";
2488  else if (spaceNumber_ == 2)
2489  solverName_ = "RefDarcy";
2490  else
2491  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
2492  "spaceNumber needs to be 1 (HCurl) or 2 (HDiv)");
2493  HierarchyCoarse11_ = Teuchos::null;
2494  Hierarchy22_ = Teuchos::null;
2495  PreSmoother11_ = Teuchos::null;
2496  PostSmoother11_ = Teuchos::null;
2497  disable_addon_ = false;
2498  disable_addon_22_ = true;
2499  mode_ = "additive";
2500 
2501  // set parameters
2502  setParameters(List);
2503 
2504  // some pre-conditions
2505  TEUCHOS_ASSERT((k == 1) || (k == 2));
2506  // Need Dk_1
2507  TEUCHOS_ASSERT(Dk_1 != Teuchos::null);
2508  // Need D0 for aggregation
2509  TEUCHOS_ASSERT(D0 != Teuchos::null);
2510 
2511  // Need M1_beta for aggregation
2512  TEUCHOS_ASSERT(M1_beta != Teuchos::null);
2513  // Need M1_alpha for aggregation if k>=1
2514  if (k >= 2)
2515  TEUCHOS_ASSERT(M1_alpha != Teuchos::null);
2516 
2517  if (!disable_addon_) {
2518  // Need Mk_one and invMk_1_invBeta for addon11
2519  TEUCHOS_ASSERT(Mk_one != Teuchos::null);
2520  TEUCHOS_ASSERT(invMk_1_invBeta != Teuchos::null);
2521  }
2522 
2523  if ((k >= 2) && !disable_addon_22_) {
2524  // Need Dk_2, Mk_1_one and invMk_2_invAlpha for addon22
2525  TEUCHOS_ASSERT(Dk_2 != Teuchos::null);
2526  TEUCHOS_ASSERT(Mk_1_one != Teuchos::null);
2527  TEUCHOS_ASSERT(invMk_2_invAlpha != Teuchos::null);
2528  }
2529 
2530 #ifdef HAVE_MUELU_DEBUG
2531 
2532  TEUCHOS_ASSERT(D0->getRangeMap()->isSameAs(*D0->getRowMap()));
2533 
2534  // M1_beta is square
2535  TEUCHOS_ASSERT(M1_beta->getDomainMap()->isSameAs(*M1_beta->getRangeMap()));
2536  TEUCHOS_ASSERT(M1_beta->getDomainMap()->isSameAs(*M1_beta->getRowMap()));
2537 
2538  // M1_beta is consistent with D0
2539  TEUCHOS_ASSERT(M1_beta->getDomainMap()->isSameAs(*D0->getRangeMap()));
2540 
2541  if (k >= 2) {
2542  // M1_alpha is square
2543  TEUCHOS_ASSERT(M1_alpha->getDomainMap()->isSameAs(*M1_alpha->getRangeMap()));
2544  TEUCHOS_ASSERT(M1_alpha->getDomainMap()->isSameAs(*M1_alpha->getRowMap()));
2545 
2546  // M1_alpha is consistent with D0
2547  TEUCHOS_ASSERT(M1_alpha->getDomainMap()->isSameAs(*D0->getRangeMap()))
2548  }
2549 
2550  if (!disable_addon_) {
2551  // Mk_one is square
2552  TEUCHOS_ASSERT(Mk_one->getDomainMap()->isSameAs(*Mk_one->getRangeMap()));
2553  TEUCHOS_ASSERT(Mk_one->getDomainMap()->isSameAs(*Mk_one->getRowMap()));
2554 
2555  // Mk_one is consistent with Dk_1
2556  TEUCHOS_ASSERT(Mk_one->getDomainMap()->isSameAs(*Dk_1->getRangeMap()));
2557 
2558  // invMk_1_invBeta is square
2559  TEUCHOS_ASSERT(invMk_1_invBeta->getDomainMap()->isSameAs(*invMk_1_invBeta->getRangeMap()));
2560  TEUCHOS_ASSERT(invMk_1_invBeta->getDomainMap()->isSameAs(*invMk_1_invBeta->getRowMap()));
2561 
2562  // invMk_1_invBeta is consistent with Dk_1
2563  TEUCHOS_ASSERT(Mk_one->getDomainMap()->isSameAs(*Dk_1->getRangeMap()));
2564  }
2565 
2566  if ((k >= 2) && !disable_addon_22_) {
2567  // Mk_1_one is square
2568  TEUCHOS_ASSERT(Mk_1_one->getDomainMap()->isSameAs(*Mk_1_one->getRangeMap()));
2569  TEUCHOS_ASSERT(Mk_1_one->getDomainMap()->isSameAs(*Mk_1_one->getRowMap()));
2570 
2571  // Mk_1_one is consistent with Dk_1
2572  TEUCHOS_ASSERT(Mk_1_one->getDomainMap()->isSameAs(*Dk_1->getDomainMap()));
2573 
2574  // Mk_1_one is consistent with Dk_2
2575  TEUCHOS_ASSERT(Mk_1_one->getDomainMap()->isSameAs(*Dk_2->getRangeMap()));
2576 
2577  // invMk_2_invAlpha is square
2578  TEUCHOS_ASSERT(invMk_2_invAlpha->getDomainMap()->isSameAs(*invMk_2_invAlpha->getRangeMap()));
2579  TEUCHOS_ASSERT(invMk_2_invAlpha->getDomainMap()->isSameAs(*invMk_2_invAlpha->getRowMap()));
2580 
2581  // invMk_2_invAlpha is consistent with Dk_2
2582  TEUCHOS_ASSERT(invMk_2_invAlpha->getDomainMap()->isSameAs(*Dk_2->getDomainMap()));
2583  }
2584 #endif
2585 
2586  D0_ = D0;
2587  if (Dk_1->getRowMap()->lib() == Xpetra::UseTpetra) {
2588  // We will remove boundary conditions from Dk_1, and potentially change maps, so we copy the input.
2589  // Fortunately, Dk_1 is quite sparse.
2590  // We cannot use the Tpetra copy constructor, since it does not copy the graph.
2591 
2592  RCP<Matrix> Dk_1copy = MatrixFactory::Build(Dk_1->getRowMap(), Dk_1->getColMap(), 0);
2593  RCP<CrsMatrix> Dk_1copyCrs = rcp_dynamic_cast<CrsMatrixWrap>(Dk_1copy, true)->getCrsMatrix();
2594  ArrayRCP<const size_t> Dk_1rowptr_RCP;
2595  ArrayRCP<const LO> Dk_1colind_RCP;
2596  ArrayRCP<const SC> Dk_1vals_RCP;
2597  rcp_dynamic_cast<CrsMatrixWrap>(Dk_1, true)->getCrsMatrix()->getAllValues(Dk_1rowptr_RCP, Dk_1colind_RCP, Dk_1vals_RCP);
2598 
2599  ArrayRCP<size_t> Dk_1copyrowptr_RCP;
2600  ArrayRCP<LO> Dk_1copycolind_RCP;
2601  ArrayRCP<SC> Dk_1copyvals_RCP;
2602  Dk_1copyCrs->allocateAllValues(Dk_1vals_RCP.size(), Dk_1copyrowptr_RCP, Dk_1copycolind_RCP, Dk_1copyvals_RCP);
2603  Dk_1copyrowptr_RCP.deepCopy(Dk_1rowptr_RCP());
2604  Dk_1copycolind_RCP.deepCopy(Dk_1colind_RCP());
2605  Dk_1copyvals_RCP.deepCopy(Dk_1vals_RCP());
2606  Dk_1copyCrs->setAllValues(Dk_1copyrowptr_RCP,
2607  Dk_1copycolind_RCP,
2608  Dk_1copyvals_RCP);
2609  Dk_1copyCrs->expertStaticFillComplete(Dk_1->getDomainMap(), Dk_1->getRangeMap(),
2610  rcp_dynamic_cast<CrsMatrixWrap>(Dk_1, true)->getCrsMatrix()->getCrsGraph()->getImporter(),
2611  rcp_dynamic_cast<CrsMatrixWrap>(Dk_1, true)->getCrsMatrix()->getCrsGraph()->getExporter());
2612  Dk_1_ = Dk_1copy;
2613  } else
2614  Dk_1_ = MatrixFactory::BuildCopy(Dk_1);
2615 
2616  if ((!Dk_2.is_null()) && (Dk_2->getRowMap()->lib() == Xpetra::UseTpetra)) {
2617  // We will remove boundary conditions from Dk_2, and potentially change maps, so we copy the input.
2618  // Fortunately, Dk_2 is quite sparse.
2619  // We cannot use the Tpetra copy constructor, since it does not copy the graph.
2620 
2621  RCP<Matrix> Dk_2copy = MatrixFactory::Build(Dk_2->getRowMap(), Dk_2->getColMap(), 0);
2622  RCP<CrsMatrix> Dk_2copyCrs = rcp_dynamic_cast<CrsMatrixWrap>(Dk_2copy, true)->getCrsMatrix();
2623  ArrayRCP<const size_t> Dk_2rowptr_RCP;
2624  ArrayRCP<const LO> Dk_2colind_RCP;
2625  ArrayRCP<const SC> Dk_2vals_RCP;
2626  rcp_dynamic_cast<CrsMatrixWrap>(Dk_2, true)->getCrsMatrix()->getAllValues(Dk_2rowptr_RCP, Dk_2colind_RCP, Dk_2vals_RCP);
2627 
2628  ArrayRCP<size_t> Dk_2copyrowptr_RCP;
2629  ArrayRCP<LO> Dk_2copycolind_RCP;
2630  ArrayRCP<SC> Dk_2copyvals_RCP;
2631  Dk_2copyCrs->allocateAllValues(Dk_2vals_RCP.size(), Dk_2copyrowptr_RCP, Dk_2copycolind_RCP, Dk_2copyvals_RCP);
2632  Dk_2copyrowptr_RCP.deepCopy(Dk_2rowptr_RCP());
2633  Dk_2copycolind_RCP.deepCopy(Dk_2colind_RCP());
2634  Dk_2copyvals_RCP.deepCopy(Dk_2vals_RCP());
2635  Dk_2copyCrs->setAllValues(Dk_2copyrowptr_RCP,
2636  Dk_2copycolind_RCP,
2637  Dk_2copyvals_RCP);
2638  Dk_2copyCrs->expertStaticFillComplete(Dk_2->getDomainMap(), Dk_2->getRangeMap(),
2639  rcp_dynamic_cast<CrsMatrixWrap>(Dk_2, true)->getCrsMatrix()->getCrsGraph()->getImporter(),
2640  rcp_dynamic_cast<CrsMatrixWrap>(Dk_2, true)->getCrsMatrix()->getCrsGraph()->getExporter());
2641  Dk_2_ = Dk_2copy;
2642  } else if (!Dk_2.is_null())
2643  Dk_2_ = MatrixFactory::BuildCopy(Dk_2);
2644 
2645  M1_beta_ = M1_beta;
2646  M1_alpha_ = M1_alpha;
2647 
2648  Mk_one_ = Mk_one;
2649  Mk_1_one_ = Mk_1_one;
2650 
2651  invMk_1_invBeta_ = invMk_1_invBeta;
2652  invMk_2_invAlpha_ = invMk_2_invAlpha;
2653 
2654  NodalCoords_ = NodalCoords;
2655  Nullspace11_ = Nullspace11;
2656  Nullspace22_ = Nullspace22;
2657 
2658  dump(D0_, "D0.m");
2659  dump(Dk_1_, "Dk_1_clean.m");
2660  dump(Dk_2_, "Dk_2_clean.m");
2661 
2662  dump(M1_beta_, "M1_beta.m");
2663  dump(M1_alpha_, "M1_alpha.m");
2664 
2665  dump(Mk_one_, "Mk_one.m");
2666  dump(Mk_1_one_, "Mk_1_one.m");
2667 
2668  dump(invMk_1_invBeta_, "invMk_1_invBeta.m");
2669  dump(invMk_2_invAlpha_, "invMk_2_invAlpha.m");
2670 
2671  dumpCoords(NodalCoords_, "coords.m");
2672 }
2673 
2674 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2676  describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel /* verbLevel */) const {
2677  std::ostringstream oss;
2678 
2679  RCP<const Teuchos::Comm<int> > comm = SM_Matrix_->getDomainMap()->getComm();
2680 
2681 #ifdef HAVE_MPI
2682  int root;
2683  if (!coarseA11_.is_null())
2684  root = comm->getRank();
2685  else
2686  root = -1;
2687 
2688  int actualRoot;
2689  reduceAll(*comm, Teuchos::REDUCE_MAX, root, Teuchos::ptr(&actualRoot));
2690  root = actualRoot;
2691 #endif
2692 
2693  oss << "\n--------------------------------------------------------------------------------\n"
2694  << "--- " + solverName_ +
2695  " Summary ---\n"
2696  "--------------------------------------------------------------------------------"
2697  << std::endl;
2698  oss << std::endl;
2699 
2700  GlobalOrdinal numRows;
2701  GlobalOrdinal nnz;
2702 
2703  SM_Matrix_->getRowMap()->getComm()->barrier();
2704 
2705  numRows = SM_Matrix_->getGlobalNumRows();
2706  nnz = SM_Matrix_->getGlobalNumEntries();
2707 
2708  Xpetra::global_size_t tt = numRows;
2709  int rowspacer = 3;
2710  while (tt != 0) {
2711  tt /= 10;
2712  rowspacer++;
2713  }
2714  tt = nnz;
2715  int nnzspacer = 2;
2716  while (tt != 0) {
2717  tt /= 10;
2718  nnzspacer++;
2719  }
2720 
2721  oss << "block " << std::setw(rowspacer) << " rows " << std::setw(nnzspacer) << " nnz " << std::setw(9) << " nnz/row" << std::endl;
2722  oss << "(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2723 
2724  if (!A22_.is_null()) {
2725  numRows = A22_->getGlobalNumRows();
2726  nnz = A22_->getGlobalNumEntries();
2727 
2728  oss << "(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2729  }
2730 
2731  oss << std::endl;
2732 
2733  {
2734  if (PreSmoother11_ != null && PreSmoother11_ == PostSmoother11_)
2735  oss << "Smoother 11 both : " << PreSmoother11_->description() << std::endl;
2736  else {
2737  oss << "Smoother 11 pre : "
2738  << (PreSmoother11_ != null ? PreSmoother11_->description() : "no smoother") << std::endl;
2739  oss << "Smoother 11 post : "
2740  << (PostSmoother11_ != null ? PostSmoother11_->description() : "no smoother") << std::endl;
2741  }
2742  }
2743  oss << std::endl;
2744 
2745  std::string outstr = oss.str();
2746 
2747 #ifdef HAVE_MPI
2748  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2749  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
2750 
2751  int strLength = outstr.size();
2752  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
2753  if (comm->getRank() != root)
2754  outstr.resize(strLength);
2755  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
2756 #endif
2757 
2758  out << outstr;
2759 
2760  if (!HierarchyCoarse11_.is_null())
2761  HierarchyCoarse11_->describe(out, GetVerbLevel());
2762 
2763  if (!Hierarchy22_.is_null())
2764  Hierarchy22_->describe(out, GetVerbLevel());
2765 
2766  if (IsPrint(Statistics2)) {
2767  // Print the grid of processors
2768  std::ostringstream oss2;
2769 
2770  oss2 << "Sub-solver distribution over ranks" << std::endl;
2771  oss2 << "( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
2772 
2773  int numProcs = comm->getSize();
2774 #ifdef HAVE_MPI
2775  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2776  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
2777  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
2778 #endif
2779 
2780  char status = 0;
2781  if (!coarseA11_.is_null())
2782  status += 1;
2783  if (!A22_.is_null())
2784  status += 2;
2785  std::vector<char> states(numProcs, 0);
2786 #ifdef HAVE_MPI
2787  MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
2788 #else
2789  states.push_back(status);
2790 #endif
2791 
2792  int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
2793  for (int proc = 0; proc < numProcs; proc += rowWidth) {
2794  for (int j = 0; j < rowWidth; j++)
2795  if (proc + j < numProcs)
2796  if (states[proc + j] == 0)
2797  oss2 << ".";
2798  else if (states[proc + j] == 1)
2799  oss2 << "1";
2800  else if (states[proc + j] == 2)
2801  oss2 << "2";
2802  else
2803  oss2 << "B";
2804  else
2805  oss2 << " ";
2806 
2807  oss2 << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
2808  }
2809  oss2 << std::endl;
2810  GetOStream(Statistics2) << oss2.str();
2811  }
2812 }
2813 
2814 } // namespace MueLu
2815 
2816 #define MUELU_REFMAXWELL_SHORT
2817 #endif // ifdef MUELU_REFMAXWELL_DEF_HPP
Important warning messages (one line)
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
RCP< Matrix > buildAddon(const int spaceNumber)
void initialize(int *argc, char ***argv)
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows)
static void ZeroDirichletCols(Teuchos::RCP< Matrix > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
This class specifies the default factory that should generate some data on a Level if the data does n...
static void DetectDirichletColsAndDomains(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< bool > &dirichletRows, Teuchos::ArrayRCP< bool > dirichletCols, Teuchos::ArrayRCP< bool > dirichletDomain)
Detects Dirichlet columns &amp; domains from a list of Dirichlet rows.
#define MueLu_sumAll(rcpComm, in, out)
ParameterList & setEntry(const std::string &name, U &&entry)
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
ConstIterator end() const
MueLu::DefaultLocalOrdinal LocalOrdinal
Factory for determing the number of partitions for rebalancing.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level-&gt;Get&lt; RCP&lt;Matrix&gt; &gt;(&quot;A&quot;, factory) if factory == NULL =&gt; use default factory.
void solve22(const MultiVector &RHS, MultiVector &X) const
apply solve to 2-2 block only
Factory for generating coarse level map. Used by TentativePFactory.
RCP< Level > & GetLevel(const int levelID=0)
Retrieve a certain level from hierarchy.
void dumpCoords(const RCP< RealValuedMultiVector > &X, std::string name) const
dump out real-valued multivector
const Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
virtual void SetFactory(const std::string &varName, const RCP< const FactoryBase > &factory)
Configuration.
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
ParameterList & disableRecursiveValidation()
#define MueLu_maxAll(rcpComm, in, out)
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::coordinateType, LocalOrdinal, GlobalOrdinal, Node >> X)
T & get(const std::string &name, T def_value)
void setFineLevelSmoother11()
Set the fine level smoother.
Class that encapsulates external library smoothers.
static void MultiplyRAP(const Matrix &R, bool transposeR, const Matrix &A, bool transposeA, const Matrix &P, bool transposeP, Matrix &Ac, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
static T squareroot(T x)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
static magnitudeType real(T a)
Factory for building permutation matrix that can be be used to shuffle data (matrices, vectors) among processes.
Teuchos::RCP< Matrix > buildProjection(const int spaceNumber, const RCP< MultiVector > &EdgeNullspace) const
Builds a projection from a vector values space into a vector valued nodal space.
void buildNodalProlongator(const Teuchos::RCP< Matrix > &A_nodal, Teuchos::RCP< Matrix > &P_nodal, Teuchos::RCP< MultiVector > &Nullspace_nodal, Teuchos::RCP< RealValuedMultiVector > &Coords_nodal) const
One-liner description of what is happening.
void determineSubHierarchyCommSizes(bool &doRebalancing, int &rebalanceStriding, int &numProcsCoarseA11, int &numProcsA22)
Determine how large the sub-communicators for the two hierarchies should be.
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
T * get() const
std::string tolower(const std::string &str)
void solveH(const MultiVector &RHS, MultiVector &X) const
apply solve to 1-1 block only
void SetPreviousLevel(const RCP< Level > &previousLevel)
Definition: MueLu_Level.cpp:62
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Interface to Zoltan library.This interface provides access to partitioning methods in Zoltan...
size_type size() const
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
Definition: MueLu_Level.cpp:69
void setParameters(Teuchos::ParameterList &list)
Set parameters.
#define MueLu_minAll(rcpComm, in, out)
static void CheckRepairMainDiagonal(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &Ac, bool const &repairZeroDiagonals, Teuchos::FancyOStream &fos, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType threshold=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< Scalar >::magnitudeType >::zero())
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
RCP< MultiVector > buildNullspace(const int spaceNumber, const Kokkos::View< bool *, typename Node::device_type > &bcs, const bool applyBCs)
Builds a nullspace.
static magnitudeType rmax()
Factory for building tentative prolongator.
static void SetMueLuOFileStream(const std::string &filename)
MsgType toVerbLevel(const std::string &verbLevelStr)
Print even more statistics.
void setlib(Xpetra::UnderlyingLib lib2)
static RCP< Time > getNewTimer(const std::string &name)
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix...
Additional warnings.
void resize(const size_type n, const T &val=T())
void Build(Level &currentLevel) const
Creates pre and post smoothers.
void allocateMemory(int numVectors) const
allocate multivectors for solve
bool isParameter(const std::string &name) const
bool remove(std::string const &name, bool throwIfNotExists=true)
static RCP< MultiVector > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
void applyInverseAdditive(const MultiVector &RHS, MultiVector &X) const
apply additive algorithm for 2x2 solve
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
virtual void SetParameterList(const Teuchos::ParameterList &paramList)
Set parameters from a parameter list and return with default values.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
void resetMatrix(Teuchos::RCP< Matrix > SM_Matrix_new, bool ComputePrec=true)
Reset system matrix.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BuildCopy(const RCP< const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > A)
void rebalanceCoarse11Matrix(const int rebalanceStriding, const int numProcsCoarseA11)
rebalance the coarse A11 matrix, as well as P11, CoordsCoarse11 and Addon11
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
bool isSublist(const std::string &name) const
static void setMatvecParams(Matrix &A, RCP< ParameterList > matvecParams)
Sets matvec params on a matrix.
void initialize(const Teuchos::RCP< Matrix > &D0_Matrix, const Teuchos::RCP< Matrix > &Ms_Matrix, const Teuchos::RCP< Matrix > &M0inv_Matrix, const Teuchos::RCP< Matrix > &M1_Matrix, const Teuchos::RCP< MultiVector > &Nullspace11, const Teuchos::RCP< RealValuedMultiVector > &NodalCoords, Teuchos::ParameterList &List)
RCP< Matrix > buildVectorNodalProlongator(const Teuchos::RCP< Matrix > &P_nodal) const
void dump(const RCP< Matrix > &A, std::string name) const
dump out matrix
void buildCoarse11Matrix()
Compute coarseA11 = P11^{T}*SM*P11 + addon efficiently.
Factory to export aggregation info or visualize aggregates using VTK.
Interface to Zoltan2 library.This interface provides access to partitioning methods in Zoltan2...
void deepCopy(const ArrayView< const T > &av)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > PtAPWrapper(const RCP< Matrix > &A, const RCP< Matrix > &P, Teuchos::ParameterList &params, const std::string &label)
Performs an P^T AP.
void validateParametersAndSetDefaults(ParameterList const &validParamList, int const depth=1000)
virtual void setObjectLabel(const std::string &objectLabel)
AmalgamationFactory for subblocks of strided map based amalgamation data.
void build22Matrix(const bool reuse, const bool doRebalancing, const int rebalanceStriding, const int numProcsA22)
Setup A22 = D0^T SM D0 and rebalance it, as well as D0 and Coords_.
static const RCP< const NoFactory > getRCP()
Static Get() functions.
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
ConstIterator begin() const
static void detectBoundaryConditionsSM(RCP< Matrix > &SM_Matrix, RCP< Matrix > &D0_Matrix, magnitudeType rowSumTol, bool useKokkos_, Kokkos::View< bool *, typename Node::device_type::memory_space > &BCrowsKokkos, Kokkos::View< bool *, typename Node::device_type::memory_space > &BCcolsKokkos, Kokkos::View< bool *, typename Node::device_type::memory_space > &BCdomainKokkos, int &BCedges, int &BCnodes, Teuchos::ArrayRCP< bool > &BCrows, Teuchos::ArrayRCP< bool > &BCcols, Teuchos::ArrayRCP< bool > &BCdomain, bool &allEdgesBoundary, bool &allNodesBoundary)
Detect Dirichlet boundary conditions.
Teuchos::RCP< Teuchos::TimeMonitor > getTimer(std::string name, RCP< const Teuchos::Comm< int > > comm=Teuchos::null) const
get a (synced) timer
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void compute(bool reuse=false)
Setup the preconditioner.
Applies permutation to grid transfer operators.
size_t global_size_t
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
Teuchos::RCP< Teuchos::ParameterList > getValidParamterList()
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_HIGH) const
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
Print all timing information.
void setupSubSolve(Teuchos::RCP< Hierarchy > &hierarchy, Teuchos::RCP< Operator > &thyraPrecOp, const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< MultiVector > &Nullspace, const Teuchos::RCP< RealValuedMultiVector > &Coords, Teuchos::ParameterList &params, std::string &label, const bool reuse, const bool isSingular=false)
Setup a subsolve.
RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > toTpetra(const RCP< const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &graph)
Factory for creating a graph based on a given matrix.
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
void SetLevelID(int levelID)
Set level number.
Definition: MueLu_Level.cpp:53
Scalar SC
bool isType(const std::string &name) const
Class for transferring coordinates from a finer level to a coarser one.
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
const Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
Factory for creating a graph based on a given matrix.
Factory for building coarse matrices.
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
Description of what is happening (more verbose)
Teuchos::RCP< Xpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Matrix2CrsMatrix(Teuchos::RCP< Xpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &matrix)
Factory for building coarse matrices.
Teuchos::ScalarTraits< Scalar >::coordinateType coordinateType
ParameterEntry & getEntry(const std::string &name)
Factory for building Smoothed Aggregation prolongators.Input/output of SaPFactory
Factory for building uncoupled aggregates.
static void thresholdedAbs(const RCP< Matrix > &A, const magnitudeType thresholded)
static std::string translate(Teuchos::ParameterList &paramList, const std::string &defaultVals="")
: Translate ML parameters to MueLu parameter XML string
void buildProlongator(const int spaceNumber, const Teuchos::RCP< Matrix > &A_nodal_Matrix, const RCP< MultiVector > &EdgeNullspace, Teuchos::RCP< Matrix > &edgeProlongator, Teuchos::RCP< MultiVector > &coarseEdgeNullspace, Teuchos::RCP< RealValuedMultiVector > &coarseNodalCoords) const
T * get() const
static void SetMueLuOStream(const Teuchos::RCP< Teuchos::FancyOStream > &mueluOStream)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
static RCP< Matrix > removeExplicitZeros(const RCP< Matrix > &A, const magnitudeType tolerance, const bool keepDiagonal=true, const size_t expectedNNZperRow=0)
Remove explicit zeros.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
T pop(Teuchos::ParameterList &pl, std::string const &name_in)
bool is_null() const