MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_RefMaxwell_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_REFMAXWELL_DEF_HPP
47 #define MUELU_REFMAXWELL_DEF_HPP
48 
49 #include <sstream>
50 
51 #include "MueLu_ConfigDefs.hpp"
52 
53 #include "Xpetra_Map.hpp"
54 #include "Xpetra_MatrixMatrix.hpp"
57 #include "Xpetra_MatrixUtils.hpp"
58 
60 
61 #include "MueLu_AmalgamationFactory.hpp"
62 #include "MueLu_RAPFactory.hpp"
63 #include "MueLu_ThresholdAFilterFactory.hpp"
64 #include "MueLu_TransPFactory.hpp"
65 #include "MueLu_SmootherFactory.hpp"
66 
67 #include "MueLu_CoalesceDropFactory.hpp"
68 #include "MueLu_CoarseMapFactory.hpp"
69 #include "MueLu_CoordinatesTransferFactory.hpp"
70 #include "MueLu_UncoupledAggregationFactory.hpp"
71 #include "MueLu_TentativePFactory.hpp"
72 #include "MueLu_Utilities.hpp"
73 
74 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
75 #include "MueLu_CoalesceDropFactory_kokkos.hpp"
76 #include "MueLu_CoarseMapFactory_kokkos.hpp"
77 #include "MueLu_CoordinatesTransferFactory_kokkos.hpp"
78 #include "MueLu_UncoupledAggregationFactory_kokkos.hpp"
79 #include "MueLu_TentativePFactory_kokkos.hpp"
80 #include "MueLu_Utilities_kokkos.hpp"
81 #include <Kokkos_Core.hpp>
82 #include <KokkosSparse_CrsMatrix.hpp>
83 #endif
84 
85 #include "MueLu_ZoltanInterface.hpp"
86 #include "MueLu_Zoltan2Interface.hpp"
87 #include "MueLu_RepartitionHeuristicFactory.hpp"
88 #include "MueLu_RepartitionFactory.hpp"
89 #include "MueLu_RebalanceAcFactory.hpp"
90 #include "MueLu_RebalanceTransferFactory.hpp"
91 
92 #include "MueLu_VerbosityLevel.hpp"
93 
95 
96 #ifdef HAVE_MUELU_CUDA
97 #include "cuda_profiler_api.h"
98 #endif
99 
100 
101 namespace MueLu {
102 
103  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
105  return SM_Matrix_->getDomainMap();
106  }
107 
108 
109  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
111  return SM_Matrix_->getRangeMap();
112  }
113 
114 
115  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
117 
118  parameterList_ = list;
119  disable_addon_ = list.get("refmaxwell: disable addon",true);
120  mode_ = list.get("refmaxwell: mode","additive");
121  use_as_preconditioner_ = list.get<bool>("refmaxwell: use as preconditioner");
122  dump_matrices_ = list.get("refmaxwell: dump matrices",false);
123 
124  if(list.isSublist("refmaxwell: 11list"))
125  precList11_ = list.sublist("refmaxwell: 11list");
126 
127  if(list.isSublist("refmaxwell: 22list"))
128  precList22_ = list.sublist("refmaxwell: 22list");
129 
130  if(list.isSublist("smoother: params")) {
131  smootherList_ = list.sublist("smoother: params");
132  }
133 
134 #if !defined(HAVE_MUELU_KOKKOS_REFACTOR)
135  useKokkos_ = false;
136 #elif defined(HAVE_MUELU_KOKKOS_REFACTOR_USE_BY_DEFAULT)
137  useKokkos_ = list.get("use kokkos refactor",true);
138 #else
139  useKokkos_ = list.get("use kokkos refactor",false);
140 #endif
141 
142  }
143 
144 
145  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
147 
148 
149  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType realType;
150 
151 #ifdef HAVE_MUELU_CUDA
152  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStart();
153 #endif
154 
155  Teuchos::TimeMonitor tmCompute(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: compute"));
156 
157  std::map<std::string, MsgType> verbMap;
158  verbMap["none"] = None;
159  verbMap["low"] = Low;
160  verbMap["medium"] = Medium;
161  verbMap["high"] = High;
162  verbMap["extreme"] = Extreme;
163  verbMap["test"] = Test;
164 
166  std::string verbosityLevel = parameterList_.get<std::string>("verbosity", "medium");
167 
168  TEUCHOS_TEST_FOR_EXCEPTION(verbMap.count(verbosityLevel) == 0, Exceptions::RuntimeError,
169  "Invalid verbosity level: \"" << verbosityLevel << "\"");
170  VerboseObject::SetDefaultVerbLevel(verbMap[verbosityLevel]);
171 
172 
173  bool defaultFilter = false;
174 
175  // Remove zero entries from D0 if necessary.
176  // In the construction of the prolongator we use the graph of the
177  // matrix, so zero entries mess it up.
178  if (parameterList_.get<bool>("refmaxwell: filter D0", true) && D0_Matrix_->getNodeMaxNumRowEntries()>2) {
179  Level fineLevel;
180  fineLevel.SetFactoryManager(null);
181  fineLevel.SetLevelID(0);
182  fineLevel.Set("A",D0_Matrix_);
183  fineLevel.setlib(D0_Matrix_->getDomainMap()->lib());
184  // We expect D0 to have entries +-1, so any threshold value will do.
185  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",1.0e-8,/*keepDiagonal=*/false,/*expectedNNZperRow=*/2));
186  fineLevel.Request("A",ThreshFact.get());
187  ThreshFact->Build(fineLevel);
188  D0_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
189 
190  // If D0 has too many zeros, maybe SM and M1 do as well.
191  defaultFilter = true;
192  }
193 
194  if (parameterList_.get<bool>("refmaxwell: filter SM", defaultFilter)) {
195  RCP<Vector> diag = VectorFactory::Build(SM_Matrix_->getRowMap());
196  // find a reasonable absolute value threshold
197  SM_Matrix_->getLocalDiagCopy(*diag);
198  magnitudeType threshold = 1.0e-8 * diag->normInf();
199 
200  Level fineLevel;
201  fineLevel.SetFactoryManager(null);
202  fineLevel.SetLevelID(0);
203  fineLevel.Set("A",SM_Matrix_);
204  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
205  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",threshold,/*keepDiagonal=*/true));
206  fineLevel.Request("A",ThreshFact.get());
207  ThreshFact->Build(fineLevel);
208  SM_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
209  }
210 
211  if (parameterList_.get<bool>("refmaxwell: filter M1", defaultFilter)) {
212  RCP<Vector> diag = VectorFactory::Build(M1_Matrix_->getRowMap());
213  // find a reasonable absolute value threshold
214  M1_Matrix_->getLocalDiagCopy(*diag);
215  magnitudeType threshold = 1.0e-8 * diag->normInf();
216 
217  Level fineLevel;
218  fineLevel.SetFactoryManager(null);
219  fineLevel.SetLevelID(0);
220  fineLevel.Set("A",M1_Matrix_);
221  fineLevel.setlib(M1_Matrix_->getDomainMap()->lib());
222  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",threshold,/*keepDiagonal=*/true));
223  fineLevel.Request("A",ThreshFact.get());
224  ThreshFact->Build(fineLevel);
225  M1_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
226  }
227 
228  // clean rows associated with boundary conditions
229  // Find rows with only 1 or 2 nonzero entries, record them in BCrows_.
230  // BCrows_[i] is true, iff i is a boundary row
231  // BCcols_[i] is true, iff i is a boundary column
232 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
233  if (useKokkos_) {
234  BCrowsKokkos_ = Utilities_kokkos::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits<magnitudeType>::eps(),/*count_twos_as_dirichlet=*/true);
235  BCcolsKokkos_ = Utilities_kokkos::DetectDirichletCols(*D0_Matrix_,BCrowsKokkos_);
236  if (IsPrint(Statistics2)) {
237  int BCrowcount = 0;
238  for (size_t i = 0; i<BCrowsKokkos_.size(); i++)
239  if (BCrowsKokkos_(i))
240  BCrowcount += 1;
241  int BCcolcount = 0;
242  for (size_t i = 0; i<BCcolsKokkos_.size(); i++)
243  if (BCcolsKokkos_(i))
244  BCcolcount += 1;
245  GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCrowcount << " BC rows and " << BCcolcount << " BC columns." << std::endl;
246  }
247  } else
248 #endif
249  {
250  BCrows_ = Utilities::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits<magnitudeType>::eps(),/*count_twos_as_dirichlet=*/true);
251  BCcols_ = Utilities::DetectDirichletCols(*D0_Matrix_,BCrows_);
252  if (IsPrint(Statistics2)) {
253  int BCrowcount = 0;
254  for (auto it = BCrows_.begin(); it != BCrows_.end(); ++it)
255  if (*it)
256  BCrowcount += 1;
257  int BCcolcount = 0;
258  for (auto it = BCcols_.begin(); it != BCcols_.end(); ++it)
259  if (*it)
260  BCcolcount += 1;
261  GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCrowcount << " BC rows and " << BCcolcount << " BC columns." << std::endl;
262  }
263  }
264 
265  // build nullspace if necessary
266  if(Nullspace_ != null) {
267  // no need to do anything - nullspace is built
268  TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
269  }
270  else if(Nullspace_ == null && Coords_ != null) {
271  // normalize coordinates
272  typedef typename RealValuedMultiVector::scalar_type realScalarType;
273  typedef typename Teuchos::ScalarTraits<realScalarType>::magnitudeType realMagnitudeType;
274  Array<realMagnitudeType> norms(Coords_->getNumVectors());
275  Coords_->norm2(norms);
276  for (size_t i=0;i<Coords_->getNumVectors();i++)
277  norms[i] = ((realMagnitudeType)1.0)/norms[i];
278  Coords_->scale(norms());
279  Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
280 
281  // Cast coordinates to Scalar so they can be multiplied against D0
282 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
283  RCP<MultiVector> CoordsSC;
284  if (useKokkos_)
285  CoordsSC = Utilities_kokkos::RealValuedToScalarMultiVector(Coords_);
286  else
287  CoordsSC = Utilities::RealValuedToScalarMultiVector(Coords_);
288 #else
290 #endif
291  D0_Matrix_->apply(*CoordsSC,*Nullspace_);
292  }
293  else {
294  GetOStream(Errors) << "MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
295  }
296 
297  // Nuke the BC edges in nullspace
298 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
299  if (useKokkos_)
300  Utilities_kokkos::ZeroDirichletRows(Nullspace_,BCrowsKokkos_);
301  else
302  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
303 #else
304  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
305 #endif
306 
307  if (dump_matrices_)
308  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("D0_clean.mat"), *D0_Matrix_);
309 
310  // build special prolongator for (1,1)-block
311  if(P11_==null) {
312  // Form A_nodal = D0* M1 D0 (aka TMT_agg)
313  Level fineLevel, coarseLevel;
314  fineLevel.SetFactoryManager(null);
315  coarseLevel.SetFactoryManager(null);
316  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
317  fineLevel.SetLevelID(0);
318  coarseLevel.SetLevelID(1);
319  fineLevel.Set("A",M1_Matrix_);
320  coarseLevel.Set("P",D0_Matrix_);
321  coarseLevel.setlib(M1_Matrix_->getDomainMap()->lib());
322  fineLevel.setlib(M1_Matrix_->getDomainMap()->lib());
323  coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
324  fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
325 
326  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
327  ParameterList rapList = *(rapFact->GetValidParameterList());
328  rapList.set("transpose: use implicit", parameterList_.get<bool>("transpose: use implicit", false));
329  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
330  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
331  rapFact->SetParameterList(rapList);
332 
333  RCP<TransPFactory> transPFactory;
334  if (!parameterList_.get<bool>("transpose: use implicit", false)) {
335  transPFactory = rcp(new TransPFactory());
336  rapFact->SetFactory("R", transPFactory);
337  }
338 
339  coarseLevel.Request("A", rapFact.get());
340 
341  A_nodal_Matrix_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
342 
343  // build special prolongator
344  GetOStream(Runtime0) << "RefMaxwell::compute(): building special prolongator" << std::endl;
345  buildProlongator();
346 
347 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
348  if (useKokkos_)
349  R11_ = Utilities_kokkos::Transpose(*P11_);
350  else
351  R11_ = Utilities::Transpose(*P11_);
352 #else
353  R11_ = Utilities::Transpose(*P11_);
354 #endif
355  }
356 
357 #ifdef HAVE_MPI
358  bool doRebalancing = parameterList_.get<bool>("refmaxwell: subsolves on subcommunicators", false);
359  int numProcsAH, numProcsA22;
360 #endif
361  {
362  // build coarse grid operator for (1,1)-block
363  formCoarseMatrix();
364 
365 #ifdef HAVE_MPI
366  int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
367  if (doRebalancing && numProcs > 1) {
368  GlobalOrdinal globalNumRowsAH = AH_->getRowMap()->getGlobalNumElements();
369  GlobalOrdinal globalNumRowsA22 = D0_Matrix_->getDomainMap()->getGlobalNumElements();
370  double ratio = parameterList_.get<double>("refmaxwell: ratio AH / A22 subcommunicators", 1.0);
371  numProcsAH = numProcs * globalNumRowsAH / (globalNumRowsAH + ratio*globalNumRowsA22);
372  numProcsA22 = numProcs * ratio * globalNumRowsA22 / (globalNumRowsAH + ratio*globalNumRowsA22);
373  if (numProcsAH + numProcsA22 < numProcs)
374  ++numProcsAH;
375  if (numProcsAH + numProcsA22 < numProcs)
376  ++numProcsA22;
377  numProcsAH = std::max(numProcsAH, 1);
378  numProcsA22 = std::max(numProcsA22, 1);
379  } else
380  doRebalancing = false;
381 
382  if (doRebalancing) { // rebalance AH
383  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: Rebalance AH"));
384 
385  Level fineLevel, coarseLevel;
386  fineLevel.SetFactoryManager(null);
387  coarseLevel.SetFactoryManager(null);
388  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
389  fineLevel.SetLevelID(0);
390  coarseLevel.SetLevelID(1);
391  coarseLevel.Set("A",AH_);
392  coarseLevel.Set("P",P11_);
393  coarseLevel.Set("R",R11_);
394  coarseLevel.Set("Coordinates",CoordsH_);
395  coarseLevel.Set("number of partitions", numProcsAH);
396  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
397 
398  coarseLevel.setlib(AH_->getDomainMap()->lib());
399  fineLevel.setlib(AH_->getDomainMap()->lib());
400  coarseLevel.setObjectLabel("RefMaxwell (1,1)");
401  fineLevel.setObjectLabel("RefMaxwell (1,1)");
402 
403  // auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
404  // ParameterList repartheurParams;
405  // repartheurParams.set("repartition: start level",0);
406  // repartheurParams.set("repartition: min rows per proc", precList11_.get<int>("repartition: min rows per proc", 1024));
407  // repartheurParams.set("repartition: target rows per proc", precList11_.get<int>("repartition: target rows per proc", 0));
408  // repartheurParams.set("repartition: max imbalance", precList11_.get<double>("repartition: max imbalance", 1.1));
409  // repartheurFactory->SetParameterList(repartheurParams);
410 
411  std::string partName = precList11_.get<std::string>("repartition: partitioner", "zoltan2");
412  RCP<Factory> partitioner;
413  if (partName == "zoltan") {
414 #ifdef HAVE_MUELU_ZOLTAN
415  partitioner = rcp(new ZoltanInterface());
416  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
417  // partitioner->SetFactory("number of partitions", repartheurFactory);
418 #else
419  throw Exceptions::RuntimeError("Zoltan interface is not available");
420 #endif
421  } else if (partName == "zoltan2") {
422 #ifdef HAVE_MUELU_ZOLTAN2
423  partitioner = rcp(new Zoltan2Interface());
424  ParameterList partParams;
425  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList11_.sublist("repartition: params", false)));
426  partParams.set("ParameterList", partpartParams);
427  partitioner->SetParameterList(partParams);
428  // partitioner->SetFactory("number of partitions", repartheurFactory);
429 #else
430  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
431 #endif
432  }
433 
434  auto repartFactory = rcp(new RepartitionFactory());
435  ParameterList repartParams;
436  repartParams.set("repartition: print partition distribution", precList11_.get<bool>("repartition: print partition distribution", false));
437  repartParams.set("repartition: remap parts", precList11_.get<bool>("repartition: remap parts", true));
438  repartFactory->SetParameterList(repartParams);
439  // repartFactory->SetFactory("number of partitions", repartheurFactory);
440  repartFactory->SetFactory("Partition", partitioner);
441 
442  auto newP = rcp(new RebalanceTransferFactory());
443  ParameterList newPparams;
444  newPparams.set("type", "Interpolation");
445  newPparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
446  newPparams.set("repartition: use subcommunicators", true);
447  newPparams.set("repartition: rebalance Nullspace", false);
448  newP->SetFactory("Coordinates", NoFactory::getRCP());
449  newP->SetParameterList(newPparams);
450  newP->SetFactory("Importer", repartFactory);
451 
452  // Rebalanced R
453  auto newR = rcp(new RebalanceTransferFactory());
454  ParameterList newRparams;
455  newRparams.set("type", "Restriction");
456  newRparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
457  newRparams.set("repartition: use subcommunicators", true);
458  newR->SetParameterList(newRparams);
459  newR->SetFactory("Importer", repartFactory);
460 
461  auto newA = rcp(new RebalanceAcFactory());
462  ParameterList rebAcParams;
463  rebAcParams.set("repartition: use subcommunicators", true);
464  newA->SetParameterList(rebAcParams);
465  newA->SetFactory("Importer", repartFactory);
466 
467  coarseLevel.Request("R", newR.get());
468  coarseLevel.Request("P", newP.get());
469  coarseLevel.Request("Importer", repartFactory.get());
470  coarseLevel.Request("A", newA.get());
471  coarseLevel.Request("Coordinates", newP.get());
472  repartFactory->Build(coarseLevel);
473 
474  if (!precList11_.get<bool>("repartition: rebalance P and R", false))
475  ImporterH_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
476  P11_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
477  R11_ = coarseLevel.Get< RCP<Matrix> >("R", newR.get());
478  AH_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
479  if (!AH_.is_null())
480  AH_->setObjectLabel("RefMaxwell (1,1)");
481  CoordsH_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
482  }
483 #endif // HAVE_MPI
484 
485 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
486  // This should be taken out again as soon as
487  // CoalesceDropFactory_kokkos supports BlockSize > 1 and
488  // drop tol != 0.0
489  if (useKokkos_ && precList11_.isParameter("aggregation: drop tol") && precList11_.get<double>("aggregation: drop tol") != 0.0) {
490  GetOStream(Warnings0) << "RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
491  << "support BlockSize > 1 and drop tol != 0.0" << std::endl;
492  precList11_.set("aggregation: drop tol", 0.0);
493  }
494 #endif
495  if (!AH_.is_null()) {
496  int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
497  ParameterList& userParamList = precList11_.sublist("user data");
498  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
499  HierarchyH_ = MueLu::CreateXpetraPreconditioner(AH_, precList11_);
500  SetProcRankVerbose(oldRank);
501  }
502  VerboseObject::SetDefaultVerbLevel(verbMap[verbosityLevel]);
503 
504  }
505 
506  {
507  GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
508 
509  D0_Matrix_->resumeFill();
510  // Scalar replaceWith = Teuchos::ScalarTraits<SC>::eps();
511  Scalar replaceWith = Teuchos::ScalarTraits<SC>::zero();
512 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
513  if (useKokkos_) {
514  Utilities_kokkos::ZeroDirichletRows(D0_Matrix_,BCrowsKokkos_,replaceWith);
515  Utilities_kokkos::ZeroDirichletCols(D0_Matrix_,BCcolsKokkos_,replaceWith);
516  } else {
517  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
518  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
519  }
520 #else
521  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
522  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
523 #endif
524  D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
525  }
526 
527  {
528  GetOStream(Runtime0) << "RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
529 
530  { // build fine grid operator for (2,2)-block, D0* SM D0 (aka TMT)
531  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: Build A22"));
532 
533  Level fineLevel, coarseLevel;
534  fineLevel.SetFactoryManager(null);
535  coarseLevel.SetFactoryManager(null);
536  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
537  fineLevel.SetLevelID(0);
538  coarseLevel.SetLevelID(1);
539  fineLevel.Set("A",SM_Matrix_);
540  coarseLevel.Set("P",D0_Matrix_);
541  coarseLevel.Set("Coordinates",Coords_);
542 
543  coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
544  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
545  coarseLevel.setObjectLabel("RefMaxwell (2,2)");
546  fineLevel.setObjectLabel("RefMaxwell (2,2)");
547 
548  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
549  ParameterList rapList = *(rapFact->GetValidParameterList());
550  rapList.set("transpose: use implicit", false);
551  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
552  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
553  rapFact->SetParameterList(rapList);
554 
555  RCP<TransPFactory> transPFactory;
556  transPFactory = rcp(new TransPFactory());
557  rapFact->SetFactory("R", transPFactory);
558 
559 #ifdef HAVE_MPI
560  if (doRebalancing) {
561 
562  coarseLevel.Set("number of partitions", numProcsA22);
563  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
564 
565  // auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
566  // ParameterList repartheurParams;
567  // repartheurParams.set("repartition: start level",0);
568  // repartheurParams.set("repartition: min rows per proc", precList22_.get<int>("repartition: min rows per proc", 1024));
569  // repartheurParams.set("repartition: target rows per proc", precList22_.get<int>("repartition: target rows per proc", 0));
570  // repartheurParams.set("repartition: max imbalance", precList22_.get<double>("repartition: max imbalance", 1.1));
571  // repartheurFactory->SetParameterList(repartheurParams);
572  // repartheurFactory->SetFactory("A", rapFact);
573 
574  std::string partName = precList22_.get<std::string>("repartition: partitioner", "zoltan2");
575  RCP<Factory> partitioner;
576  if (partName == "zoltan") {
577 #ifdef HAVE_MUELU_ZOLTAN
578  partitioner = rcp(new ZoltanInterface());
579  partitioner->SetFactory("A", rapFact);
580  // partitioner->SetFactory("number of partitions", repartheurFactory);
581  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
582 #else
583  throw Exceptions::RuntimeError("Zoltan interface is not available");
584 #endif
585  } else if (partName == "zoltan2") {
586 #ifdef HAVE_MUELU_ZOLTAN2
587  partitioner = rcp(new Zoltan2Interface());
588  ParameterList partParams;
589  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList22_.sublist("repartition: params", false)));
590  partParams.set("ParameterList", partpartParams);
591  partitioner->SetParameterList(partParams);
592  partitioner->SetFactory("A", rapFact);
593  // partitioner->SetFactory("number of partitions", repartheurFactory);
594 #else
595  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
596 #endif
597  }
598 
599  auto repartFactory = rcp(new RepartitionFactory());
600  ParameterList repartParams;
601  repartParams.set("repartition: print partition distribution", precList22_.get<bool>("repartition: print partition distribution", false));
602  repartParams.set("repartition: remap parts", precList22_.get<bool>("repartition: remap parts", true));
603  repartParams.set("repartition: remap accept partition", AH_.is_null());
604  repartFactory->SetParameterList(repartParams);
605  repartFactory->SetFactory("A", rapFact);
606  // repartFactory->SetFactory("number of partitions", repartheurFactory);
607  repartFactory->SetFactory("Partition", partitioner);
608 
609  auto newP = rcp(new RebalanceTransferFactory());
610  ParameterList newPparams;
611  newPparams.set("type", "Interpolation");
612  newPparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
613  newPparams.set("repartition: use subcommunicators", true);
614  newPparams.set("repartition: rebalance Nullspace", false);
615  newP->SetFactory("Coordinates", NoFactory::getRCP());
616  newP->SetParameterList(newPparams);
617  newP->SetFactory("Importer", repartFactory);
618 
619  // Rebalanced R
620  auto newR = rcp(new RebalanceTransferFactory());
621  ParameterList newRparams;
622  newRparams.set("type", "Restriction");
623  newRparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
624  newRparams.set("repartition: use subcommunicators", true);
625  newR->SetParameterList(newRparams);
626  newR->SetFactory("Importer", repartFactory);
627  newR->SetFactory("R", transPFactory);
628 
629  auto newA = rcp(new RebalanceAcFactory());
630  ParameterList rebAcParams;
631  rebAcParams.set("repartition: use subcommunicators", true);
632  newA->SetParameterList(rebAcParams);
633  newA->SetFactory("A", rapFact);
634  newA->SetFactory("Importer", repartFactory);
635 
636  coarseLevel.Request("R", newR.get());
637  coarseLevel.Request("P", newP.get());
638  coarseLevel.Request("Importer", repartFactory.get());
639  coarseLevel.Request("A", newA.get());
640  coarseLevel.Request("Coordinates", newP.get());
641  rapFact->Build(fineLevel,coarseLevel);
642  repartFactory->Build(coarseLevel);
643 
644  if (!precList22_.get<bool>("repartition: rebalance P and R", false))
645  Importer22_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
646  D0_Matrix_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
647  D0_T_Matrix_ = coarseLevel.Get< RCP<Matrix> >("R", newR.get());
648  A22_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
649  if (!A22_.is_null())
650  A22_->setObjectLabel("RefMaxwell (2,2)");
651  Coords_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
652  } else
653 #endif // HAVE_MPI
654  {
655  coarseLevel.Request("A", rapFact.get());
656  coarseLevel.Request("R", transPFactory.get());
657 
658  A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
659  A22_->setObjectLabel("RefMaxwell (2,2)");
660  D0_T_Matrix_ = coarseLevel.Get< RCP<Matrix> >("R", transPFactory.get());
661  }
662  }
663 
664  if (!A22_.is_null()) {
665  int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
666  ParameterList& userParamList = precList22_.sublist("user data");
667  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
668  Hierarchy22_ = MueLu::CreateXpetraPreconditioner(A22_, precList22_);
669  SetProcRankVerbose(oldRank);
670  }
671  VerboseObject::SetDefaultVerbLevel(verbMap[verbosityLevel]);
672 
673  }
674 
675  {
676  if (parameterList_.isType<std::string>("smoother: type") &&
677  parameterList_.get<std::string>("smoother: type") == "hiptmair" &&
678  SM_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra &&
679  A22_->getDomainMap()->lib() == Xpetra::UseTpetra &&
680  D0_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra) {
681 #if defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || (defined(HAVE_MUELU_INST_DOUBLE_INT_INT)))
682  ParameterList hiptmairPreList, hiptmairPostList, smootherPreList, smootherPostList;
683 
684  if (smootherList_.isSublist("smoother: pre params"))
685  smootherPreList = smootherList_.sublist("smoother: pre params");
686  else if (smootherList_.isSublist("smoother: params"))
687  smootherPreList = smootherList_.sublist("smoother: params");
688  hiptmairPreList.set("hiptmair: smoother type 1",
689  smootherPreList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
690  hiptmairPreList.set("hiptmair: smoother type 2",
691  smootherPreList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
692  if(smootherPreList.isSublist("hiptmair: smoother list 1"))
693  hiptmairPreList.set("hiptmair: smoother list 1", smootherPreList.sublist("hiptmair: smoother list 1"));
694  if(smootherPreList.isSublist("hiptmair: smoother list 2"))
695  hiptmairPreList.set("hiptmair: smoother list 2", smootherPreList.sublist("hiptmair: smoother list 2"));
696  hiptmairPreList.set("hiptmair: pre or post",
697  smootherPreList.get<std::string>("hiptmair: pre or post", "pre"));
698  hiptmairPreList.set("hiptmair: zero starting solution",
699  smootherPreList.get<bool>("hiptmair: zero starting solution", true));
700 
701  if (smootherList_.isSublist("smoother: post params"))
702  smootherPostList = smootherList_.sublist("smoother: post params");
703  else if (smootherList_.isSublist("smoother: params"))
704  smootherPostList = smootherList_.sublist("smoother: params");
705  hiptmairPostList.set("hiptmair: smoother type 1",
706  smootherPostList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
707  hiptmairPostList.set("hiptmair: smoother type 2",
708  smootherPostList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
709  if(smootherPostList.isSublist("hiptmair: smoother list 1"))
710  hiptmairPostList.set("hiptmair: smoother list 1", smootherPostList.sublist("hiptmair: smoother list 1"));
711  if(smootherPostList.isSublist("hiptmair: smoother list 2"))
712  hiptmairPostList.set("hiptmair: smoother list 2", smootherPostList.sublist("hiptmair: smoother list 2"));
713  hiptmairPostList.set("hiptmair: pre or post",
714  smootherPostList.get<std::string>("hiptmair: pre or post", "post"));
715  hiptmairPostList.set("hiptmair: zero starting solution",
716  smootherPostList.get<bool>("hiptmair: zero starting solution", false));
717 
718  typedef Tpetra::RowMatrix<SC, LO, GO, NO> TROW;
719  RCP<const TROW > EdgeMatrix = Utilities::Op2NonConstTpetraRow(SM_Matrix_);
722 
723  hiptmairPreSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
724  hiptmairPreSmoother_ -> setParameters(hiptmairPreList);
725  hiptmairPreSmoother_ -> initialize();
726  hiptmairPreSmoother_ -> compute();
727  hiptmairPostSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
728  hiptmairPostSmoother_ -> setParameters(hiptmairPostList);
729  hiptmairPostSmoother_ -> initialize();
730  hiptmairPostSmoother_ -> compute();
731  useHiptmairSmoothing_ = true;
732 #else
733  throw(Xpetra::Exceptions::RuntimeError("MueLu must be compiled with Ifpack2 for Hiptmair smoothing."));
734 #endif // defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || defined(HAVE_MUELU_INST_DOUBLE_INT_INT))
735  } else {
736  if (parameterList_.isType<std::string>("smoother: pre type") && parameterList_.isType<std::string>("smoother: post type")) {
737  std::string preSmootherType = parameterList_.get<std::string>("smoother: pre type");
738  std::string postSmootherType = parameterList_.get<std::string>("smoother: post type");
739 
740  ParameterList preSmootherList, postSmootherList;
741  if (parameterList_.isSublist("smoother: pre params"))
742  preSmootherList = parameterList_.sublist("smoother: pre params");
743  if (parameterList_.isSublist("smoother: post params"))
744  postSmootherList = parameterList_.sublist("smoother: post params");
745 
746  Level level;
747  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
748  level.SetFactoryManager(factoryHandler);
749  level.SetLevelID(0);
750  level.setObjectLabel("RefMaxwell (1,1)");
751  level.Set("A",SM_Matrix_);
752  level.setlib(SM_Matrix_->getDomainMap()->lib());
753 
754  RCP<SmootherPrototype> preSmootherPrototype = rcp(new TrilinosSmoother(preSmootherType, preSmootherList));
755  RCP<SmootherFactory> preSmootherFact = rcp(new SmootherFactory(preSmootherPrototype));
756 
757  RCP<SmootherPrototype> postSmootherPrototype = rcp(new TrilinosSmoother(postSmootherType, postSmootherList));
758  RCP<SmootherFactory> postSmootherFact = rcp(new SmootherFactory(postSmootherPrototype));
759 
760  level.Request("PreSmoother",preSmootherFact.get());
761  preSmootherFact->Build(level);
762  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",preSmootherFact.get());
763 
764  level.Request("PostSmoother",postSmootherFact.get());
765  postSmootherFact->Build(level);
766  PostSmoother_ = level.Get<RCP<SmootherBase> >("PostSmoother",postSmootherFact.get());
767  } else {
768  std::string smootherType = parameterList_.get<std::string>("smoother: type", "CHEBYSHEV");
769  Level level;
770  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
771  level.SetFactoryManager(factoryHandler);
772  level.SetLevelID(0);
773  level.setObjectLabel("RefMaxwell (1,1)");
774  level.Set("A",SM_Matrix_);
775  level.setlib(SM_Matrix_->getDomainMap()->lib());
776  RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(smootherType, smootherList_));
777  RCP<SmootherFactory> SmootherFact = rcp(new SmootherFactory(smootherPrototype));
778  level.Request("PreSmoother",SmootherFact.get());
779  SmootherFact->Build(level);
780  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",SmootherFact.get());
781  PostSmoother_ = PreSmoother_;
782  }
783  useHiptmairSmoothing_ = false;
784  }
785  }
786 
787  // Allocate temporary MultiVectors for solve
788  P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), 1);
789  if (!ImporterH_.is_null()) {
790  P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), 1);
791  P11xTmp_ = MultiVectorFactory::Build(ImporterH_->getSourceMap(), 1);
792  P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), 1);
793  } else
794  P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), 1);
795  D0res_ = MultiVectorFactory::Build(D0_T_Matrix_->getRangeMap(), 1);
796  if (!Importer22_.is_null()) {
797  D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), 1);
798  D0xTmp_ = MultiVectorFactory::Build(Importer22_->getSourceMap(), 1);
799  D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), 1);
800  } else
801  D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), 1);
802  residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), 1);
803 
804 #ifdef HAVE_MUELU_CUDA
805  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStop();
806 #endif
807 
808  describe(GetOStream(Runtime0));
809 
810  if (dump_matrices_) {
811  GetOStream(Runtime0) << "RefMaxwell::compute(): dumping data" << std::endl;
812  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("SM.mat"), *SM_Matrix_);
813  if(!M1_Matrix_.is_null()) Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("M1.mat"), *M1_Matrix_);
814  if(!M0inv_Matrix_.is_null()) Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("M0inv.mat"), *M0inv_Matrix_);
815 #ifndef HAVE_MUELU_KOKKOS_REFACTOR
816  std::ofstream outBCrows("BCrows.mat");
817  std::copy(BCrows_.begin(), BCrows_.end(), std::ostream_iterator<LO>(outBCrows, "\n"));
818  std::ofstream outBCcols("BCcols.mat");
819  std::copy(BCcols_.begin(), BCcols_.end(), std::ostream_iterator<LO>(outBCcols, "\n"));
820 #else
821  if (useKokkos_) {
822  std::ofstream outBCrows("BCrows.mat");
823  auto BCrows = Kokkos::create_mirror_view (BCrowsKokkos_);
824  Kokkos::deep_copy(BCrows , BCrowsKokkos_);
825  for (size_t i = 0; i < BCrows.size(); i++)
826  outBCrows << BCrows[i] << "\n";
827 
828  std::ofstream outBCcols("BCcols.mat");
829  auto BCcols = Kokkos::create_mirror_view (BCcolsKokkos_);
830  Kokkos::deep_copy(BCcols , BCcolsKokkos_);
831  for (size_t i = 0; i < BCcols.size(); i++)
832  outBCcols << BCcols[i] << "\n";
833  } else {
834  std::ofstream outBCrows("BCrows.mat");
835  std::copy(BCrows_.begin(), BCrows_.end(), std::ostream_iterator<LO>(outBCrows, "\n"));
836  std::ofstream outBCcols("BCcols.mat");
837  std::copy(BCcols_.begin(), BCcols_.end(), std::ostream_iterator<LO>(outBCcols, "\n"));
838  }
839 #endif
840  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("nullspace.mat"), *Nullspace_);
841  if (Coords_ != null)
842  Xpetra::IO<realType, LO, GlobalOrdinal, Node>::Write(std::string("coords.mat"), *Coords_);
843  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("D0_nuked.mat"), *D0_Matrix_);
844  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("A_nodal.mat"), *A_nodal_Matrix_);
845  Xpetra::IO<SC, LO, GO, NO>::Write(std::string("P11.mat"), *P11_);
846  if (!AH_.is_null())
847  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("AH.mat"), *AH_);
848  if (!A22_.is_null())
849  Xpetra::IO<SC, LO, GlobalOrdinal, Node>::Write(std::string("A22.mat"), *A22_);
850  }
851 
853  }
854 
855 
856  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
858  // The P11 matrix maps node based aggregrates { A_j } to edges { e_i }.
859  //
860  // The old implementation used
861  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i) * P(n_l, A_j)
862  // yet the paper gives
863  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i)
864  // where phi_k is the k-th nullspace vector.
865  //
866  // The graph of D0 contains the incidence from nodes to edges.
867  // The nodal prolongator P maps aggregates to nodes.
868 
869  const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
870  const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
871  const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
872  size_t dim = Nullspace_->getNumVectors();
873  size_t numLocalRows = SM_Matrix_->getNodeNumRows();
874 
875  // build prolongator: algorithm 1 in the reference paper
876  // First, build nodal unsmoothed prolongator using the matrix A_nodal
877  RCP<Matrix> P_nodal;
878  bool read_P_from_file = parameterList_.get("refmaxwell: read_P_from_file",false);
879  if (read_P_from_file) {
880  // This permits to read in an ML prolongator, so that we get the same hierarchy.
881  // (ML and MueLu typically produce different aggregates.)
882  std::string P_filename = parameterList_.get("refmaxwell: P_filename",std::string("P.mat"));
883  P_nodal = Xpetra::IO<SC, LO, GO, NO>::Read(P_filename, A_nodal_Matrix_->getDomainMap());
884  } else {
885  Level fineLevel, coarseLevel;
886  fineLevel.SetFactoryManager(null);
887  coarseLevel.SetFactoryManager(null);
888  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
889  fineLevel.SetLevelID(0);
890  coarseLevel.SetLevelID(1);
891  fineLevel.Set("A",A_nodal_Matrix_);
892  fineLevel.Set("Coordinates",Coords_);
893  fineLevel.Set("DofsPerNode",1);
894  coarseLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
895  fineLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
896  coarseLevel.setObjectLabel("RefMaxwell (1,1)");
897  fineLevel.setObjectLabel("RefMaxwell (1,1)");
898 
899  LocalOrdinal NSdim = 1;
900  RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
901  nullSpace->putScalar(SC_ONE);
902  fineLevel.Set("Nullspace",nullSpace);
903 
905 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
906  RCP<Factory> dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact;
907  if (useKokkos_) {
908  dropFact = rcp(new CoalesceDropFactory_kokkos());
909  UncoupledAggFact = rcp(new UncoupledAggregationFactory_kokkos());
910  coarseMapFact = rcp(new CoarseMapFactory_kokkos());
911  TentativePFact = rcp(new TentativePFactory_kokkos());
912  Tfact = rcp(new CoordinatesTransferFactory_kokkos());
913  } else {
914  dropFact = rcp(new CoalesceDropFactory());
915  UncoupledAggFact = rcp(new UncoupledAggregationFactory());
916  coarseMapFact = rcp(new CoarseMapFactory());
917  TentativePFact = rcp(new TentativePFactory());
918  Tfact = rcp(new CoordinatesTransferFactory());
919  }
920 #else
923  RCP<CoarseMapFactory> coarseMapFact = rcp(new CoarseMapFactory());
924  RCP<TentativePFactory> TentativePFact = rcp(new TentativePFactory());
926 #endif
927  dropFact->SetFactory("UnAmalgamationInfo", amalgFact);
928  double dropTol = parameterList_.get("aggregation: drop tol",0.0);
929  dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
930 
931  UncoupledAggFact->SetFactory("Graph", dropFact);
932 
933  coarseMapFact->SetFactory("Aggregates", UncoupledAggFact);
934 
935  TentativePFact->SetFactory("Aggregates", UncoupledAggFact);
936  TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact);
937  TentativePFact->SetFactory("CoarseMap", coarseMapFact);
938 
939  Tfact->SetFactory("Aggregates", UncoupledAggFact);
940  Tfact->SetFactory("CoarseMap", coarseMapFact);
941 
942  coarseLevel.Request("P",TentativePFact.get());
943  coarseLevel.Request("Coordinates",Tfact.get());
944 
945  coarseLevel.Get("P",P_nodal,TentativePFact.get());
946  coarseLevel.Get("Coordinates",CoordsH_,Tfact.get());
947  }
948  if (dump_matrices_)
949  Xpetra::IO<SC, LO, GO, NO>::Write(std::string("P_nodal.mat"), *P_nodal);
950 
951  RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
952 
953  // Import off-rank rows of P_nodal into P_nodal_imported
954  RCP<CrsMatrix> P_nodal_imported;
955  int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
956  if (numProcs > 1) {
957  RCP<CrsMatrixWrap> P_nodal_temp;
958  RCP<const Map> targetMap = D0Crs->getColMap();
959  P_nodal_temp = rcp(new CrsMatrixWrap(targetMap, 0));
960  RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
961  P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
962  P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
963  rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
964  P_nodal_imported = P_nodal_temp->getCrsMatrix();
965  if (dump_matrices_)
966  Xpetra::IO<SC, LO, GO, NO>::Write(std::string("P_nodal_imported.mat"), *P_nodal_temp);
967  } else
968  P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
969 
970 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
971  if (useKokkos_) {
972 
973  using ATS = Kokkos::ArithTraits<SC>;
975 
976  typedef typename Matrix::local_matrix_type KCRS;
977  typedef typename KCRS::device_type device_t;
978  typedef typename KCRS::StaticCrsGraphType graph_t;
979  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
980  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
981  typedef typename KCRS::values_type::non_const_type scalar_view_t;
982 
983  // Get data out of P_nodal_imported and D0.
984  auto localP = P_nodal_imported->getLocalMatrix();
985  auto localD0 = D0_Matrix_->getLocalMatrix();
986 
987  // Which algorithm should we use for the construction of the special prolongator?
988  // Option "mat-mat":
989  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
990  std::string defaultAlgo = "mat-mat";
991  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
992 
993  if (algo == "mat-mat") {
994  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap(),0);
995  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
996 
997  // Get data out of D0*P.
998  auto localD0P = D0_P_nodal->getLocalMatrix();
999 
1000  // Create the matrix object
1001  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1002  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1003 
1004  lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1005  lno_nnz_view_t P11colind("P11_colind",dim*localD0P.graph.entries.size());
1006  scalar_view_t P11vals("P11_vals",dim*localD0P.graph.entries.size());
1007 
1008  // adjust rowpointer
1009  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1010  KOKKOS_LAMBDA(const size_t i) {
1011  P11rowptr(i) = dim*localD0P.graph.row_map(i);
1012  });
1013 
1014  // adjust column indices
1015  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1016  KOKKOS_LAMBDA(const size_t jj) {
1017  for (size_t k = 0; k < dim; k++) {
1018  P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1019  P11vals(dim*jj+k) = SC_ZERO;
1020  }
1021  });
1022 
1023  auto localNullspace = Nullspace_->template getLocalView<device_t>();
1024 
1025  // enter values
1026  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1027  // The matrix D0 has too many entries per row.
1028  // Therefore we need to check whether its entries are actually non-zero.
1029  // This is the case for the matrices built by MiniEM.
1030  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1031 
1033 
1034  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1035  KOKKOS_LAMBDA(const size_t i) {
1036  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1037  LO l = localD0.graph.entries(ll);
1038  SC p = localD0.values(ll);
1039  if (ATS::magnitude(p) < tol)
1040  continue;
1041  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1042  LO j = localP.graph.entries(jj);
1043  SC v = localP.values(jj);
1044  for (size_t k = 0; k < dim; k++) {
1045  LO jNew = dim*j+k;
1046  SC n = localNullspace(i,k);
1047  size_t m;
1048  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1049  if (P11colind(m) == jNew)
1050  break;
1051 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA)
1052  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1053 #endif
1054  P11vals(m) += half * v * n;
1055  }
1056  }
1057  }
1058  });
1059 
1060  } else {
1061  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1062  KOKKOS_LAMBDA(const size_t i) {
1063  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1064  LO l = localD0.graph.entries(ll);
1065  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1066  LO j = localP.graph.entries(jj);
1067  SC v = localP.values(jj);
1068  for (size_t k = 0; k < dim; k++) {
1069  LO jNew = dim*j+k;
1070  SC n = localNullspace(i,k);
1071  size_t m;
1072  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1073  if (P11colind(m) == jNew)
1074  break;
1075 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA)
1076  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1077 #endif
1078  P11vals(m) += half * v * n;
1079  }
1080  }
1081  }
1082  });
1083  }
1084 
1085  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0, Xpetra::StaticProfile));
1086  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1087  P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1088  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1089 
1090  } else
1091  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1092  } else {
1093 #endif // ifdef(HAVE_MUELU_KOKKOS_REFACTOR)
1094 
1095  // get nullspace vectors
1096  ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1097  ArrayRCP<ArrayView<const SC> > nullspace(dim);
1098  for(size_t i=0; i<dim; i++) {
1099  nullspaceRCP[i] = Nullspace_->getData(i);
1100  nullspace[i] = nullspaceRCP[i]();
1101  }
1102 
1103  // Get data out of P_nodal_imported and D0.
1104  ArrayRCP<const size_t> Prowptr_RCP, D0rowptr_RCP;
1105  ArrayRCP<const LO> Pcolind_RCP, D0colind_RCP;
1106  ArrayRCP<const SC> Pvals_RCP, D0vals_RCP;
1107  ArrayRCP<size_t> P11rowptr_RCP;
1108  ArrayRCP<LO> P11colind_RCP;
1109  ArrayRCP<SC> P11vals_RCP;
1110 
1111  P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1112  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1113 
1114  // For efficiency
1115  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1116  // slower than Teuchos::ArrayView::operator[].
1117  ArrayView<const size_t> Prowptr, D0rowptr;
1118  ArrayView<const LO> Pcolind, D0colind;
1119  ArrayView<const SC> Pvals, D0vals;
1120  Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1121  D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1122 
1123  // Which algorithm should we use for the construction of the special prolongator?
1124  // Option "mat-mat":
1125  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1126  // Option "gustavson":
1127  // Loop over D0, P and nullspace and allocate directly. (Gustavson-like)
1128  // More efficient, but only available for serial node.
1129  std::string defaultAlgo = "gustavson";
1130  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1131 
1132  if (algo == "mat-mat") {
1133  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap(),0);
1134  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1135 
1136  // Get data out of D0*P.
1137  ArrayRCP<const size_t> D0Prowptr_RCP;
1138  ArrayRCP<const LO> D0Pcolind_RCP;
1139  ArrayRCP<const SC> D0Pvals_RCP;
1140  rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1141 
1142  // For efficiency
1143  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1144  // slower than Teuchos::ArrayView::operator[].
1145  ArrayView<const size_t> D0Prowptr;
1146  ArrayView<const LO> D0Pcolind;
1147  D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1148 
1149  // Create the matrix object
1150  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1151  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1152  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0, Xpetra::StaticProfile));
1153  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1154  P11Crs->allocateAllValues(dim*D0Pcolind.size(), P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1155 
1156  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1157  ArrayView<LO> P11colind = P11colind_RCP();
1158  ArrayView<SC> P11vals = P11vals_RCP();
1159 
1160  // adjust rowpointer
1161  for (size_t i = 0; i < numLocalRows+1; i++) {
1162  P11rowptr[i] = dim*D0Prowptr[i];
1163  }
1164 
1165  // adjust column indices
1166  size_t nnz = 0;
1167  for (size_t jj = 0; jj < (size_t) D0Pcolind.size(); jj++)
1168  for (size_t k = 0; k < dim; k++) {
1169  P11colind[nnz] = dim*D0Pcolind[jj]+k;
1170  P11vals[nnz] = SC_ZERO;
1171  nnz++;
1172  }
1173 
1174  // enter values
1175  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1176  // The matrix D0 has too many entries per row.
1177  // Therefore we need to check whether its entries are actually non-zero.
1178  // This is the case for the matrices built by MiniEM.
1179  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1180 
1182  for (size_t i = 0; i < numLocalRows; i++) {
1183  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1184  LO l = D0colind[ll];
1185  SC p = D0vals[ll];
1187  continue;
1188  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1189  LO j = Pcolind[jj];
1190  SC v = Pvals[jj];
1191  for (size_t k = 0; k < dim; k++) {
1192  LO jNew = dim*j+k;
1193  SC n = nullspace[k][i];
1194  size_t m;
1195  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1196  if (P11colind[m] == jNew)
1197  break;
1198 #ifdef HAVE_MUELU_DEBUG
1199  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1200 #endif
1201  P11vals[m] += half * v * n;
1202  }
1203  }
1204  }
1205  }
1206  } else {
1207  // enter values
1208  for (size_t i = 0; i < numLocalRows; i++) {
1209  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1210  LO l = D0colind[ll];
1211  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1212  LO j = Pcolind[jj];
1213  SC v = Pvals[jj];
1214  for (size_t k = 0; k < dim; k++) {
1215  LO jNew = dim*j+k;
1216  SC n = nullspace[k][i];
1217  size_t m;
1218  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1219  if (P11colind[m] == jNew)
1220  break;
1221 #ifdef HAVE_MUELU_DEBUG
1222  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1223 #endif
1224  P11vals[m] += half * v * n;
1225  }
1226  }
1227  }
1228  }
1229  }
1230 
1231  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1232  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1233 
1234  } else if (algo == "gustavson") {
1235 
1236  LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1237  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1238  Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1239  // This is ad-hoc and should maybe be replaced with some better heuristics.
1240  size_t nnz_alloc = dim*D0vals_RCP.size();
1241 
1242  // Create the matrix object
1243  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1244  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1245  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0, Xpetra::StaticProfile));
1246  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1247  P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1248 
1249  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1250  ArrayView<LO> P11colind = P11colind_RCP();
1251  ArrayView<SC> P11vals = P11vals_RCP();
1252 
1253  size_t nnz;
1254  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1255  // The matrix D0 has too many entries per row.
1256  // Therefore we need to check whether its entries are actually non-zero.
1257  // This is the case for the matrices built by MiniEM.
1258  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1259 
1261  nnz = 0;
1262  size_t nnz_old = 0;
1263  for (size_t i = 0; i < numLocalRows; i++) {
1264  P11rowptr[i] = nnz;
1265  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1266  LO l = D0colind[ll];
1267  SC p = D0vals[ll];
1269  continue;
1270  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1271  LO j = Pcolind[jj];
1272  SC v = Pvals[jj];
1273  for (size_t k = 0; k < dim; k++) {
1274  LO jNew = dim*j+k;
1275  SC n = nullspace[k][i];
1276  // do we already have an entry for (i, jNew)?
1277  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1278  P11_status[jNew] = nnz;
1279  P11colind[nnz] = jNew;
1280  P11vals[nnz] = half * v * n;
1281  // or should it be
1282  // P11vals[nnz] = half * n;
1283  nnz++;
1284  } else {
1285  P11vals[P11_status[jNew]] += half * v * n;
1286  // or should it be
1287  // P11vals[P11_status[jNew]] += half * n;
1288  }
1289  }
1290  }
1291  }
1292  nnz_old = nnz;
1293  }
1294  P11rowptr[numLocalRows] = nnz;
1295  } else {
1296  nnz = 0;
1297  size_t nnz_old = 0;
1298  for (size_t i = 0; i < numLocalRows; i++) {
1299  P11rowptr[i] = nnz;
1300  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1301  LO l = D0colind[ll];
1302  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1303  LO j = Pcolind[jj];
1304  SC v = Pvals[jj];
1305  for (size_t k = 0; k < dim; k++) {
1306  LO jNew = dim*j+k;
1307  SC n = nullspace[k][i];
1308  // do we already have an entry for (i, jNew)?
1309  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1310  P11_status[jNew] = nnz;
1311  P11colind[nnz] = jNew;
1312  P11vals[nnz] = half * v * n;
1313  // or should it be
1314  // P11vals[nnz] = half * n;
1315  nnz++;
1316  } else {
1317  P11vals[P11_status[jNew]] += half * v * n;
1318  // or should it be
1319  // P11vals[P11_status[jNew]] += half * n;
1320  }
1321  }
1322  }
1323  }
1324  nnz_old = nnz;
1325  }
1326  P11rowptr[numLocalRows] = nnz;
1327  }
1328 
1329  if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1330  // Downward resize
1331  // - Cannot resize for Epetra, as it checks for same pointers
1332  // - Need to resize for Tpetra, as it checks ().size() == P11rowptr[numLocalRows]
1333  P11vals_RCP.resize(nnz);
1334  P11colind_RCP.resize(nnz);
1335  }
1336 
1337  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1338  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1339  } else
1340  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1341 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1342  }
1343 #endif
1344  }
1345 
1346  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1348  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: Build coarse (1,1) matrix"));
1349 
1350  // coarse matrix for P11* (M1 + D1* M2 D1) P11
1351  RCP<Matrix> Matrix1 = MatrixFactory::Build(P11_->getDomainMap(),0);
1352  if (parameterList_.get<bool>("rap: triple product", false) == false) {
1353  RCP<Matrix> C = MatrixFactory::Build(SM_Matrix_->getRowMap(),0);
1354  // construct (M1 + D1* M2 D1) P11
1355  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*P11_,false,*C,true,true);
1356  // construct P11* (M1 + D1* M2 D1) P11
1357  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*R11_,false,*C,false,*Matrix1,true,true);
1358  } else {
1360  MultiplyRAP(*P11_, true, *SM_Matrix_, false, *P11_, false, *Matrix1, true, true);
1361  }
1362  if (parameterList_.get<bool>("rap: fix zero diagonals", true)) {
1363  const double threshold = parameterList_.get<double>("rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps());
1364  Xpetra::MatrixUtils<SC,LO,GO,NO>::CheckRepairMainDiagonal(Matrix1, true, GetOStream(Warnings1), threshold);
1365  }
1366 
1367  if(disable_addon_==true) {
1368  // if add-on is not chosen
1369  AH_=Matrix1;
1370  }
1371  else {
1372  Teuchos::TimeMonitor tmAddon(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: Build coarse addon matrix"));
1373  // catch a failure
1374  TEUCHOS_TEST_FOR_EXCEPTION(M0inv_Matrix_==Teuchos::null,std::invalid_argument,
1375  "MueLu::RefMaxwell::formCoarseMatrix(): Inverse of "
1376  "lumped mass matrix required for add-on (i.e. M0inv_Matrix is null)");
1377 
1378  // coarse matrix for add-on, i.e P11* (M1 D0 M0inv D0* M1) P11
1379  RCP<Matrix> Zaux = MatrixFactory::Build(M1_Matrix_->getRowMap(),0);
1380  RCP<Matrix> Z = MatrixFactory::Build(D0_Matrix_->getDomainMap(),0);
1381 
1382  // construct Zaux = M1 P11
1383  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M1_Matrix_,false,*P11_,false,*Zaux,true,true);
1384  // construct Z = D0* M1 P11 = D0* Zaux
1385  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*Zaux,false,*Z,true,true);
1386 
1387  // construct Z* M0inv Z
1388  RCP<Matrix> Matrix2 = MatrixFactory::Build(Z->getDomainMap(),0);
1389  if (M0inv_Matrix_->getGlobalMaxNumRowEntries()<=1) {
1390  // We assume that if M0inv has at most one entry per row then
1391  // these are all diagonal entries.
1392 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1393  RCP<Matrix> ZT;
1394  if (useKokkos_)
1395  ZT = Utilities_kokkos::Transpose(*Z);
1396  else
1397  ZT = Utilities::Transpose(*Z);
1398 #else
1400 #endif
1401  RCP<Vector> diag = VectorFactory::Build(M0inv_Matrix_->getRowMap());
1402  M0inv_Matrix_->getLocalDiagCopy(*diag);
1403  if (Z->getRowMap()->isSameAs(*(diag->getMap())))
1404  Z->leftScale(*diag);
1405  else {
1406  RCP<Import> importer = ImportFactory::Build(diag->getMap(),Z->getRowMap());
1407  RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
1408  diag2->doImport(*diag,*importer,Xpetra::INSERT);
1409  Z->leftScale(*diag2);
1410  }
1411  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*ZT,false,*Z,false,*Matrix2,true,true);
1412  } else if (parameterList_.get<bool>("rap: triple product", false) == false) {
1413  RCP<Matrix> C2 = MatrixFactory::Build(M0inv_Matrix_->getRowMap(),0);
1414  // construct C2 = M0inv Z
1415  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M0inv_Matrix_,false,*Z,false,*C2,true,true);
1416  // construct Matrix2 = Z* M0inv Z = Z* C2
1417  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*C2,false,*Matrix2,true,true);
1418  } else {
1419  // construct Matrix2 = Z* M0inv Z
1421  MultiplyRAP(*Z, true, *M0inv_Matrix_, false, *Z, false, *Matrix2, true, true);
1422  }
1423  // add matrices together
1424  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*Matrix1,false,(Scalar)1.0,*Matrix2,false,(Scalar)1.0,AH_,GetOStream(Runtime0));
1425  AH_->fillComplete();
1426  }
1427 
1428  // set fixed block size for vector nodal matrix
1429  size_t dim = Nullspace_->getNumVectors();
1430  AH_->SetFixedBlockSize(dim);
1431  AH_->setObjectLabel("RefMaxwell (1,1)");
1432 
1433  }
1434 
1435 
1436  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1438  SM_Matrix_ = SM_Matrix_new;
1439  }
1440 
1441 
1442  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1444 
1445  // iterate on coarse (1, 1) block
1446  if (!ImporterH_.is_null()) {
1447  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: import coarse (1,1)"));
1448  P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
1449  P11res_.swap(P11resTmp_);
1450  }
1451  if (!AH_.is_null()) {
1452  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: solve coarse (1,1)"));
1453 
1454  RCP<const Map> origXMap = P11x_->getMap();
1455  RCP<const Map> origRhsMap = P11res_->getMap();
1456 
1457  // Replace maps with maps with a subcommunicator
1458  P11res_->replaceMap(AH_->getRangeMap());
1459  P11x_ ->replaceMap(AH_->getDomainMap());
1460  HierarchyH_->Iterate(*P11res_, *P11x_, 1, true);
1461  P11x_ ->replaceMap(origXMap);
1462  P11res_->replaceMap(origRhsMap);
1463  }
1464  if (!ImporterH_.is_null()) {
1465  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: export coarse (1,1)"));
1466  P11xTmp_->doExport(*P11x_, *ImporterH_, Xpetra::INSERT);
1467  P11x_.swap(P11xTmp_);
1468  }
1469  }
1470 
1471 
1472  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1474  // iterate on (2, 2) block
1475  if (!Importer22_.is_null()) {
1476  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: import (2,2)"));
1477  D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
1478  D0res_.swap(D0resTmp_);
1479  }
1480  if (!A22_.is_null()) {
1481  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: solve (2,2)"));
1482 
1483  RCP<const Map> origXMap = D0x_->getMap();
1484  RCP<const Map> origRhsMap = D0res_->getMap();
1485 
1486  // Replace maps with maps with a subcommunicator
1487  D0res_->replaceMap(A22_->getRangeMap());
1488  D0x_ ->replaceMap(A22_->getDomainMap());
1489  Hierarchy22_->Iterate(*D0res_, *D0x_, 1, true);
1490  D0x_ ->replaceMap(origXMap);
1491  D0res_->replaceMap(origRhsMap);
1492  }
1493  if (!Importer22_.is_null()) {
1494  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: export (2,2)"));
1495  D0xTmp_->doExport(*D0x_, *Importer22_, Xpetra::INSERT);
1496  D0x_.swap(D0xTmp_);
1497  }
1498  }
1499 
1500 
1501  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1502  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverseAdditive(const MultiVector& RHS, MultiVector& X) const {
1503 
1504  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1505 
1506  { // compute residuals
1507 
1508  Teuchos::TimeMonitor tmRes(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: residual calculation"));
1509  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
1510  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
1511  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
1512  }
1513 
1514  // block diagonal preconditioner on 2x2 (V-cycle for diagonal blocks)
1515 
1516  if (!ImporterH_.is_null()) {
1517  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: import coarse (1,1)"));
1518  P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
1519  P11res_.swap(P11resTmp_);
1520  }
1521  if (!Importer22_.is_null()) {
1522  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: import (2,2)"));
1523  D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
1524  D0res_.swap(D0resTmp_);
1525  }
1526 
1527  // iterate on coarse (1, 1) block
1528  if (!AH_.is_null()) {
1529  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: solve coarse (1,1)"));
1530 
1531  RCP<const Map> origXMap = P11x_->getMap();
1532  RCP<const Map> origRhsMap = P11res_->getMap();
1533 
1534  // Replace maps with maps with a subcommunicator
1535  P11res_->replaceMap(AH_->getRangeMap());
1536  P11x_ ->replaceMap(AH_->getDomainMap());
1537  HierarchyH_->Iterate(*P11res_, *P11x_, 1, true);
1538  P11x_ ->replaceMap(origXMap);
1539  P11res_->replaceMap(origRhsMap);
1540  }
1541 
1542  // iterate on (2, 2) block
1543  if (!A22_.is_null()) {
1544  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: solve (2,2)"));
1545 
1546  RCP<const Map> origXMap = D0x_->getMap();
1547  RCP<const Map> origRhsMap = D0res_->getMap();
1548 
1549  // Replace maps with maps with a subcommunicator
1550  D0res_->replaceMap(A22_->getRangeMap());
1551  D0x_ ->replaceMap(A22_->getDomainMap());
1552  Hierarchy22_->Iterate(*D0res_, *D0x_, 1, true);
1553  D0x_ ->replaceMap(origXMap);
1554  D0res_->replaceMap(origRhsMap);
1555  }
1556 
1557  if (!Importer22_.is_null()) {
1558  Teuchos::TimeMonitor tm22(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: export (2,2)"));
1559  D0xTmp_->doExport(*D0x_, *Importer22_, Xpetra::INSERT);
1560  D0x_.swap(D0xTmp_);
1561  }
1562 
1563  if (!ImporterH_.is_null()) {
1564  Teuchos::TimeMonitor tmH(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: export coarse (1,1)"));
1565  P11xTmp_->doExport(*P11x_, *ImporterH_, Xpetra::INSERT);
1566  P11x_.swap(P11xTmp_);
1567  }
1568 
1569  { // update current solution
1570  Teuchos::TimeMonitor tmUp(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: update"));
1571  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
1572  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS,one,one);
1573  X.update(one, *residual_, one);
1574 
1575  if (!ImporterH_.is_null()) {
1576  P11res_.swap(P11resTmp_);
1577  P11x_.swap(P11xTmp_);
1578  }
1579  if (!Importer22_.is_null()) {
1580  D0res_.swap(D0resTmp_);
1581  D0x_.swap(D0xTmp_);
1582  }
1583 
1584  }
1585  }
1586 
1587 
1588  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1589  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse121(const MultiVector& RHS, MultiVector& X) const {
1590 
1591  // precondition (1,1)-block
1592  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1593  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
1594  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
1595  solveH();
1596  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
1597  X.update(one, *residual_, one);
1598  if (!ImporterH_.is_null()) {
1599  P11res_.swap(P11resTmp_);
1600  P11x_.swap(P11xTmp_);
1601  }
1602 
1603  // precondition (2,2)-block
1604  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
1605  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
1606  solve22();
1607  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
1608  X.update(one, *residual_, one);
1609  if (!Importer22_.is_null()) {
1610  D0res_.swap(D0resTmp_);
1611  D0x_.swap(D0xTmp_);
1612  }
1613 
1614  // precondition (1,1)-block
1615  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
1616  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
1617  solveH();
1618  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
1619  X.update(one, *residual_, one);
1620  if (!ImporterH_.is_null()) {
1621  P11res_.swap(P11resTmp_);
1622  P11x_.swap(P11xTmp_);
1623  }
1624 
1625  }
1626 
1627 
1628  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1629  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse212(const MultiVector& RHS, MultiVector& X) const {
1630 
1631  // precondition (2,2)-block
1632  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1633  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
1634  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
1635  solve22();
1636  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
1637  X.update(one, *residual_, one);
1638  if (!Importer22_.is_null()) {
1639  D0res_.swap(D0resTmp_);
1640  D0x_.swap(D0xTmp_);
1641  }
1642 
1643  // precondition (1,1)-block
1644  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
1645  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
1646  solveH();
1647  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
1648  X.update(one, *residual_, one);
1649  if (!ImporterH_.is_null()) {
1650  P11res_.swap(P11resTmp_);
1651  P11x_.swap(P11xTmp_);
1652  }
1653 
1654  // precondition (2,2)-block
1655  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
1656  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
1657  solve22();
1658  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
1659  X.update(one, *residual_, one);
1660  if (!Importer22_.is_null()) {
1661  D0res_.swap(D0resTmp_);
1662  D0x_.swap(D0xTmp_);
1663  }
1664 
1665  }
1666 
1667  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1668  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse11only(const MultiVector& RHS, MultiVector& X) const {
1669 
1670  // compute residuals
1671  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1672  Utilities::Residual(*SM_Matrix_, X, RHS,*residual_);
1673  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
1674 
1675  solveH();
1676 
1677  // update current solution
1678  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
1679  X.update(one, *residual_, one);
1680 
1681  if (!ImporterH_.is_null()) {
1682  P11res_.swap(P11resTmp_);
1683  P11x_.swap(P11xTmp_);
1684  }
1685 
1686  }
1687 
1688 
1689  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1690  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply (const MultiVector& RHS, MultiVector& X,
1691  Teuchos::ETransp /* mode */,
1692  Scalar /* alpha */,
1693  Scalar /* beta */) const {
1694 
1695  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: solve"));
1696 
1697  // make sure that we have enough temporary memory
1698  if (X.getNumVectors() != P11res_->getNumVectors()) {
1699  P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), X.getNumVectors());
1700  if (!ImporterH_.is_null()) {
1701  P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), X.getNumVectors());
1702  P11xTmp_ = MultiVectorFactory::Build(ImporterH_->getSourceMap(), X.getNumVectors());
1703  P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), X.getNumVectors());
1704  } else
1705  P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), X.getNumVectors());
1706  if (!Importer22_.is_null()) {
1707  D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), X.getNumVectors());
1708  D0xTmp_ = MultiVectorFactory::Build(Importer22_->getSourceMap(), X.getNumVectors());
1709  D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), X.getNumVectors());
1710  } else
1711  D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), X.getNumVectors());
1712  residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), X.getNumVectors());
1713 
1714  }
1715 
1716  { // apply pre-smoothing
1717 
1718  Teuchos::TimeMonitor tmSm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: smoothing"));
1719 
1720 #if defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || defined(HAVE_MUELU_INST_DOUBLE_INT_INT))
1721  if (useHiptmairSmoothing_) {
1722  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
1723  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
1724  hiptmairPreSmoother_->apply(tRHS, tX);
1725  }
1726  else
1727 #endif
1728  PreSmoother_->Apply(X, RHS, use_as_preconditioner_);
1729  }
1730 
1731  // do solve for the 2x2 block system
1732  if(mode_=="additive")
1733  applyInverseAdditive(RHS,X);
1734  else if(mode_=="121")
1735  applyInverse121(RHS,X);
1736  else if(mode_=="212")
1737  applyInverse212(RHS,X);
1738  else if(mode_=="11only")
1739  applyInverse11only(RHS,X);
1740  else if(mode_=="none") {
1741  // do nothing
1742  }
1743  else
1744  applyInverseAdditive(RHS,X);
1745 
1746  { // apply post-smoothing
1747 
1748  Teuchos::TimeMonitor tmSm(*Teuchos::TimeMonitor::getNewTimer("MueLu RefMaxwell: smoothing"));
1749 
1750 #if defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || defined(HAVE_MUELU_INST_DOUBLE_INT_INT))
1751  if (useHiptmairSmoothing_)
1752  {
1753  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
1754  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
1755  hiptmairPostSmoother_->apply(tRHS, tX);
1756  }
1757  else
1758 #endif
1759  PostSmoother_->Apply(X, RHS, false);
1760  }
1761 
1762  }
1763 
1764 
1765  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1767  return false;
1768  }
1769 
1770 
1771  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1774  const Teuchos::RCP<Matrix> & M0inv_Matrix,
1775  const Teuchos::RCP<Matrix> & M1_Matrix,
1776  const Teuchos::RCP<MultiVector> & Nullspace,
1777  const Teuchos::RCP<RealValuedMultiVector> & Coords,
1778  Teuchos::ParameterList& List)
1779  {
1780  // some pre-conditions
1781  TEUCHOS_ASSERT(D0_Matrix!=Teuchos::null);
1782  TEUCHOS_ASSERT(M1_Matrix!=Teuchos::null);
1783 
1784  HierarchyH_ = Teuchos::null;
1785  Hierarchy22_ = Teuchos::null;
1786  PreSmoother_ = Teuchos::null;
1787  PostSmoother_ = Teuchos::null;
1788  disable_addon_ = false;
1789  mode_ = "additive";
1790 
1791  // set parameters
1792  setParameters(List);
1793 
1794  D0_Matrix_ = D0_Matrix;
1795  M0inv_Matrix_ = M0inv_Matrix;
1796  M1_Matrix_ = M1_Matrix;
1797  Coords_ = Coords;
1798  Nullspace_ = Nullspace;
1799  }
1800 
1801  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1803  describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel /* verbLevel */) const {
1804 
1805  std::ostringstream oss;
1806 
1807  RCP<const Teuchos::Comm<int> > comm = SM_Matrix_->getDomainMap()->getComm();
1808 
1809 #ifdef HAVE_MPI
1810  int root;
1811  if (!A22_.is_null())
1812  root = comm->getRank();
1813  else
1814  root = -1;
1815 
1816  int actualRoot;
1817  reduceAll(*comm, Teuchos::REDUCE_MAX, root, Teuchos::ptr(&actualRoot));
1818  root = actualRoot;
1819 #endif
1820 
1821 
1822  oss << "\n--------------------------------------------------------------------------------\n" <<
1823  "--- RefMaxwell Summary ---\n"
1824  "--------------------------------------------------------------------------------" << std::endl;
1825  oss << std::endl;
1826 
1827  GlobalOrdinal numRows;
1828  GlobalOrdinal nnz;
1829 
1830  SM_Matrix_->getRowMap()->getComm()->barrier();
1831 
1832  numRows = SM_Matrix_->getGlobalNumRows();
1833  nnz = SM_Matrix_->getGlobalNumEntries();
1834 
1835  Xpetra::global_size_t tt = numRows;
1836  int rowspacer = 3; while (tt != 0) { tt /= 10; rowspacer++; }
1837  tt = nnz;
1838  int nnzspacer = 2; while (tt != 0) { tt /= 10; nnzspacer++; }
1839 
1840  oss << "block " << std::setw(rowspacer) << " rows " << std::setw(nnzspacer) << " nnz " << std::setw(9) << " nnz/row" << std::endl;
1841  oss << "(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
1842 
1843  if (!A22_.is_null()) {
1844  // ToDo: make sure that this is printed correctly
1845  numRows = A22_->getGlobalNumRows();
1846  nnz = A22_->getGlobalNumEntries();
1847 
1848  oss << "(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
1849  }
1850 
1851  oss << std::endl;
1852 
1853  std::string outstr = oss.str();
1854 
1855 #ifdef HAVE_MPI
1856  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
1857  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
1858 
1859  int strLength = outstr.size();
1860  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
1861  if (comm->getRank() != root)
1862  outstr.resize(strLength);
1863  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
1864 #endif
1865 
1866  out << outstr;
1867 
1868  if (IsPrint(Statistics2)) {
1869  // Print the grid of processors
1870  std::ostringstream oss2;
1871 
1872  oss2 << "Sub-solver distribution over ranks" << std::endl;
1873  oss2 << "( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
1874 
1875  int numProcs = comm->getSize();
1876 #ifdef HAVE_MPI
1877  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
1878  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
1879  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
1880 #endif
1881 
1882  char status = 0;
1883  if (!AH_.is_null())
1884  status += 1;
1885  if (!A22_.is_null())
1886  status += 2;
1887  std::vector<char> states(numProcs, 0);
1888 #ifdef HAVE_MPI
1889  MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
1890 #else
1891  states.push_back(status);
1892 #endif
1893 
1894  int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
1895  for (int proc = 0; proc < numProcs; proc += rowWidth) {
1896  for (int j = 0; j < rowWidth; j++)
1897  if (proc + j < numProcs)
1898  if (states[proc+j] == 0)
1899  oss2 << ".";
1900  else if (states[proc+j] == 1)
1901  oss2 << "1";
1902  else if (states[proc+j] == 2)
1903  oss2 << "2";
1904  else
1905  oss2 << "B";
1906  else
1907  oss2 << " ";
1908 
1909  oss2 << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
1910  }
1911  oss2 << std::endl;
1912  GetOStream(Statistics2) << oss2.str();
1913  }
1914 
1915 
1916  }
1917 
1918 } // namespace
1919 
1920 #define MUELU_REFMAXWELL_SHORT
1921 #endif //ifdef MUELU_REFMAXWELL_DEF_HPP
Important warning messages (one line)
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
This class specifies the default factory that should generate some data on a Level if the data does n...
void solveH() const
solve coarse (1,1) block
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level-&gt;Get&lt; RCP&lt;Matrix&gt; &gt;(&quot;A&quot;, factory) if factory == NULL =&gt; use default factory.
static Teuchos::ArrayRCP< const bool > DetectDirichletCols(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< const bool > &dirichletRows)
Detect Dirichlet columns based on Dirichlet rows.
Factory for generating coarse level map. Used by TentativePFactory.
static RCP< const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2TpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > const vec)
Helper utility to pull out the underlying Tpetra objects from an Xpetra object.
void solve22() const
solve (2,2) block
static magnitudeType eps()
Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
T & get(const std::string &name, T def_value)
Class that encapsulates external library smoothers.
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static void MultiplyRAP(const Matrix &R, bool transposeR, const Matrix &A, bool transposeA, const Matrix &P, bool transposeP, Matrix &Ac, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::magnitude(0.), const bool count_twos_as_dirichlet=false)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Factory for building permutation matrix that can be be used to shuffle data (matrices, vectors) among processes.
size_type size() const
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
LocalOrdinal LO
One-liner description of what is happening.
T * get() const
void SetPreviousLevel(const RCP< Level > &previousLevel)
Definition: MueLu_Level.cpp:85
Interface to Zoltan library.This interface provides access to partitioning methods in Zoltan...
size_type size() const
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
Definition: MueLu_Level.cpp:92
void setParameters(Teuchos::ParameterList &list)
Set parameters.
void buildProlongator()
Setup the prolongator for the (1,1)-block.
static RCP< Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2NonConstTpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > vec)
static void CheckRepairMainDiagonal(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &Ac, bool const &repairZeroDiagonals, Teuchos::FancyOStream &fos, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType threshold=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< Scalar >::magnitudeType >::zero())
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
Factory for building tentative prolongator.
Print even more statistics.
void setlib(Xpetra::UnderlyingLib lib2)
static RCP< Time > getNewTimer(const std::string &name)
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)
Additional warnings.
void resize(const size_type n, const T &val=T())
void Build(Level &currentLevel) const
Creates pre and post smoothers.
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &RHS)
void applyInverseAdditive(const MultiVector &RHS, MultiVector &X) const
apply additive algorithm for 2x2 solve
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
virtual void SetParameterList(const Teuchos::ParameterList &paramList)
Set parameters from a parameter list and return with default values.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void compute()
Setup the preconditioner.
bool isSublist(const std::string &name) const
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LocalOrdinal, GlobalOrdinal, Node > > X)
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix...
Interface to Zoltan2 library.This interface provides access to partitioning methods in Zoltan2...
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
virtual void setObjectLabel(const std::string &objectLabel)
AmalgamationFactory for subblocks of strided map based amalgamation data.
void applyInverse212(const MultiVector &RHS, MultiVector &X) const
apply 2-1-2 algorithm for 2x2 solve
void initialize(const Teuchos::RCP< Matrix > &D0_Matrix, const Teuchos::RCP< Matrix > &M0inv_Matrix, const Teuchos::RCP< Matrix > &M1_Matrix, const Teuchos::RCP< MultiVector > &Nullspace, const Teuchos::RCP< RealValuedMultiVector > &Coords, Teuchos::ParameterList &List)
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
void applyInverse121(const MultiVector &RHS, MultiVector &X) const
apply 1-2-1 algorithm for 2x2 solve
Applies permutation to grid transfer operators.
size_t global_size_t
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_HIGH) const
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
void SetParameter(const std::string &name, const ParameterEntry &entry)
Set a parameter directly as a ParameterEntry.
Factory for creating a graph base on a given matrix.
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
void Build(Level &currentLevel) const
Build an object with this factory.
void SetLevelID(int levelID)
Set level number.
Definition: MueLu_Level.cpp:78
static Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Read(const std::string &fileName, Xpetra::UnderlyingLib lib, const RCP< const Teuchos::Comm< int > > &comm, bool binary=false)
virtual void SetFactory(const std::string &varName, const RCP< const FactoryBase > &factory)=0
Configuration.
Scalar SC
Class for transferring coordinates from a finer level to a coarser one.
Factory for building restriction operators.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletCols(const Xpetra::Matrix< SC, LO, GO, NO > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)
Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
void applyInverse11only(const MultiVector &RHS, MultiVector &X) const
apply solve to 1-1 block only
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
Factory for building coarse matrices.
void formCoarseMatrix()
Compute P11^{T}*A*P11 efficiently.
Exception throws to report errors in the internal logical of the program.
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
#define TEUCHOS_ASSERT(assertion_test)
static void ZeroDirichletCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
Factory for building coarse matrices.
static RCP< Tpetra::RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op2NonConstTpetraRow(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op)
Factory for building uncoupled aggregates.
Factory for building a thresholded operator.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
void resetMatrix(Teuchos::RCP< Matrix > SM_Matrix_new)
Reset system matrix.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
static const RCP< const NoFactory > getRCP()
Static Get() functions.
bool is_null() const