46 #ifndef MUELU_REFMAXWELL_DEF_HPP
47 #define MUELU_REFMAXWELL_DEF_HPP
61 #include "MueLu_AmalgamationFactory.hpp"
62 #include "MueLu_RAPFactory.hpp"
63 #include "MueLu_ThresholdAFilterFactory.hpp"
64 #include "MueLu_TransPFactory.hpp"
65 #include "MueLu_SmootherFactory.hpp"
67 #include "MueLu_CoalesceDropFactory.hpp"
68 #include "MueLu_CoarseMapFactory.hpp"
69 #include "MueLu_CoordinatesTransferFactory.hpp"
70 #include "MueLu_UncoupledAggregationFactory.hpp"
71 #include "MueLu_TentativePFactory.hpp"
72 #include "MueLu_Utilities.hpp"
74 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
75 #include "MueLu_CoalesceDropFactory_kokkos.hpp"
76 #include "MueLu_CoarseMapFactory_kokkos.hpp"
77 #include "MueLu_CoordinatesTransferFactory_kokkos.hpp"
78 #include "MueLu_UncoupledAggregationFactory_kokkos.hpp"
79 #include "MueLu_TentativePFactory_kokkos.hpp"
80 #include "MueLu_Utilities_kokkos.hpp"
81 #include <Kokkos_Core.hpp>
82 #include <KokkosSparse_CrsMatrix.hpp>
85 #include "MueLu_ZoltanInterface.hpp"
86 #include "MueLu_Zoltan2Interface.hpp"
87 #include "MueLu_RepartitionHeuristicFactory.hpp"
88 #include "MueLu_RepartitionFactory.hpp"
89 #include "MueLu_RebalanceAcFactory.hpp"
90 #include "MueLu_RebalanceTransferFactory.hpp"
96 #ifdef HAVE_MUELU_CUDA
97 #include "cuda_profiler_api.h"
103 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
105 return SM_Matrix_->getDomainMap();
109 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
111 return SM_Matrix_->getRangeMap();
115 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
118 parameterList_ = list;
119 disable_addon_ = list.
get(
"refmaxwell: disable addon",
true);
120 mode_ = list.
get(
"refmaxwell: mode",
"additive");
121 use_as_preconditioner_ = list.
get<
bool>(
"refmaxwell: use as preconditioner");
122 dump_matrices_ = list.
get(
"refmaxwell: dump matrices",
false);
125 precList11_ = list.
sublist(
"refmaxwell: 11list");
128 precList22_ = list.
sublist(
"refmaxwell: 22list");
131 smootherList_ = list.
sublist(
"smoother: params");
134 #if !defined(HAVE_MUELU_KOKKOS_REFACTOR)
136 #elif defined(HAVE_MUELU_KOKKOS_REFACTOR_USE_BY_DEFAULT)
137 useKokkos_ = list.
get(
"use kokkos refactor",
true);
139 useKokkos_ = list.
get(
"use kokkos refactor",
false);
145 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
151 #ifdef HAVE_MUELU_CUDA
152 if (parameterList_.get<
bool>(
"refmaxwell: cuda profile setup",
false)) cudaProfilerStart();
157 std::map<std::string, MsgType> verbMap;
158 verbMap[
"none"] =
None;
159 verbMap[
"low"] =
Low;
160 verbMap[
"medium"] =
Medium;
161 verbMap[
"high"] =
High;
163 verbMap[
"test"] =
Test;
166 std::string verbosityLevel = parameterList_.get<std::string>(
"verbosity",
"medium");
169 "Invalid verbosity level: \"" << verbosityLevel <<
"\"");
173 bool defaultFilter =
false;
178 if (parameterList_.get<
bool>(
"refmaxwell: filter D0",
true) && D0_Matrix_->getNodeMaxNumRowEntries()>2) {
182 fineLevel.
Set(
"A",D0_Matrix_);
183 fineLevel.
setlib(D0_Matrix_->getDomainMap()->lib());
187 ThreshFact->
Build(fineLevel);
191 defaultFilter =
true;
194 if (parameterList_.get<
bool>(
"refmaxwell: filter SM", defaultFilter)) {
195 RCP<Vector> diag = VectorFactory::Build(SM_Matrix_->getRowMap());
197 SM_Matrix_->getLocalDiagCopy(*diag);
203 fineLevel.
Set(
"A",SM_Matrix_);
204 fineLevel.
setlib(SM_Matrix_->getDomainMap()->lib());
207 ThreshFact->
Build(fineLevel);
211 if (parameterList_.get<
bool>(
"refmaxwell: filter M1", defaultFilter)) {
212 RCP<Vector> diag = VectorFactory::Build(M1_Matrix_->getRowMap());
214 M1_Matrix_->getLocalDiagCopy(*diag);
220 fineLevel.
Set(
"A",M1_Matrix_);
221 fineLevel.
setlib(M1_Matrix_->getDomainMap()->lib());
224 ThreshFact->
Build(fineLevel);
232 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
238 for (
size_t i = 0; i<BCrowsKokkos_.size(); i++)
239 if (BCrowsKokkos_(i))
242 for (
size_t i = 0; i<BCcolsKokkos_.size(); i++)
243 if (BCcolsKokkos_(i))
245 GetOStream(
Statistics2) <<
"MueLu::RefMaxwell::compute(): Detected " << BCrowcount <<
" BC rows and " << BCcolcount <<
" BC columns." << std::endl;
254 for (
auto it = BCrows_.begin(); it != BCrows_.end(); ++it)
258 for (
auto it = BCcols_.begin(); it != BCcols_.end(); ++it)
261 GetOStream(
Statistics2) <<
"MueLu::RefMaxwell::compute(): Detected " << BCrowcount <<
" BC rows and " << BCcolcount <<
" BC columns." << std::endl;
266 if(Nullspace_ != null) {
268 TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
270 else if(Nullspace_ == null && Coords_ != null) {
272 typedef typename RealValuedMultiVector::scalar_type realScalarType;
275 Coords_->norm2(norms);
276 for (
size_t i=0;i<Coords_->getNumVectors();i++)
277 norms[i] = ((realMagnitudeType)1.0)/norms[i];
278 Coords_->scale(norms());
279 Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
282 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
285 CoordsSC = Utilities_kokkos::RealValuedToScalarMultiVector(Coords_);
291 D0_Matrix_->apply(*CoordsSC,*Nullspace_);
294 GetOStream(
Errors) <<
"MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
298 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
313 Level fineLevel, coarseLevel;
319 fineLevel.
Set(
"A",M1_Matrix_);
320 coarseLevel.
Set(
"P",D0_Matrix_);
321 coarseLevel.
setlib(M1_Matrix_->getDomainMap()->lib());
322 fineLevel.
setlib(M1_Matrix_->getDomainMap()->lib());
328 rapList.
set(
"transpose: use implicit", parameterList_.get<
bool>(
"transpose: use implicit",
false));
329 rapList.
set(
"rap: fix zero diagonals", parameterList_.get<
bool>(
"rap: fix zero diagonals",
true));
330 rapList.
set(
"rap: triple product", parameterList_.get<
bool>(
"rap: triple product",
false));
334 if (!parameterList_.get<
bool>(
"transpose: use implicit",
false)) {
344 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): building special prolongator" << std::endl;
347 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
349 R11_ = Utilities_kokkos::Transpose(*P11_);
358 bool doRebalancing = parameterList_.get<
bool>(
"refmaxwell: subsolves on subcommunicators",
false);
359 int numProcsAH, numProcsA22;
366 int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
367 if (doRebalancing && numProcs > 1) {
368 GlobalOrdinal globalNumRowsAH = AH_->getRowMap()->getGlobalNumElements();
369 GlobalOrdinal globalNumRowsA22 = D0_Matrix_->getDomainMap()->getGlobalNumElements();
370 double ratio = parameterList_.get<
double>(
"refmaxwell: ratio AH / A22 subcommunicators", 1.0);
371 numProcsAH = numProcs * globalNumRowsAH / (globalNumRowsAH + ratio*globalNumRowsA22);
372 numProcsA22 = numProcs * ratio * globalNumRowsA22 / (globalNumRowsAH + ratio*globalNumRowsA22);
373 if (numProcsAH + numProcsA22 < numProcs)
375 if (numProcsAH + numProcsA22 < numProcs)
377 numProcsAH = std::max(numProcsAH, 1);
378 numProcsA22 = std::max(numProcsA22, 1);
380 doRebalancing =
false;
385 Level fineLevel, coarseLevel;
391 coarseLevel.
Set(
"A",AH_);
392 coarseLevel.
Set(
"P",P11_);
393 coarseLevel.
Set(
"R",R11_);
394 coarseLevel.
Set(
"Coordinates",CoordsH_);
395 coarseLevel.
Set(
"number of partitions", numProcsAH);
396 coarseLevel.
Set(
"repartition: heuristic target rows per process", 1000);
398 coarseLevel.
setlib(AH_->getDomainMap()->lib());
399 fineLevel.
setlib(AH_->getDomainMap()->lib());
411 std::string partName = precList11_.get<std::string>(
"repartition: partitioner",
"zoltan2");
413 if (partName ==
"zoltan") {
414 #ifdef HAVE_MUELU_ZOLTAN
421 }
else if (partName ==
"zoltan2") {
422 #ifdef HAVE_MUELU_ZOLTAN2
426 partParams.
set(
"ParameterList", partpartParams);
427 partitioner->SetParameterList(partParams);
436 repartParams.
set(
"repartition: print partition distribution", precList11_.get<
bool>(
"repartition: print partition distribution",
false));
437 repartParams.
set(
"repartition: remap parts", precList11_.get<
bool>(
"repartition: remap parts",
true));
438 repartFactory->SetParameterList(repartParams);
440 repartFactory->SetFactory(
"Partition", partitioner);
444 newPparams.
set(
"type",
"Interpolation");
445 newPparams.
set(
"repartition: rebalance P and R", precList11_.get<
bool>(
"repartition: rebalance P and R",
false));
446 newPparams.
set(
"repartition: use subcommunicators",
true);
447 newPparams.
set(
"repartition: rebalance Nullspace",
false);
449 newP->SetParameterList(newPparams);
450 newP->SetFactory(
"Importer", repartFactory);
455 newRparams.
set(
"type",
"Restriction");
456 newRparams.
set(
"repartition: rebalance P and R", precList11_.get<
bool>(
"repartition: rebalance P and R",
false));
457 newRparams.
set(
"repartition: use subcommunicators",
true);
458 newR->SetParameterList(newRparams);
459 newR->SetFactory(
"Importer", repartFactory);
463 rebAcParams.
set(
"repartition: use subcommunicators",
true);
464 newA->SetParameterList(rebAcParams);
465 newA->SetFactory(
"Importer", repartFactory);
467 coarseLevel.
Request(
"R", newR.get());
468 coarseLevel.
Request(
"P", newP.get());
469 coarseLevel.
Request(
"Importer", repartFactory.get());
470 coarseLevel.
Request(
"A", newA.get());
471 coarseLevel.
Request(
"Coordinates", newP.get());
472 repartFactory->Build(coarseLevel);
474 if (!precList11_.get<
bool>(
"repartition: rebalance P and R",
false))
480 AH_->setObjectLabel(
"RefMaxwell (1,1)");
485 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
489 if (useKokkos_ && precList11_.isParameter(
"aggregation: drop tol") && precList11_.get<
double>(
"aggregation: drop tol") != 0.0) {
490 GetOStream(
Warnings0) <<
"RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
491 <<
"support BlockSize > 1 and drop tol != 0.0" << std::endl;
492 precList11_.set(
"aggregation: drop tol", 0.0);
495 if (!AH_.is_null()) {
496 int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
500 SetProcRankVerbose(oldRank);
507 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
509 D0_Matrix_->resumeFill();
512 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
524 D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
528 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
533 Level fineLevel, coarseLevel;
539 fineLevel.
Set(
"A",SM_Matrix_);
540 coarseLevel.
Set(
"P",D0_Matrix_);
541 coarseLevel.
Set(
"Coordinates",Coords_);
543 coarseLevel.
setlib(SM_Matrix_->getDomainMap()->lib());
544 fineLevel.
setlib(SM_Matrix_->getDomainMap()->lib());
550 rapList.
set(
"transpose: use implicit",
false);
551 rapList.
set(
"rap: fix zero diagonals", parameterList_.get<
bool>(
"rap: fix zero diagonals",
true));
552 rapList.
set(
"rap: triple product", parameterList_.get<
bool>(
"rap: triple product",
false));
562 coarseLevel.
Set(
"number of partitions", numProcsA22);
563 coarseLevel.
Set(
"repartition: heuristic target rows per process", 1000);
574 std::string partName = precList22_.get<std::string>(
"repartition: partitioner",
"zoltan2");
576 if (partName ==
"zoltan") {
577 #ifdef HAVE_MUELU_ZOLTAN
579 partitioner->SetFactory(
"A", rapFact);
585 }
else if (partName ==
"zoltan2") {
586 #ifdef HAVE_MUELU_ZOLTAN2
590 partParams.
set(
"ParameterList", partpartParams);
591 partitioner->SetParameterList(partParams);
592 partitioner->SetFactory(
"A", rapFact);
601 repartParams.
set(
"repartition: print partition distribution", precList22_.get<
bool>(
"repartition: print partition distribution",
false));
602 repartParams.
set(
"repartition: remap parts", precList22_.get<
bool>(
"repartition: remap parts",
true));
603 repartParams.
set(
"repartition: remap accept partition", AH_.is_null());
604 repartFactory->SetParameterList(repartParams);
605 repartFactory->SetFactory(
"A", rapFact);
607 repartFactory->SetFactory(
"Partition", partitioner);
611 newPparams.
set(
"type",
"Interpolation");
612 newPparams.
set(
"repartition: rebalance P and R", precList22_.get<
bool>(
"repartition: rebalance P and R",
false));
613 newPparams.
set(
"repartition: use subcommunicators",
true);
614 newPparams.
set(
"repartition: rebalance Nullspace",
false);
616 newP->SetParameterList(newPparams);
617 newP->SetFactory(
"Importer", repartFactory);
622 newRparams.
set(
"type",
"Restriction");
623 newRparams.
set(
"repartition: rebalance P and R", precList22_.get<
bool>(
"repartition: rebalance P and R",
false));
624 newRparams.
set(
"repartition: use subcommunicators",
true);
625 newR->SetParameterList(newRparams);
626 newR->SetFactory(
"Importer", repartFactory);
627 newR->SetFactory(
"R", transPFactory);
631 rebAcParams.
set(
"repartition: use subcommunicators",
true);
632 newA->SetParameterList(rebAcParams);
633 newA->SetFactory(
"A", rapFact);
634 newA->SetFactory(
"Importer", repartFactory);
636 coarseLevel.
Request(
"R", newR.get());
637 coarseLevel.
Request(
"P", newP.get());
638 coarseLevel.
Request(
"Importer", repartFactory.get());
639 coarseLevel.
Request(
"A", newA.get());
640 coarseLevel.
Request(
"Coordinates", newP.get());
641 rapFact->
Build(fineLevel,coarseLevel);
642 repartFactory->Build(coarseLevel);
644 if (!precList22_.get<
bool>(
"repartition: rebalance P and R",
false))
650 A22_->setObjectLabel(
"RefMaxwell (2,2)");
656 coarseLevel.
Request(
"R", transPFactory.
get());
659 A22_->setObjectLabel(
"RefMaxwell (2,2)");
664 if (!A22_.is_null()) {
665 int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
669 SetProcRankVerbose(oldRank);
676 if (parameterList_.isType<std::string>(
"smoother: type") &&
677 parameterList_.get<std::string>(
"smoother: type") ==
"hiptmair" &&
681 #if defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || (defined(HAVE_MUELU_INST_DOUBLE_INT_INT)))
682 ParameterList hiptmairPreList, hiptmairPostList, smootherPreList, smootherPostList;
684 if (smootherList_.isSublist(
"smoother: pre params"))
685 smootherPreList = smootherList_.
sublist(
"smoother: pre params");
686 else if (smootherList_.isSublist(
"smoother: params"))
687 smootherPreList = smootherList_.
sublist(
"smoother: params");
688 hiptmairPreList.
set(
"hiptmair: smoother type 1",
689 smootherPreList.
get<std::string>(
"hiptmair: smoother type 1",
"CHEBYSHEV"));
690 hiptmairPreList.
set(
"hiptmair: smoother type 2",
691 smootherPreList.
get<std::string>(
"hiptmair: smoother type 2",
"CHEBYSHEV"));
692 if(smootherPreList.
isSublist(
"hiptmair: smoother list 1"))
693 hiptmairPreList.
set(
"hiptmair: smoother list 1", smootherPreList.
sublist(
"hiptmair: smoother list 1"));
694 if(smootherPreList.
isSublist(
"hiptmair: smoother list 2"))
695 hiptmairPreList.
set(
"hiptmair: smoother list 2", smootherPreList.
sublist(
"hiptmair: smoother list 2"));
696 hiptmairPreList.
set(
"hiptmair: pre or post",
697 smootherPreList.
get<std::string>(
"hiptmair: pre or post",
"pre"));
698 hiptmairPreList.
set(
"hiptmair: zero starting solution",
699 smootherPreList.
get<
bool>(
"hiptmair: zero starting solution",
true));
701 if (smootherList_.isSublist(
"smoother: post params"))
702 smootherPostList = smootherList_.
sublist(
"smoother: post params");
703 else if (smootherList_.isSublist(
"smoother: params"))
704 smootherPostList = smootherList_.
sublist(
"smoother: params");
705 hiptmairPostList.
set(
"hiptmair: smoother type 1",
706 smootherPostList.
get<std::string>(
"hiptmair: smoother type 1",
"CHEBYSHEV"));
707 hiptmairPostList.
set(
"hiptmair: smoother type 2",
708 smootherPostList.
get<std::string>(
"hiptmair: smoother type 2",
"CHEBYSHEV"));
709 if(smootherPostList.
isSublist(
"hiptmair: smoother list 1"))
710 hiptmairPostList.
set(
"hiptmair: smoother list 1", smootherPostList.
sublist(
"hiptmair: smoother list 1"));
711 if(smootherPostList.
isSublist(
"hiptmair: smoother list 2"))
712 hiptmairPostList.
set(
"hiptmair: smoother list 2", smootherPostList.
sublist(
"hiptmair: smoother list 2"));
713 hiptmairPostList.
set(
"hiptmair: pre or post",
714 smootherPostList.
get<std::string>(
"hiptmair: pre or post",
"post"));
715 hiptmairPostList.
set(
"hiptmair: zero starting solution",
716 smootherPostList.
get<
bool>(
"hiptmair: zero starting solution",
false));
718 typedef Tpetra::RowMatrix<SC, LO, GO, NO> TROW;
724 hiptmairPreSmoother_ -> setParameters(hiptmairPreList);
725 hiptmairPreSmoother_ -> initialize();
726 hiptmairPreSmoother_ -> compute();
728 hiptmairPostSmoother_ -> setParameters(hiptmairPostList);
729 hiptmairPostSmoother_ -> initialize();
730 hiptmairPostSmoother_ -> compute();
731 useHiptmairSmoothing_ =
true;
734 #endif // defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || defined(HAVE_MUELU_INST_DOUBLE_INT_INT))
736 if (parameterList_.isType<std::string>(
"smoother: pre type") && parameterList_.isType<std::string>(
"smoother: post type")) {
737 std::string preSmootherType = parameterList_.get<std::string>(
"smoother: pre type");
738 std::string postSmootherType = parameterList_.get<std::string>(
"smoother: post type");
741 if (parameterList_.isSublist(
"smoother: pre params"))
742 preSmootherList = parameterList_.
sublist(
"smoother: pre params");
743 if (parameterList_.isSublist(
"smoother: post params"))
744 postSmootherList = parameterList_.
sublist(
"smoother: post params");
751 level.
Set(
"A",SM_Matrix_);
752 level.
setlib(SM_Matrix_->getDomainMap()->lib());
760 level.
Request(
"PreSmoother",preSmootherFact.
get());
761 preSmootherFact->
Build(level);
764 level.
Request(
"PostSmoother",postSmootherFact.
get());
765 postSmootherFact->
Build(level);
768 std::string smootherType = parameterList_.
get<std::string>(
"smoother: type",
"CHEBYSHEV");
771 level.SetFactoryManager(factoryHandler);
773 level.setObjectLabel(
"RefMaxwell (1,1)");
774 level.Set(
"A",SM_Matrix_);
775 level.setlib(SM_Matrix_->getDomainMap()->lib());
778 level.Request(
"PreSmoother",SmootherFact.
get());
779 SmootherFact->
Build(level);
781 PostSmoother_ = PreSmoother_;
783 useHiptmairSmoothing_ =
false;
788 P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), 1);
789 if (!ImporterH_.is_null()) {
790 P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), 1);
791 P11xTmp_ = MultiVectorFactory::Build(ImporterH_->getSourceMap(), 1);
792 P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), 1);
794 P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), 1);
795 D0res_ = MultiVectorFactory::Build(D0_T_Matrix_->getRangeMap(), 1);
796 if (!Importer22_.is_null()) {
797 D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), 1);
798 D0xTmp_ = MultiVectorFactory::Build(Importer22_->getSourceMap(), 1);
799 D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), 1);
801 D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), 1);
802 residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), 1);
804 #ifdef HAVE_MUELU_CUDA
805 if (parameterList_.get<
bool>(
"refmaxwell: cuda profile setup",
false)) cudaProfilerStop();
810 if (dump_matrices_) {
811 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): dumping data" << std::endl;
815 #ifndef HAVE_MUELU_KOKKOS_REFACTOR
816 std::ofstream outBCrows(
"BCrows.mat");
817 std::copy(BCrows_.begin(), BCrows_.end(), std::ostream_iterator<LO>(outBCrows,
"\n"));
818 std::ofstream outBCcols(
"BCcols.mat");
819 std::copy(BCcols_.begin(), BCcols_.end(), std::ostream_iterator<LO>(outBCcols,
"\n"));
822 std::ofstream outBCrows(
"BCrows.mat");
823 auto BCrows = Kokkos::create_mirror_view (BCrowsKokkos_);
825 for (
size_t i = 0; i < BCrows.size(); i++)
826 outBCrows << BCrows[i] <<
"\n";
828 std::ofstream outBCcols(
"BCcols.mat");
829 auto BCcols = Kokkos::create_mirror_view (BCcolsKokkos_);
831 for (
size_t i = 0; i < BCcols.size(); i++)
832 outBCcols << BCcols[i] <<
"\n";
834 std::ofstream outBCrows(
"BCrows.mat");
835 std::copy(BCrows_.begin(), BCrows_.end(), std::ostream_iterator<LO>(outBCrows,
"\n"));
836 std::ofstream outBCcols(
"BCcols.mat");
837 std::copy(BCcols_.begin(), BCcols_.end(), std::ostream_iterator<LO>(outBCcols,
"\n"));
856 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
871 const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
872 size_t dim = Nullspace_->getNumVectors();
873 size_t numLocalRows = SM_Matrix_->getNodeNumRows();
878 bool read_P_from_file = parameterList_.
get(
"refmaxwell: read_P_from_file",
false);
879 if (read_P_from_file) {
882 std::string P_filename = parameterList_.get(
"refmaxwell: P_filename",std::string(
"P.mat"));
885 Level fineLevel, coarseLevel;
891 fineLevel.
Set(
"A",A_nodal_Matrix_);
892 fineLevel.
Set(
"Coordinates",Coords_);
893 fineLevel.
Set(
"DofsPerNode",1);
894 coarseLevel.
setlib(A_nodal_Matrix_->getDomainMap()->lib());
895 fineLevel.
setlib(A_nodal_Matrix_->getDomainMap()->lib());
899 LocalOrdinal NSdim = 1;
900 RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
901 nullSpace->putScalar(SC_ONE);
902 fineLevel.
Set(
"Nullspace",nullSpace);
905 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
906 RCP<Factory> dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact;
908 dropFact =
rcp(
new CoalesceDropFactory_kokkos());
909 UncoupledAggFact =
rcp(
new UncoupledAggregationFactory_kokkos());
910 coarseMapFact =
rcp(
new CoarseMapFactory_kokkos());
911 TentativePFact =
rcp(
new TentativePFactory_kokkos());
912 Tfact =
rcp(
new CoordinatesTransferFactory_kokkos());
927 dropFact->
SetFactory(
"UnAmalgamationInfo", amalgFact);
928 double dropTol = parameterList_.get(
"aggregation: drop tol",0.0);
931 UncoupledAggFact->
SetFactory(
"Graph", dropFact);
933 coarseMapFact->
SetFactory(
"Aggregates", UncoupledAggFact);
935 TentativePFact->
SetFactory(
"Aggregates", UncoupledAggFact);
936 TentativePFact->
SetFactory(
"UnAmalgamationInfo", amalgFact);
937 TentativePFact->
SetFactory(
"CoarseMap", coarseMapFact);
939 Tfact->
SetFactory(
"Aggregates", UncoupledAggFact);
940 Tfact->
SetFactory(
"CoarseMap", coarseMapFact);
942 coarseLevel.
Request(
"P",TentativePFact.
get());
943 coarseLevel.
Request(
"Coordinates",Tfact.
get());
945 coarseLevel.
Get(
"P",P_nodal,TentativePFact.
get());
946 coarseLevel.
Get(
"Coordinates",CoordsH_,Tfact.
get());
951 RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
955 int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
959 P_nodal_temp =
rcp(
new CrsMatrixWrap(targetMap, 0));
962 P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
963 rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
964 P_nodal_imported = P_nodal_temp->getCrsMatrix();
968 P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
970 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
973 using ATS = Kokkos::ArithTraits<SC>;
976 typedef typename Matrix::local_matrix_type KCRS;
977 typedef typename KCRS::device_type device_t;
978 typedef typename KCRS::StaticCrsGraphType graph_t;
979 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
980 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
981 typedef typename KCRS::values_type::non_const_type scalar_view_t;
984 auto localP = P_nodal_imported->getLocalMatrix();
985 auto localD0 = D0_Matrix_->getLocalMatrix();
990 std::string defaultAlgo =
"mat-mat";
991 std::string algo = parameterList_.
get(
"refmaxwell: prolongator compute algorithm",defaultAlgo);
993 if (algo ==
"mat-mat") {
994 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap(),0);
998 auto localD0P = D0_P_nodal->getLocalMatrix();
1004 lno_view_t P11rowptr(
"P11_rowptr", numLocalRows+1);
1005 lno_nnz_view_t P11colind(
"P11_colind",dim*localD0P.graph.entries.size());
1006 scalar_view_t P11vals(
"P11_vals",dim*localD0P.graph.entries.size());
1009 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1010 KOKKOS_LAMBDA(
const size_t i) {
1011 P11rowptr(i) = dim*localD0P.graph.row_map(i);
1015 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1016 KOKKOS_LAMBDA(
const size_t jj) {
1017 for (
size_t k = 0; k < dim; k++) {
1018 P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1019 P11vals(dim*jj+k) = SC_ZERO;
1023 auto localNullspace = Nullspace_->template getLocalView<device_t>();
1026 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1030 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1034 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1035 KOKKOS_LAMBDA(
const size_t i) {
1036 for (
size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1037 LO l = localD0.graph.entries(ll);
1038 SC p = localD0.values(ll);
1039 if (ATS::magnitude(p) < tol)
1041 for (
size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1042 LO j = localP.graph.entries(jj);
1043 SC v = localP.values(jj);
1044 for (
size_t k = 0; k < dim; k++) {
1046 SC n = localNullspace(i,k);
1048 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1049 if (P11colind(m) == jNew)
1051 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA)
1054 P11vals(m) += half * v * n;
1061 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1062 KOKKOS_LAMBDA(
const size_t i) {
1063 for (
size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1064 LO l = localD0.graph.entries(ll);
1065 for (
size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1066 LO j = localP.graph.entries(jj);
1067 SC v = localP.values(jj);
1068 for (
size_t k = 0; k < dim; k++) {
1070 SC n = localNullspace(i,k);
1072 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1073 if (P11colind(m) == jNew)
1075 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA)
1078 P11vals(m) += half * v * n;
1086 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1087 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1088 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1091 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1093 #endif // ifdef(HAVE_MUELU_KOKKOS_REFACTOR)
1098 for(
size_t i=0; i<dim; i++) {
1099 nullspaceRCP[i] = Nullspace_->getData(i);
1100 nullspace[i] = nullspaceRCP[i]();
1111 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1112 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1120 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1121 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1129 std::string defaultAlgo =
"gustavson";
1130 std::string algo = parameterList_.get(
"refmaxwell: prolongator compute algorithm",defaultAlgo);
1132 if (algo ==
"mat-mat") {
1133 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap(),0);
1140 rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1147 D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1153 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1154 P11Crs->allocateAllValues(dim*D0Pcolind.
size(), P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1161 for (
size_t i = 0; i < numLocalRows+1; i++) {
1162 P11rowptr[i] = dim*D0Prowptr[i];
1167 for (
size_t jj = 0; jj < (size_t) D0Pcolind.
size(); jj++)
1168 for (
size_t k = 0; k < dim; k++) {
1169 P11colind[nnz] = dim*D0Pcolind[jj]+k;
1170 P11vals[nnz] = SC_ZERO;
1175 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1179 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1182 for (
size_t i = 0; i < numLocalRows; i++) {
1183 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1184 LO l = D0colind[ll];
1188 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1191 for (
size_t k = 0; k < dim; k++) {
1193 SC n = nullspace[k][i];
1195 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1196 if (P11colind[m] == jNew)
1198 #ifdef HAVE_MUELU_DEBUG
1201 P11vals[m] += half * v * n;
1208 for (
size_t i = 0; i < numLocalRows; i++) {
1209 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1210 LO l = D0colind[ll];
1211 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1214 for (
size_t k = 0; k < dim; k++) {
1216 SC n = nullspace[k][i];
1218 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1219 if (P11colind[m] == jNew)
1221 #ifdef HAVE_MUELU_DEBUG
1224 P11vals[m] += half * v * n;
1231 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1232 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1234 }
else if (algo ==
"gustavson") {
1236 LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1240 size_t nnz_alloc = dim*D0vals_RCP.
size();
1246 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1247 P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1254 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1258 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1263 for (
size_t i = 0; i < numLocalRows; i++) {
1265 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1266 LO l = D0colind[ll];
1270 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1273 for (
size_t k = 0; k < dim; k++) {
1275 SC n = nullspace[k][i];
1277 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1278 P11_status[jNew] = nnz;
1279 P11colind[nnz] = jNew;
1280 P11vals[nnz] = half * v * n;
1285 P11vals[P11_status[jNew]] += half * v * n;
1294 P11rowptr[numLocalRows] = nnz;
1298 for (
size_t i = 0; i < numLocalRows; i++) {
1300 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1301 LO l = D0colind[ll];
1302 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1305 for (
size_t k = 0; k < dim; k++) {
1307 SC n = nullspace[k][i];
1309 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1310 P11_status[jNew] = nnz;
1311 P11colind[nnz] = jNew;
1312 P11vals[nnz] = half * v * n;
1317 P11vals[P11_status[jNew]] += half * v * n;
1326 P11rowptr[numLocalRows] = nnz;
1334 P11colind_RCP.
resize(nnz);
1337 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1338 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1340 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1341 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1346 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1351 RCP<Matrix> Matrix1 = MatrixFactory::Build(P11_->getDomainMap(),0);
1352 if (parameterList_.get<
bool>(
"rap: triple product",
false) ==
false) {
1353 RCP<Matrix> C = MatrixFactory::Build(SM_Matrix_->getRowMap(),0);
1360 MultiplyRAP(*P11_,
true, *SM_Matrix_,
false, *P11_,
false, *Matrix1,
true,
true);
1362 if (parameterList_.get<
bool>(
"rap: fix zero diagonals",
true)) {
1367 if(disable_addon_==
true) {
1375 "MueLu::RefMaxwell::formCoarseMatrix(): Inverse of "
1376 "lumped mass matrix required for add-on (i.e. M0inv_Matrix is null)");
1379 RCP<Matrix> Zaux = MatrixFactory::Build(M1_Matrix_->getRowMap(),0);
1380 RCP<Matrix> Z = MatrixFactory::Build(D0_Matrix_->getDomainMap(),0);
1388 RCP<Matrix> Matrix2 = MatrixFactory::Build(Z->getDomainMap(),0);
1389 if (M0inv_Matrix_->getGlobalMaxNumRowEntries()<=1) {
1392 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1395 ZT = Utilities_kokkos::Transpose(*Z);
1401 RCP<Vector> diag = VectorFactory::Build(M0inv_Matrix_->getRowMap());
1402 M0inv_Matrix_->getLocalDiagCopy(*diag);
1403 if (Z->getRowMap()->isSameAs(*(diag->getMap())))
1404 Z->leftScale(*diag);
1406 RCP<Import> importer = ImportFactory::Build(diag->getMap(),Z->getRowMap());
1407 RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
1409 Z->leftScale(*diag2);
1412 }
else if (parameterList_.get<
bool>(
"rap: triple product",
false) ==
false) {
1413 RCP<Matrix> C2 = MatrixFactory::Build(M0inv_Matrix_->getRowMap(),0);
1421 MultiplyRAP(*Z,
true, *M0inv_Matrix_,
false, *Z,
false, *Matrix2,
true,
true);
1424 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*Matrix1,
false,(Scalar)1.0,*Matrix2,
false,(Scalar)1.0,AH_,GetOStream(
Runtime0));
1425 AH_->fillComplete();
1429 size_t dim = Nullspace_->getNumVectors();
1430 AH_->SetFixedBlockSize(dim);
1431 AH_->setObjectLabel(
"RefMaxwell (1,1)");
1436 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1438 SM_Matrix_ = SM_Matrix_new;
1442 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1446 if (!ImporterH_.is_null()) {
1449 P11res_.swap(P11resTmp_);
1451 if (!AH_.is_null()) {
1458 P11res_->replaceMap(AH_->getRangeMap());
1459 P11x_ ->replaceMap(AH_->getDomainMap());
1460 HierarchyH_->Iterate(*P11res_, *P11x_, 1,
true);
1461 P11x_ ->replaceMap(origXMap);
1462 P11res_->replaceMap(origRhsMap);
1464 if (!ImporterH_.is_null()) {
1467 P11x_.swap(P11xTmp_);
1472 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1475 if (!Importer22_.is_null()) {
1478 D0res_.swap(D0resTmp_);
1480 if (!A22_.is_null()) {
1487 D0res_->replaceMap(A22_->getRangeMap());
1488 D0x_ ->replaceMap(A22_->getDomainMap());
1489 Hierarchy22_->Iterate(*D0res_, *D0x_, 1,
true);
1490 D0x_ ->replaceMap(origXMap);
1491 D0res_->replaceMap(origRhsMap);
1493 if (!Importer22_.is_null()) {
1501 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1516 if (!ImporterH_.is_null()) {
1519 P11res_.swap(P11resTmp_);
1521 if (!Importer22_.is_null()) {
1524 D0res_.swap(D0resTmp_);
1528 if (!AH_.is_null()) {
1535 P11res_->replaceMap(AH_->getRangeMap());
1536 P11x_ ->replaceMap(AH_->getDomainMap());
1537 HierarchyH_->Iterate(*P11res_, *P11x_, 1,
true);
1538 P11x_ ->replaceMap(origXMap);
1539 P11res_->replaceMap(origRhsMap);
1543 if (!A22_.is_null()) {
1550 D0res_->replaceMap(A22_->getRangeMap());
1551 D0x_ ->replaceMap(A22_->getDomainMap());
1552 Hierarchy22_->Iterate(*D0res_, *D0x_, 1,
true);
1553 D0x_ ->replaceMap(origXMap);
1554 D0res_->replaceMap(origRhsMap);
1557 if (!Importer22_.is_null()) {
1563 if (!ImporterH_.is_null()) {
1566 P11x_.swap(P11xTmp_);
1573 X.update(one, *residual_, one);
1575 if (!ImporterH_.is_null()) {
1576 P11res_.swap(P11resTmp_);
1577 P11x_.swap(P11xTmp_);
1579 if (!Importer22_.is_null()) {
1580 D0res_.swap(D0resTmp_);
1588 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1597 X.update(one, *residual_, one);
1598 if (!ImporterH_.is_null()) {
1599 P11res_.swap(P11resTmp_);
1600 P11x_.swap(P11xTmp_);
1608 X.update(one, *residual_, one);
1609 if (!Importer22_.is_null()) {
1610 D0res_.swap(D0resTmp_);
1619 X.update(one, *residual_, one);
1620 if (!ImporterH_.is_null()) {
1621 P11res_.swap(P11resTmp_);
1622 P11x_.swap(P11xTmp_);
1628 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1637 X.update(one, *residual_, one);
1638 if (!Importer22_.is_null()) {
1639 D0res_.swap(D0resTmp_);
1648 X.update(one, *residual_, one);
1649 if (!ImporterH_.is_null()) {
1650 P11res_.swap(P11resTmp_);
1651 P11x_.swap(P11xTmp_);
1659 X.update(one, *residual_, one);
1660 if (!Importer22_.is_null()) {
1661 D0res_.swap(D0resTmp_);
1667 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1679 X.update(one, *residual_, one);
1681 if (!ImporterH_.is_null()) {
1682 P11res_.swap(P11resTmp_);
1683 P11x_.swap(P11xTmp_);
1689 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1698 if (X.getNumVectors() != P11res_->getNumVectors()) {
1699 P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), X.getNumVectors());
1700 if (!ImporterH_.is_null()) {
1701 P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), X.getNumVectors());
1702 P11xTmp_ = MultiVectorFactory::Build(ImporterH_->getSourceMap(), X.getNumVectors());
1703 P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), X.getNumVectors());
1705 P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), X.getNumVectors());
1706 if (!Importer22_.is_null()) {
1707 D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), X.getNumVectors());
1708 D0xTmp_ = MultiVectorFactory::Build(Importer22_->getSourceMap(), X.getNumVectors());
1709 D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), X.getNumVectors());
1711 D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), X.getNumVectors());
1712 residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), X.getNumVectors());
1720 #if defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || defined(HAVE_MUELU_INST_DOUBLE_INT_INT))
1721 if (useHiptmairSmoothing_) {
1724 hiptmairPreSmoother_->apply(tRHS, tX);
1728 PreSmoother_->Apply(X, RHS, use_as_preconditioner_);
1732 if(mode_==
"additive")
1733 applyInverseAdditive(RHS,X);
1734 else if(mode_==
"121")
1735 applyInverse121(RHS,X);
1736 else if(mode_==
"212")
1737 applyInverse212(RHS,X);
1738 else if(mode_==
"11only")
1739 applyInverse11only(RHS,X);
1740 else if(mode_==
"none") {
1744 applyInverseAdditive(RHS,X);
1750 #if defined(HAVE_MUELU_IFPACK2) && (!defined(HAVE_MUELU_EPETRA) || defined(HAVE_MUELU_INST_DOUBLE_INT_INT))
1751 if (useHiptmairSmoothing_)
1755 hiptmairPostSmoother_->apply(tRHS, tX);
1759 PostSmoother_->Apply(X, RHS,
false);
1765 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1771 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1784 HierarchyH_ = Teuchos::null;
1785 Hierarchy22_ = Teuchos::null;
1786 PreSmoother_ = Teuchos::null;
1787 PostSmoother_ = Teuchos::null;
1788 disable_addon_ =
false;
1792 setParameters(List);
1794 D0_Matrix_ = D0_Matrix;
1795 M0inv_Matrix_ = M0inv_Matrix;
1796 M1_Matrix_ = M1_Matrix;
1798 Nullspace_ = Nullspace;
1801 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1805 std::ostringstream oss;
1811 if (!A22_.is_null())
1812 root = comm->getRank();
1822 oss <<
"\n--------------------------------------------------------------------------------\n" <<
1823 "--- RefMaxwell Summary ---\n"
1824 "--------------------------------------------------------------------------------" << std::endl;
1827 GlobalOrdinal numRows;
1830 SM_Matrix_->getRowMap()->getComm()->barrier();
1832 numRows = SM_Matrix_->getGlobalNumRows();
1833 nnz = SM_Matrix_->getGlobalNumEntries();
1836 int rowspacer = 3;
while (tt != 0) { tt /= 10; rowspacer++; }
1838 int nnzspacer = 2;
while (tt != 0) { tt /= 10; nnzspacer++; }
1840 oss <<
"block " << std::setw(rowspacer) <<
" rows " << std::setw(nnzspacer) <<
" nnz " << std::setw(9) <<
" nnz/row" << std::endl;
1841 oss <<
"(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
1843 if (!A22_.is_null()) {
1845 numRows = A22_->getGlobalNumRows();
1846 nnz = A22_->getGlobalNumEntries();
1848 oss <<
"(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
1853 std::string outstr = oss.str();
1857 MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
1859 int strLength = outstr.size();
1860 MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
1861 if (comm->getRank() != root)
1862 outstr.resize(strLength);
1863 MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
1870 std::ostringstream oss2;
1872 oss2 <<
"Sub-solver distribution over ranks" << std::endl;
1873 oss2 <<
"( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
1875 int numProcs = comm->getSize();
1885 if (!A22_.is_null())
1887 std::vector<char> states(numProcs, 0);
1889 MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
1891 states.push_back(status);
1894 int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
1895 for (
int proc = 0; proc < numProcs; proc += rowWidth) {
1896 for (
int j = 0; j < rowWidth; j++)
1897 if (proc + j < numProcs)
1898 if (states[proc+j] == 0)
1900 else if (states[proc+j] == 1)
1902 else if (states[proc+j] == 2)
1909 oss2 <<
" " << proc <<
":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
1920 #define MUELU_REFMAXWELL_SHORT
1921 #endif //ifdef MUELU_REFMAXWELL_DEF_HPP
Important warning messages (one line)
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
This class specifies the default factory that should generate some data on a Level if the data does n...
void solveH() const
solve coarse (1,1) block
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level->Get< RCP<Matrix> >("A", factory) if factory == NULL => use default factory.
static Teuchos::ArrayRCP< const bool > DetectDirichletCols(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< const bool > &dirichletRows)
Detect Dirichlet columns based on Dirichlet rows.
Factory for generating coarse level map. Used by TentativePFactory.
static RCP< const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2TpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > const vec)
Helper utility to pull out the underlying Tpetra objects from an Xpetra object.
void solve22() const
solve (2,2) block
static magnitudeType eps()
Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
T & get(const std::string &name, T def_value)
Class that encapsulates external library smoothers.
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static void MultiplyRAP(const Matrix &R, bool transposeR, const Matrix &A, bool transposeA, const Matrix &P, bool transposeP, Matrix &Ac, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > ¶ms=null)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::magnitude(0.), const bool count_twos_as_dirichlet=false)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Factory for building permutation matrix that can be be used to shuffle data (matrices, vectors) among processes.
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
One-liner description of what is happening.
void SetPreviousLevel(const RCP< Level > &previousLevel)
Interface to Zoltan library.This interface provides access to partitioning methods in Zoltan...
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
void setParameters(Teuchos::ParameterList &list)
Set parameters.
void buildProlongator()
Setup the prolongator for the (1,1)-block.
static RCP< Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2NonConstTpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > vec)
static void CheckRepairMainDiagonal(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &Ac, bool const &repairZeroDiagonals, Teuchos::FancyOStream &fos, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType threshold=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< Scalar >::magnitudeType >::zero())
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
Factory for building tentative prolongator.
Print even more statistics.
void setlib(Xpetra::UnderlyingLib lib2)
static RCP< Time > getNewTimer(const std::string &name)
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)
void resize(const size_type n, const T &val=T())
void Build(Level ¤tLevel) const
Creates pre and post smoothers.
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &RHS)
void applyInverseAdditive(const MultiVector &RHS, MultiVector &X) const
apply additive algorithm for 2x2 solve
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
virtual void SetParameterList(const Teuchos::ParameterList ¶mList)
Set parameters from a parameter list and return with default values.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
void compute()
Setup the preconditioner.
bool isSublist(const std::string &name) const
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LocalOrdinal, GlobalOrdinal, Node > > X)
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix...
Interface to Zoltan2 library.This interface provides access to partitioning methods in Zoltan2...
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
virtual void setObjectLabel(const std::string &objectLabel)
AmalgamationFactory for subblocks of strided map based amalgamation data.
void applyInverse212(const MultiVector &RHS, MultiVector &X) const
apply 2-1-2 algorithm for 2x2 solve
void initialize(const Teuchos::RCP< Matrix > &D0_Matrix, const Teuchos::RCP< Matrix > &M0inv_Matrix, const Teuchos::RCP< Matrix > &M1_Matrix, const Teuchos::RCP< MultiVector > &Nullspace, const Teuchos::RCP< RealValuedMultiVector > &Coords, Teuchos::ParameterList &List)
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
void applyInverse121(const MultiVector &RHS, MultiVector &X) const
apply 1-2-1 algorithm for 2x2 solve
Applies permutation to grid transfer operators.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_HIGH) const
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
void SetParameter(const std::string &name, const ParameterEntry &entry)
Set a parameter directly as a ParameterEntry.
Factory for creating a graph base on a given matrix.
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > ¶ms=null)
void Build(Level ¤tLevel) const
Build an object with this factory.
void SetLevelID(int levelID)
Set level number.
static Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Read(const std::string &fileName, Xpetra::UnderlyingLib lib, const RCP< const Teuchos::Comm< int > > &comm, bool binary=false)
virtual void SetFactory(const std::string &varName, const RCP< const FactoryBase > &factory)=0
Configuration.
Class for transferring coordinates from a finer level to a coarser one.
Factory for building restriction operators.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletCols(const Xpetra::Matrix< SC, LO, GO, NO > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)
Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
void applyInverse11only(const MultiVector &RHS, MultiVector &X) const
apply solve to 1-1 block only
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
Factory for building coarse matrices.
void formCoarseMatrix()
Compute P11^{T}*A*P11 efficiently.
Exception throws to report errors in the internal logical of the program.
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
#define TEUCHOS_ASSERT(assertion_test)
static void ZeroDirichletCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
Factory for building coarse matrices.
static RCP< Tpetra::RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op2NonConstTpetraRow(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op)
Factory for building uncoupled aggregates.
Factory for building a thresholded operator.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
void resetMatrix(Teuchos::RCP< Matrix > SM_Matrix_new)
Reset system matrix.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
static const RCP< const NoFactory > getRCP()
Static Get() functions.