46 #ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
47 #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
51 #include <Xpetra_Map.hpp>
53 #include <Xpetra_MultiVectorFactory.hpp>
58 #include "MueLu_OnePtAggregationAlgorithm_kokkos.hpp"
59 #include "MueLu_PreserveDirichletAggregationAlgorithm_kokkos.hpp"
61 #include "MueLu_AggregationPhase1Algorithm_kokkos.hpp"
62 #include "MueLu_AggregationPhase2aAlgorithm_kokkos.hpp"
63 #include "MueLu_AggregationPhase2bAlgorithm_kokkos.hpp"
64 #include "MueLu_AggregationPhase3Algorithm_kokkos.hpp"
67 #include "MueLu_LWGraph_kokkos.hpp"
68 #include "MueLu_Aggregates.hpp"
72 #include "KokkosGraph_Distance2ColorHandle.hpp"
73 #include "KokkosGraph_Distance2Color.hpp"
74 #include "KokkosGraph_MIS2.hpp"
78 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
80 : bDefinitionPhase_(true) {}
82 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
90 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
95 validParamList->
getEntry(
"aggregation: ordering").
setValidator(
rcp(
new validatorType(Teuchos::tuple<std::string>(
"natural",
"graph",
"random"),
"aggregation: ordering")));
106 SET_VALID_ENTRY(
"aggregation: error on nodes with no on-rank neighbors");
110 #undef SET_VALID_ENTRY
114 validParamList->
set<
RCP<const FactoryBase> >(
"DofsPerNode", null,
"Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
117 validParamList->
set<std::string>(
"OnePt aggregate map name",
"",
"Name of input map for single node aggregates. (default='')");
118 validParamList->
set<std::string>(
"OnePt aggregate map factory",
"",
"Generating factory of (DOF) map for single node aggregates.");
121 return validParamList;
124 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
126 Input(currentLevel,
"Graph");
127 Input(currentLevel,
"DofsPerNode");
132 std::string mapOnePtName = pL.
get<std::string>(
"OnePt aggregate map name");
133 if (mapOnePtName.length() > 0) {
134 std::string mapOnePtFactName = pL.
get<std::string>(
"OnePt aggregate map factory");
135 if (mapOnePtFactName ==
"" || mapOnePtFactName ==
"NoFactory") {
144 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
153 bDefinitionPhase_ =
false;
155 if (pL.
get<
int>(
"aggregation: max agg size") == -1)
156 pL.
set(
"aggregation: max agg size", INT_MAX);
171 TEUCHOS_TEST_FOR_EXCEPTION(pL.
get<
bool>(
"aggregation: match ML phase1"), std::invalid_argument,
"Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of uncoupled aggregation");
172 TEUCHOS_TEST_FOR_EXCEPTION(pL.
get<
bool>(
"aggregation: match ML phase2b"), std::invalid_argument,
"Option: 'aggregation: match ML phase2b' is not supported in the Kokkos version of uncoupled aggregation");
174 std::string mapOnePtName = pL.
get<std::string>(
"OnePt aggregate map name");
176 if (mapOnePtName.length()) {
177 std::string mapOnePtFactName = pL.
get<std::string>(
"OnePt aggregate map factory");
178 if (mapOnePtFactName ==
"" || mapOnePtFactName ==
"NoFactory") {
182 OnePtMap = currentLevel.
Get<
RCP<Map> >(mapOnePtName, mapOnePtFact.
get());
190 aggregates->setObjectLabel(
"UC");
195 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type> aggStat(Kokkos::ViewAllocateWithoutInitializing(
"aggregation status"),
197 Kokkos::deep_copy(aggStat,
READY);
209 Kokkos::parallel_for(
210 "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
211 Kokkos::RangePolicy<local_ordinal_type, execution_space>(0, numRows),
212 KOKKOS_LAMBDA(
const local_ordinal_type nodeIdx) {
213 if (dirichletBoundaryMap(nodeIdx) ==
true) {
219 LO nDofsPerNode = Get<LO>(currentLevel,
"DofsPerNode");
223 if (OnePtMap != Teuchos::null) {
224 typename Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>::HostMirror aggStatHost = Kokkos::create_mirror_view(aggStat);
225 Kokkos::deep_copy(aggStatHost, aggStat);
227 for (
LO i = 0; i < numRows; i++) {
229 GO grid = (graph->
GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
231 for (
LO kr = 0; kr < nDofsPerNode; kr++)
232 if (OnePtMap->isNodeGlobalElement(grid + kr))
233 aggStatHost(i) =
ONEPT;
236 Kokkos::deep_copy(aggStat, aggStatHost);
240 GO numGlobalRows = 0;
244 LO numNonAggregatedNodes = numRows;
245 std::string aggAlgo = pL.
get<std::string>(
"aggregation: coloring algorithm");
246 if (aggAlgo ==
"mis2 coarsening" || aggAlgo ==
"mis2 aggregation") {
249 using device_t =
typename graph_t::device_type;
250 using exec_space =
typename device_t::execution_space;
251 using rowmap_t =
typename graph_t::row_map_type;
252 using colinds_t =
typename graph_t::entries_type;
253 using lno_t =
typename colinds_t::non_const_value_type;
257 typename colinds_t::non_const_type labels;
259 if (aggAlgo ==
"mis2 coarsening") {
261 labels = KokkosGraph::graph_mis2_coarsen<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
262 }
else if (aggAlgo ==
"mis2 aggregation") {
264 labels = KokkosGraph::graph_mis2_aggregate<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
266 auto vertex2AggId = aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
267 auto procWinner = aggregates->GetProcWinner()->getDeviceLocalView(Xpetra::Access::OverwriteAll);
268 int rank = comm->getRank();
269 Kokkos::parallel_for(
270 Kokkos::RangePolicy<exec_space>(0, numRows),
271 KOKKOS_LAMBDA(lno_t i) {
272 procWinner(i, 0) = rank;
273 if (aggStat(i) ==
READY) {
275 vertex2AggId(i, 0) = labels(i);
278 numNonAggregatedNodes = 0;
279 aggregates->SetNumAggregates(numAggs);
290 using KernelHandle = KokkosKernels::Experimental::
291 KokkosKernelsHandle<
typename graph_t::row_map_type::value_type,
292 typename graph_t::entries_type::value_type,
293 typename graph_t::entries_type::value_type,
294 typename graph_t::device_type::execution_space,
295 typename graph_t::device_type::memory_space,
296 typename graph_t::device_type::memory_space>;
299 kh.create_distance2_graph_coloring_handle();
302 auto coloringHandle = kh.get_distance2_graph_coloring_handle();
312 if (pL.
get<
bool>(
"aggregation: deterministic") ==
true) {
313 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL);
315 }
else if (aggAlgo ==
"serial") {
316 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL);
318 }
else if (aggAlgo ==
"default") {
319 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_DEFAULT);
321 }
else if (aggAlgo ==
"vertex based") {
322 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB);
324 }
else if (aggAlgo ==
"vertex based bit set") {
325 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT);
327 }
else if (aggAlgo ==
"edge filtering") {
328 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT_EF);
330 }
else if (aggAlgo ==
"net based bit set") {
331 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_NB_BIT);
334 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering")
338 typename graph_t::row_map_type aRowptrs = graph->
getRowPtrs();
339 typename graph_t::entries_type aColinds = graph->
getEntries();
344 SubFactoryMonitor sfm2(*
this,
"Algo \"Graph Coloring\": KokkosGraph Call", currentLevel);
345 KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds);
349 aggregates->SetGraphColors(coloringHandle->get_vertex_colors());
350 aggregates->SetGraphNumColors(static_cast<LO>(coloringHandle->get_num_colors()));
353 kh.destroy_distance2_graph_coloring_handle();
357 GetOStream(
Statistics1) <<
" num colors: " << aggregates->GetGraphNumColors() << std::endl;
359 GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
360 for (
size_t a = 0; a < algos_.size(); a++) {
361 std::string phase = algos_[a]->description();
364 int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
365 algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);
366 algos_[a]->SetProcRankVerbose(oldRank);
369 GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0;
370 GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0;
371 MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated);
374 double aggPercent = 100 * as<double>(numGlobalAggregated) / as<double>(numGlobalRows);
375 if (aggPercent > 99.99 && aggPercent < 100.00) {
382 GetOStream(
Statistics1) <<
" aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) <<
" (phase), " << std::fixed
383 << std::setprecision(2) << numGlobalAggregated <<
"/" << numGlobalRows <<
" [" << aggPercent <<
"%] (total)\n"
384 <<
" remaining : " << numGlobalRows - numGlobalAggregated <<
"\n"
385 <<
" aggregates : " << numGlobalAggs - numGlobalAggsPrev <<
" (phase), " << numGlobalAggs <<
" (total)" << std::endl;
386 numGlobalAggregatedPrev = numGlobalAggregated;
387 numGlobalAggsPrev = numGlobalAggs;
394 aggregates->AggregatesCrossProcessors(
false);
395 aggregates->ComputeAggregateSizes(
true );
397 Set(currentLevel,
"Aggregates", aggregates);
#define MueLu_sumAll(rcpComm, in, out)
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing the associated storage counter (i.e., read-only access). Usage: Level->Get< RCP<Matrix> >("A", factory). If factory == NULL, the default factory is used.
KOKKOS_INLINE_FUNCTION row_type getRowPtrs() const
Return the row pointers of the local graph.
Container class for aggregation information.
typename std::conditional< OnHost, typename local_graph_device_type::HostMirror, local_graph_device_type >::type local_graph_type
void setValidator(RCP< const ParameterEntryValidator > const &validator)
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
#define SET_VALID_ENTRY(name)
void Build(Level &currentLevel) const
Build aggregates.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
static const NoFactory * get()
UncoupledAggregationFactory_kokkos()
Constructor.
typename device_type::execution_space execution_space
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
LocalOrdinal local_ordinal_type
KOKKOS_INLINE_FUNCTION entries_type getEntries() const
Return the list entries in the local graph.
void DeclareInput(Level &currentLevel) const
Input.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
typename device_type::memory_space memory_space
const RCP< const Map > GetDomainMap() const
Exception thrown to report errors in the internal logic of the program.
ParameterEntry & getEntry(const std::string &name)
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const Teuchos::Comm< int > > GetComm() const