46 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 #include <Kokkos_Core.hpp>
51 #include <KokkosSparse_CrsMatrix.hpp>
57 #include "MueLu_AmalgamationInfo.hpp"
60 #include "MueLu_LWGraph_kokkos.hpp"
63 #include "MueLu_Utilities_kokkos.hpp"
68 namespace CoalesceDrop_Kokkos_Details {
70 template<
class LO,
class RowType>
73 ScanFunctor(RowType rows_) : rows(rows_) { }
75 KOKKOS_INLINE_FUNCTION
76 void operator()(
const LO i,
LO& upd,
const bool&
final)
const {
86 template<
class LO,
class GhostedViewType>
87 class ClassicalDropFunctor {
89 typedef typename GhostedViewType::value_type
SC;
90 typedef Kokkos::ArithTraits<SC> ATS;
91 typedef typename ATS::magnitudeType magnitudeType;
97 ClassicalDropFunctor(GhostedViewType ghostedDiag, magnitudeType threshold) :
103 KOKKOS_FORCEINLINE_FUNCTION
104 bool operator()(
LO row,
LO col,
SC val)
const {
106 auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0));
107 auto aij2 = ATS::magnitude(val) * ATS::magnitude(val);
109 return (aij2 <= eps*eps * aiiajj);
113 template<
class LO,
class CoordsType>
114 class DistanceFunctor {
116 typedef typename CoordsType::value_type
SC;
117 typedef Kokkos::ArithTraits<SC> ATS;
118 typedef typename ATS::magnitudeType magnitudeType;
121 typedef SC value_type;
124 DistanceFunctor(CoordsType coords_) : coords(coords_) { }
126 KOKKOS_INLINE_FUNCTION
127 magnitudeType distance2(
LO row,
LO col)
const {
128 SC d = ATS::zero(), s;
129 for (
size_t j = 0; j < coords.extent(1); j++) {
130 s = coords(row,j) - coords(col,j);
133 return ATS::magnitude(d);
139 template<
class LO,
class GhostedViewType,
class DistanceFunctor>
140 class DistanceLaplacianDropFunctor {
142 typedef typename GhostedViewType::value_type
SC;
143 typedef Kokkos::ArithTraits<SC> ATS;
144 typedef typename ATS::magnitudeType magnitudeType;
147 DistanceLaplacianDropFunctor(GhostedViewType ghostedLaplDiag, DistanceFunctor distFunctor_, magnitudeType threshold) :
148 diag(ghostedLaplDiag),
149 distFunctor(distFunctor_),
154 KOKKOS_INLINE_FUNCTION
155 bool operator()(
LO row,
LO col,
SC )
const {
160 typedef typename DistanceFunctor::value_type dSC;
161 typedef Kokkos::ArithTraits<dSC> dATS;
162 auto fval = dATS::one() / distFunctor.distance2(row, col);
164 auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0));
165 auto aij2 = ATS::magnitude(fval) * ATS::magnitude(fval);
167 return (aij2 <= eps*eps * aiiajj);
171 GhostedViewType diag;
172 DistanceFunctor distFunctor;
176 template<
class SC,
class LO,
class MatrixType,
class BndViewType,
class DropFunctorType>
177 class ScalarFunctor {
179 typedef typename MatrixType::StaticCrsGraphType graph_type;
180 typedef typename graph_type::row_map_type rows_type;
181 typedef typename graph_type::entries_type cols_type;
182 typedef typename MatrixType::values_type vals_type;
183 typedef Kokkos::ArithTraits<SC> ATS;
184 typedef typename ATS::magnitudeType magnitudeType;
187 ScalarFunctor(MatrixType A_, BndViewType bndNodes_, DropFunctorType dropFunctor_,
188 typename rows_type::non_const_type rows_,
189 typename cols_type::non_const_type colsAux_,
190 typename vals_type::non_const_type valsAux_,
191 bool reuseGraph_,
bool lumping_,
SC ) :
194 dropFunctor(dropFunctor_),
198 reuseGraph(reuseGraph_),
201 rowsA = A.graph.row_map;
205 KOKKOS_INLINE_FUNCTION
206 void operator()(
const LO row,
LO& nnz)
const {
207 auto rowView = A.rowConst(row);
208 auto length = rowView.length;
209 auto offset = rowsA(row);
214 for (decltype(length) colID = 0; colID < length; colID++) {
215 LO col = rowView.colidx(colID);
216 SC val = rowView.value (colID);
218 if (!dropFunctor(row, col, rowView.value(colID)) || row == col) {
219 colsAux(offset+rownnz) = col;
221 LO valID = (reuseGraph ? colID : rownnz);
222 valsAux(offset+valID) = val;
230 valsAux(offset+colID) = zero;
236 rows(row+1) = rownnz;
244 valsAux(offset+diagID) += diag;
253 bndNodes(row) = (rownnz == 1);
260 BndViewType bndNodes;
261 DropFunctorType dropFunctor;
265 typename rows_type::non_const_type rows;
266 typename cols_type::non_const_type colsAux;
267 typename vals_type::non_const_type valsAux;
275 template<
class MatrixType,
class NnzType,
class blkSizeType>
276 class Stage1aVectorFunctor {
278 typedef typename MatrixType::ordinal_type
LO;
281 Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) :
282 kokkosMatrix(kokkosMatrix_),
284 blkSize(blkSize_) { }
286 KOKKOS_INLINE_FUNCTION
287 void operator()(
const LO row,
LO& totalnnz)
const {
291 LO nodeRowMaxNonZeros = 0;
292 for (
LO j = 0; j < blkSize; j++) {
293 auto rowView = kokkosMatrix.row(row * blkSize + j);
294 nodeRowMaxNonZeros += rowView.length;
296 nnz(row + 1) = nodeRowMaxNonZeros;
297 totalnnz += nodeRowMaxNonZeros;
302 MatrixType kokkosMatrix;
310 template<
class MatrixType,
class NnzType,
class blkSizeType,
class ColDofType>
311 class Stage1bVectorFunctor {
313 typedef typename MatrixType::ordinal_type
LO;
318 MatrixType kokkosMatrix;
324 Stage1bVectorFunctor(MatrixType kokkosMatrix_,
326 blkSizeType blkSize_,
327 ColDofType coldofs_) :
328 kokkosMatrix(kokkosMatrix_),
334 KOKKOS_INLINE_FUNCTION
335 void operator()(
const LO rowNode)
const {
337 LO pos = nnz(rowNode);
338 for (
LO j = 0; j < blkSize; j++) {
339 auto rowView = kokkosMatrix.row(rowNode * blkSize + j);
340 auto numIndices = rowView.length;
342 for (decltype(numIndices) k = 0; k < numIndices; k++) {
343 auto dofID = rowView.colidx(k);
344 coldofs(pos) = dofID;
355 template<
class MatrixType,
class ColDofNnzType,
class ColDofType,
class Dof2NodeTranslationType,
class BdryNodeType>
356 class Stage1cVectorFunctor {
358 typedef typename MatrixType::ordinal_type
LO;
361 ColDofNnzType coldofnnz;
363 Dof2NodeTranslationType dof2node;
364 ColDofNnzType colnodennz;
365 BdryNodeType bdrynode;
368 Stage1cVectorFunctor(ColDofNnzType coldofnnz_, ColDofType coldofs_, Dof2NodeTranslationType dof2node_, ColDofNnzType colnodennz_, BdryNodeType bdrynode_) :
369 coldofnnz(coldofnnz_),
372 colnodennz(colnodennz_),
373 bdrynode(bdrynode_) {
376 KOKKOS_INLINE_FUNCTION
377 void operator()(
const LO rowNode,
LO& nnz)
const {
378 LO begin = coldofnnz(rowNode);
379 LO end = coldofnnz(rowNode+1);
381 for (
LO i = 0; i < (n-1); i++) {
382 for (
LO j = 0; j < (n-i-1); j++) {
383 if (coldofs(j+begin) > coldofs(j+begin+1)) {
384 LO temp = coldofs(j+begin);
385 coldofs(j+begin) = coldofs(j+begin+1);
386 coldofs(j+begin+1) = temp;
392 for (
LO i = 0; i < n; i++) {
393 LO dofID = coldofs(begin + i);
394 LO nodeID = dof2node(dofID);
395 if(nodeID != lastNodeID) {
397 coldofs(begin+cnt) = nodeID;
402 bdrynode(rowNode) =
true;
404 bdrynode(rowNode) =
false;
405 colnodennz(rowNode+1) = cnt;
412 template<
class MatrixType,
class ColDofNnzType,
class ColDofType,
class ColNodeNnzType,
class ColNodeType>
413 class Stage1dVectorFunctor {
415 typedef typename MatrixType::ordinal_type
LO;
416 typedef typename MatrixType::value_type
SC;
420 ColDofNnzType coldofnnz;
421 ColNodeType colnodes;
422 ColNodeNnzType colnodennz;
425 Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) :
427 coldofnnz(coldofnnz_),
429 colnodennz(colnodennz_) {
432 KOKKOS_INLINE_FUNCTION
433 void operator()(
const LO rowNode)
const {
434 auto dofbegin = coldofnnz(rowNode);
435 auto nodebegin = colnodennz(rowNode);
436 auto nodeend = colnodennz(rowNode+1);
437 auto n = nodeend - nodebegin;
439 for (decltype(nodebegin) i = 0; i < n; i++) {
440 colnodes(nodebegin + i) = coldofs(dofbegin + i);
448 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
449 RCP<const ParameterList> CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList()
const {
450 RCP<ParameterList> validParamList =
rcp(
new ParameterList());
452 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
461 validParamList->getEntry(
"aggregation: drop scheme").setValidator(
462 rcp(
new validatorType(Teuchos::tuple<std::string>(
"classical",
"distance laplacian"),
"aggregation: drop scheme")));
464 #undef SET_VALID_ENTRY
465 validParamList->set<
bool > (
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
467 validParamList->set< RCP<const FactoryBase> >(
"A", Teuchos::null,
"Generating factory of the matrix A");
468 validParamList->set< RCP<const FactoryBase> >(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
469 validParamList->set< RCP<const FactoryBase> >(
"Coordinates", Teuchos::null,
"Generating factory for Coordinates");
471 return validParamList;
474 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
475 void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level ¤tLevel)
const {
476 Input(currentLevel,
"A");
477 Input(currentLevel,
"UnAmalgamationInfo");
479 const ParameterList& pL = GetParameterList();
480 if (pL.get<
bool>(
"lightweight wrap") ==
true) {
481 if (pL.get<std::string>(
"aggregation: drop scheme") ==
"distance laplacian")
482 Input(currentLevel,
"Coordinates");
486 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
487 void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
488 Build(Level& currentLevel)
const {
489 FactoryMonitor m(*
this,
"Build", currentLevel);
492 const SC zero = STS::zero();
494 auto A = Get< RCP<Matrix> >(currentLevel,
"A");
495 LO blkSize = A->GetFixedBlockSize();
497 auto amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel,
"UnAmalgamationInfo");
499 const ParameterList& pL = GetParameterList();
501 bool doLightWeightWrap = pL.get<
bool>(
"lightweight wrap");
502 GetOStream(
Warnings0) <<
"lightweight wrap is deprecated" << std::endl;
504 "MueLu KokkosRefactor only supports \"lightweight wrap\"=\"true\"");
506 std::string algo = pL.get<std::string>(
"aggregation: drop scheme");
508 double threshold = pL.get<
double>(
"aggregation: drop tol");
509 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\": threshold = " << threshold
510 <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
512 const typename STS::magnitudeType dirichletThreshold =
513 STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
515 GO numDropped = 0, numTotal = 0;
517 RCP<LWGraph_kokkos> graph;
520 typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type;
521 boundary_nodes_type boundaryNodes;
523 RCP<Matrix> filteredA;
524 if (blkSize == 1 && threshold == zero) {
531 graph =
rcp(
new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(),
"graph of A"));
532 graph->SetBoundaryNodeMap(boundaryNodes);
534 numTotal = A->getNodeNumEntries();
539 }
else if (blkSize == 1 && threshold != zero) {
542 typedef typename Matrix::local_matrix_type local_matrix_type;
543 typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
544 typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type;
545 typedef typename kokkos_graph_type::entries_type::non_const_type cols_type;
546 typedef typename local_matrix_type::values_type::non_const_type vals_type;
548 LO numRows = A->getNodeNumRows();
549 auto kokkosMatrix = A->getLocalMatrix();
550 auto nnzA = kokkosMatrix.nnz();
551 auto rowsA = kokkosMatrix.graph.row_map;
553 typedef Kokkos::ArithTraits<SC> ATS;
555 bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
556 bool lumping = pL.get<
bool>(
"filtered matrix: use lumping");
558 GetOStream(
Runtime0) <<
"Lumping dropped entries" << std::endl;
561 rows_type rows (
"FA_rows", numRows+1);
562 cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing(
"FA_aux_cols"), nnzA);
565 SubFactoryMonitor m2(*
this,
"CopyMatrix", currentLevel);
568 filteredA = MatrixFactory::Build(A->getCrsGraph());
571 RCP<ParameterList> fillCompleteParams(
new ParameterList);
572 fillCompleteParams->set(
"No Nonlocal Changes",
true);
573 filteredA->fillComplete(fillCompleteParams);
576 valsAux = filteredA->getLocalMatrix().values;
580 valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing(
"FA_aux_vals"), nnzA);
583 typename boundary_nodes_type::non_const_type bndNodes(Kokkos::ViewAllocateWithoutInitializing(
"boundaryNodes"), numRows);
587 if (algo ==
"classical") {
589 RCP<Vector> ghostedDiag;
591 SubFactoryMonitor m2(*
this,
"Ghosted diag construction", currentLevel);
592 ghostedDiag = Utilities_kokkos::GetMatrixOverlappedDiagonal(*A);
597 SubFactoryMonitor m2(*
this,
"MainLoop", currentLevel);
599 auto ghostedDiagView = ghostedDiag->template getLocalView<DeviceType>();
601 CoalesceDrop_Kokkos_Details::ClassicalDropFunctor<LO, decltype(ghostedDiagView)> dropFunctor(ghostedDiagView, threshold);
602 CoalesceDrop_Kokkos_Details::ScalarFunctor<SC, LO, local_matrix_type, decltype(bndNodes), decltype(dropFunctor)>
603 scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold);
606 scalarFunctor, nnzFA);
609 }
else if (algo ==
"distance laplacian") {
611 auto coords = Get<RCP<doubleMultiVector> >(currentLevel,
"Coordinates");
613 auto uniqueMap = A->getRowMap();
614 auto nonUniqueMap = A->getColMap();
617 RCP<const Import> importer;
619 SubFactoryMonitor m2(*
this,
"Coords Import construction", currentLevel);
620 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
622 RCP<doubleMultiVector> ghostedCoords;
624 SubFactoryMonitor m2(*
this,
"Ghosted coords construction", currentLevel);
629 auto ghostedCoordsView = ghostedCoords->template getLocalView<DeviceType>();
630 CoalesceDrop_Kokkos_Details::DistanceFunctor<LO, decltype(ghostedCoordsView)> distFunctor(ghostedCoordsView);
633 RCP<Vector> localLaplDiag;
635 SubFactoryMonitor m2(*
this,
"Local Laplacian diag construction", currentLevel);
637 localLaplDiag = VectorFactory::Build(uniqueMap);
639 auto localLaplDiagView = localLaplDiag->template getLocalView<DeviceType>();
640 auto kokkosGraph = kokkosMatrix.graph;
642 Kokkos::parallel_for(
"MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", range_type(0,numRows),
643 KOKKOS_LAMBDA(
const LO row) {
644 auto rowView = kokkosGraph.rowConst(row);
645 auto length = rowView.length;
648 for (decltype(length) colID = 0; colID < length; colID++) {
649 auto col = rowView(colID);
651 d += ATS::one()/distFunctor.distance2(row, col);
653 localLaplDiagView(row,0) = d;
658 RCP<Vector> ghostedLaplDiag;
660 SubFactoryMonitor m2(*
this,
"Ghosted Laplacian diag construction", currentLevel);
661 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
662 ghostedLaplDiag->doImport(*localLaplDiag, *importer,
Xpetra::INSERT);
667 SubFactoryMonitor m2(*
this,
"MainLoop", currentLevel);
669 auto ghostedLaplDiagView = ghostedLaplDiag->template getLocalView<DeviceType>();
671 CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor<LO, decltype(ghostedLaplDiagView), decltype(distFunctor)>
672 dropFunctor(ghostedLaplDiagView, distFunctor, threshold);
673 CoalesceDrop_Kokkos_Details::ScalarFunctor<SC, LO, local_matrix_type, decltype(bndNodes), decltype(dropFunctor)>
674 scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold);
677 scalarFunctor, nnzFA);
682 numDropped = nnzA - nnzFA;
684 boundaryNodes = bndNodes;
687 SubFactoryMonitor m2(*
this,
"CompressRows", currentLevel);
690 Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:compress_rows", range_type(0,numRows+1),
691 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
703 cols_type cols(Kokkos::ViewAllocateWithoutInitializing(
"FA_cols"), nnzFA);
707 SubFactoryMonitor m2(*
this,
"CompressColsAndVals", currentLevel);
710 KOKKOS_LAMBDA(
const LO i) {
712 LO rowStart = rows(i);
713 LO rowAStart = rowsA(i);
714 size_t rownnz = rows(i+1) - rows(i);
715 for (
size_t j = 0; j < rownnz; j++)
716 cols(rowStart+j) = colsAux(rowAStart+j);
720 SubFactoryMonitor m2(*
this,
"CompressColsAndVals", currentLevel);
722 vals = vals_type(Kokkos::ViewAllocateWithoutInitializing(
"FA_vals"), nnzFA);
725 KOKKOS_LAMBDA(
const LO i) {
726 LO rowStart = rows(i);
727 LO rowAStart = rowsA(i);
728 size_t rownnz = rows(i+1) - rows(i);
729 for (
size_t j = 0; j < rownnz; j++) {
730 cols(rowStart+j) = colsAux(rowAStart+j);
731 vals(rowStart+j) = colsAux(rowAStart+j);
736 kokkos_graph_type kokkosGraph(cols, rows);
739 SubFactoryMonitor m2(*
this,
"LWGraph construction", currentLevel);
741 graph =
rcp(
new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(),
"filtered graph of A"));
742 graph->SetBoundaryNodeMap(boundaryNodes);
745 numTotal = A->getNodeNumEntries();
750 SubFactoryMonitor m2(*
this,
"LocalMatrix+FillComplete", currentLevel);
752 local_matrix_type localFA = local_matrix_type(
"A", numRows, kokkosMatrix.numCols(), nnzFA, vals, rows, cols);
756 auto filteredACrs = CrsMatrixFactory::Build(A->getRowMap(), A->getColMap(), localFA);
757 filteredACrs->resumeFill();
758 filteredACrs->expertStaticFillComplete(A->getDomainMap(), A->getRangeMap());
760 filteredA =
rcp(
new CrsMatrixWrap(filteredACrs));
763 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
765 if (pL.get<
bool>(
"filtered matrix: reuse eigenvalue")) {
770 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
775 }
else if (blkSize > 1 && threshold == zero) {
784 const RCP<const Map> rowMap = A->getRowMap();
785 const RCP<const Map> colMap = A->getColMap();
792 const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
793 const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
794 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation());
795 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
798 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
800 id_translation_type rowTranslation(
"dofId2nodeId",rowTranslationArray.size());
801 id_translation_type colTranslation(
"ov_dofId2nodeId",colTranslationArray.size());
804 for (decltype(rowTranslationArray.size()) i = 0; i < rowTranslationArray.size(); ++i)
805 rowTranslation(i) = rowTranslationArray[i];
806 for (decltype(colTranslationArray.size()) i = 0; i < colTranslationArray.size(); ++i)
807 colTranslation(i) = colTranslationArray[i];
809 blkSize = A->GetFixedBlockSize();
810 LocalOrdinal blkId = -1;
811 LocalOrdinal blkPartSize = A->GetFixedBlockSize();
812 if(A->IsView(
"stridedMaps") ==
true) {
813 const RCP<const Map> myMap = A->getRowMap(
"stridedMaps");
814 const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<
const StridedMap>(myMap);
816 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
817 blkId = strMap->getStridedBlockId();
819 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
821 auto kokkosMatrix = A->getLocalMatrix();
824 typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
825 typedef typename kokkos_graph_type::row_map_type row_map_type;
827 typedef typename kokkos_graph_type::entries_type entries_type;
830 typename row_map_type::non_const_type dofNnz(
"nnz_map", numNodes + 1);
832 CoalesceDrop_Kokkos_Details::Stage1aVectorFunctor<decltype(kokkosMatrix), decltype(dofNnz), decltype(blkPartSize)> stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize);
833 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0,numNodes), stage1aFunctor, numDofCols);
835 CoalesceDrop_Kokkos_Details::ScanFunctor<LO,decltype(dofNnz)> scanFunctor(dofNnz);
836 Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanFunctor);
838 typename entries_type::non_const_type dofcols(
"dofcols", numDofCols);
839 CoalesceDrop_Kokkos_Details::Stage1bVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(blkPartSize), decltype(dofcols)> stage1bFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols);
840 Kokkos::parallel_for(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1b", range_type(0,numNodes), stage1bFunctor);
844 typename row_map_type::non_const_type rows(
"nnz_nodemap", numNodes + 1);
845 typename boundary_nodes_type::non_const_type bndNodes(
"boundaryNodes", numNodes);
846 CoalesceDrop_Kokkos_Details::Stage1cVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(dofcols), decltype(colTranslation), decltype(bndNodes)> stage1cFunctor(dofNnz, dofcols, colTranslation,rows,bndNodes);
847 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1cFunctor,numNodeCols);
850 CoalesceDrop_Kokkos_Details::ScanFunctor<LO,decltype(rows)> scanNodeFunctor(rows);
851 Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanNodeFunctor);
854 typename entries_type::non_const_type cols(
"nodecols", numNodeCols);
857 CoalesceDrop_Kokkos_Details::Stage1dVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(dofcols), decltype(rows), decltype(cols)> stage1dFunctor(dofcols, dofNnz, cols, rows);
858 Kokkos::parallel_for(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1dFunctor);
859 kokkos_graph_type kokkosGraph(cols, rows);
862 graph =
rcp(
new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
864 boundaryNodes = bndNodes;
865 graph->SetBoundaryNodeMap(boundaryNodes);
866 numTotal = A->getNodeNumEntries();
868 dofsPerNode = blkSize;
873 TEUCHOS_TEST_FOR_EXCEPTION(
true, Exceptions::RuntimeError,
"MueLu: CoalesceDropFactory_kokkos: Block filtering is not implemented");
877 GO numLocalBoundaryNodes = 0;
878 GO numGlobalBoundaryNodes = 0;
881 KOKKOS_LAMBDA(
const LO i, GO& n) {
882 if (boundaryNodes(i))
884 }, numLocalBoundaryNodes);
886 auto comm = A->getRowMap()->getComm();
887 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
888 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
891 if ((GetVerbLevel() &
Statistics1) && threshold != zero) {
892 auto comm = A->getRowMap()->getComm();
894 GO numGlobalTotal, numGlobalDropped;
898 if (numGlobalTotal != 0) {
899 GetOStream(
Statistics1) <<
"Number of dropped entries: "
900 << numGlobalDropped <<
"/" << numGlobalTotal
901 <<
" (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) <<
"%)" << std::endl;
905 Set(currentLevel,
"DofsPerNode", dofsPerNode);
906 Set(currentLevel,
"Graph", graph);
907 Set(currentLevel,
"A", filteredA);
910 #endif // HAVE_MUELU_KOKKOS_REFACTOR
911 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
One-liner description of what is happening.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Exception throws to report errors in the internal logical of the program.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
#define SET_VALID_ENTRY(name)