46 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 
   47 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 
   49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 
   50 #include <Kokkos_Core.hpp> 
   51 #include <KokkosSparse_CrsMatrix.hpp> 
   57 #include "MueLu_AmalgamationInfo_kokkos.hpp" 
   60 #include "MueLu_LWGraph_kokkos.hpp" 
   63 #include "MueLu_Utilities_kokkos.hpp" 
   68   namespace CoalesceDrop_Kokkos_Details { 
 
   70     template<
class LO, 
class RowType>
 
   73       ScanFunctor(RowType rows_) : rows(rows_) { }
 
   75       KOKKOS_INLINE_FUNCTION
 
   76       void operator()(
const LO i, 
LO& upd, 
const bool& 
final)
 const {
 
   86     template<
class LO, 
class GhostedViewType>
 
   87     class ClassicalDropFunctor {
 
   89       typedef typename GhostedViewType::value_type      
SC;
 
   90       typedef          Kokkos::ArithTraits<SC>          ATS;
 
   91       typedef typename ATS::magnitudeType               magnitudeType;
 
   97       ClassicalDropFunctor(GhostedViewType ghostedDiag, magnitudeType threshold) :
 
  103       KOKKOS_FORCEINLINE_FUNCTION
 
  104       bool operator()(
LO row, 
LO col, 
SC val)
 const {
 
  106         auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0));   
 
  107         auto aij2   = ATS::magnitude(val)          * ATS::magnitude(val);            
 
  109         return (aij2 <= eps*eps * aiiajj);
 
  113     template<
class LO, 
class CoordsType>
 
  114     class DistanceFunctor {
 
  116       typedef typename CoordsType::value_type           
SC;
 
  117       typedef          Kokkos::ArithTraits<SC>          ATS;
 
  118       typedef typename ATS::magnitudeType               magnitudeType;
 
  121       typedef SC value_type;
 
  124       DistanceFunctor(CoordsType coords_) : coords(coords_) { }
 
  126       KOKKOS_INLINE_FUNCTION
 
  127       magnitudeType distance2(
LO row, 
LO col)
 const {
 
  128         SC d = ATS::zero(), s;
 
  129         for (
size_t j = 0; j < coords.extent(1); j++) {
 
  130           s = coords(row,j) - coords(col,j);
 
  133         return ATS::magnitude(d);
 
  139     template<
class LO, 
class GhostedViewType, 
class DistanceFunctor>
 
  140     class DistanceLaplacianDropFunctor {
 
  142       typedef typename GhostedViewType::value_type      
SC;
 
  143       typedef          Kokkos::ArithTraits<SC>          ATS;
 
  144       typedef typename ATS::magnitudeType               magnitudeType;
 
  147       DistanceLaplacianDropFunctor(GhostedViewType ghostedLaplDiag, DistanceFunctor distFunctor_, magnitudeType threshold) :
 
  148           diag(ghostedLaplDiag),
 
  149           distFunctor(distFunctor_),
 
  154       KOKKOS_INLINE_FUNCTION
 
  155       bool operator()(
LO row, 
LO col, 
SC )
 const {
 
  160         typedef typename DistanceFunctor::value_type      dSC;
 
  161         typedef          Kokkos::ArithTraits<dSC>         dATS;
 
  162         auto fval = dATS::one() / distFunctor.distance2(row, col);
 
  164         auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0));   
 
  165         auto aij2   = ATS::magnitude(fval)         * ATS::magnitude(fval);           
 
  167         return (aij2 <= eps*eps * aiiajj);
 
  171       GhostedViewType   diag;   
 
  172       DistanceFunctor   distFunctor;
 
  176     template<
class SC, 
class LO, 
class MatrixType, 
class BndViewType, 
class DropFunctorType>
 
  177     class ScalarFunctor {
 
  179       typedef typename MatrixType::StaticCrsGraphType   graph_type;
 
  180       typedef typename graph_type::row_map_type         rows_type;
 
  181       typedef typename graph_type::entries_type         cols_type;
 
  182       typedef typename MatrixType::values_type          vals_type;
 
  183       typedef          Kokkos::ArithTraits<SC>          ATS;
 
  184       typedef typename ATS::magnitudeType               magnitudeType;
 
  187       ScalarFunctor(MatrixType A_, BndViewType bndNodes_, DropFunctorType dropFunctor_,
 
  188                     typename rows_type::non_const_type rows_,
 
  189                     typename cols_type::non_const_type colsAux_,
 
  190                     typename vals_type::non_const_type valsAux_,
 
  191                     bool reuseGraph_, 
bool lumping_, 
SC ) :
 
  194           dropFunctor(dropFunctor_),
 
  198           reuseGraph(reuseGraph_),
 
  201         rowsA = A.graph.row_map;
 
  205       KOKKOS_INLINE_FUNCTION
 
  206       void operator()(
const LO row, 
LO& nnz)
 const {
 
  207         auto rowView = A.rowConst(row);
 
  208         auto length  = rowView.length;
 
  209         auto offset  = rowsA(row);
 
  214         for (decltype(length) colID = 0; colID < length; colID++) {
 
  215           LO col = rowView.colidx(colID);
 
  216           SC val = rowView.value (colID);
 
  218           if (!dropFunctor(row, col, rowView.value(colID)) || row == col) {
 
  219             colsAux(offset+rownnz) = col;
 
  221             LO valID = (reuseGraph ? colID : rownnz);
 
  222             valsAux(offset+valID) = val;
 
  230             valsAux(offset+colID) = zero;
 
  236         rows(row+1) = rownnz;
 
  244           valsAux(offset+diagID) += diag;
 
  253         bndNodes(row) = (rownnz == 1);
 
  260       BndViewType                           bndNodes;
 
  261       DropFunctorType                       dropFunctor;
 
  265       typename rows_type::non_const_type    rows;
 
  266       typename cols_type::non_const_type    colsAux;
 
  267       typename vals_type::non_const_type    valsAux;
 
  275     template<
class MatrixType, 
class NnzType, 
class blkSizeType>
 
  276     class Stage1aVectorFunctor {
 
  278       typedef typename MatrixType::ordinal_type 
LO;
 
  281       Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) :
 
  282         kokkosMatrix(kokkosMatrix_),
 
  284         blkSize(blkSize_) { }
 
  286       KOKKOS_INLINE_FUNCTION
 
  287       void operator()(
const LO row, 
LO& totalnnz)
 const {
 
  291         LO nodeRowMaxNonZeros = 0;
 
  292         for (
LO j = 0; j < blkSize; j++) {
 
  293           auto rowView = kokkosMatrix.row(row * blkSize + j);
 
  294           nodeRowMaxNonZeros += rowView.length;
 
  296         nnz(row + 1) = nodeRowMaxNonZeros;
 
  297         totalnnz += nodeRowMaxNonZeros;
 
  302       MatrixType kokkosMatrix; 
 
  310     template<
class MatrixType, 
class NnzType, 
class blkSizeType, 
class ColDofType>
 
  311     class Stage1bVectorFunctor {
 
  313       typedef typename MatrixType::ordinal_type 
LO;
 
  318       MatrixType kokkosMatrix; 
 
  324       Stage1bVectorFunctor(MatrixType kokkosMatrix_,
 
  326                            blkSizeType blkSize_,
 
  327                            ColDofType coldofs_) :
 
  328         kokkosMatrix(kokkosMatrix_),
 
  334       KOKKOS_INLINE_FUNCTION
 
  335       void operator()(
const LO rowNode)
 const {
 
  337         LO pos = nnz(rowNode);
 
  338         for (
LO j = 0; j < blkSize; j++) {
 
  339           auto rowView = kokkosMatrix.row(rowNode * blkSize + j);
 
  340           auto numIndices = rowView.length;
 
  342           for (decltype(numIndices) k = 0; k < numIndices; k++) {
 
  343             auto dofID = rowView.colidx(k);
 
  344             coldofs(pos) = dofID;
 
  355     template<
class MatrixType, 
class ColDofNnzType, 
class ColDofType, 
class Dof2NodeTranslationType, 
class BdryNodeType>
 
  356     class Stage1cVectorFunctor {
 
  358       typedef typename MatrixType::ordinal_type 
LO;
 
  361       ColDofNnzType coldofnnz; 
 
  363       Dof2NodeTranslationType dof2node; 
 
  364       ColDofNnzType colnodennz; 
 
  365       BdryNodeType  bdrynode;  
 
  368       Stage1cVectorFunctor(ColDofNnzType coldofnnz_, ColDofType coldofs_, Dof2NodeTranslationType dof2node_, ColDofNnzType colnodennz_, BdryNodeType bdrynode_) :
 
  369         coldofnnz(coldofnnz_),
 
  372         colnodennz(colnodennz_),
 
  373         bdrynode(bdrynode_) {
 
  376       KOKKOS_INLINE_FUNCTION
 
  377       void operator()(
const LO rowNode, 
LO& nnz)
 const {
 
  378         LO begin = coldofnnz(rowNode);
 
  379         LO end   = coldofnnz(rowNode+1);
 
  381         for (
LO i = 0; i < (n-1); i++) {
 
  382           for (
LO j = 0; j < (n-i-1); j++) {
 
  383             if (coldofs(j+begin) > coldofs(j+begin+1)) {
 
  384               LO temp = coldofs(j+begin);
 
  385               coldofs(j+begin) = coldofs(j+begin+1);
 
  386               coldofs(j+begin+1) = temp;
 
  392         for (
LO i = 0; i < n; i++) {
 
  393           LO dofID  = coldofs(begin + i);
 
  394           LO nodeID = dof2node(dofID);
 
  395           if(nodeID != lastNodeID) {
 
  397             coldofs(begin+cnt) = nodeID;
 
  402           bdrynode(rowNode) = 
true;
 
  404           bdrynode(rowNode) = 
false;
 
  405         colnodennz(rowNode+1) = cnt;
 
  412     template<
class MatrixType, 
class ColDofNnzType, 
class ColDofType, 
class ColNodeNnzType, 
class ColNodeType>
 
  413     class Stage1dVectorFunctor {
 
  415       typedef typename MatrixType::ordinal_type 
LO;
 
  416       typedef typename MatrixType::value_type 
SC;
 
  420       ColDofNnzType  coldofnnz;  
 
  421       ColNodeType colnodes;      
 
  422       ColNodeNnzType colnodennz; 
 
  425       Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) :
 
  427         coldofnnz(coldofnnz_),
 
  429         colnodennz(colnodennz_) {
 
  432       KOKKOS_INLINE_FUNCTION
 
  433       void operator()(
const LO rowNode)
 const {
 
  434         auto dofbegin  = coldofnnz(rowNode);
 
  435         auto nodebegin = colnodennz(rowNode);
 
  436         auto nodeend   = colnodennz(rowNode+1);
 
  437         auto n = nodeend - nodebegin;
 
  439         for (decltype(nodebegin) i = 0; i < n; i++) {
 
  440           colnodes(nodebegin + i) = coldofs(dofbegin + i);
 
  448   template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
  449   RCP<const ParameterList> CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList()
 const {
 
  450     RCP<ParameterList> validParamList = 
rcp(
new ParameterList());
 
  452 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) 
  461       validParamList->getEntry(
"aggregation: drop scheme").setValidator(
 
  462         rcp(
new validatorType(Teuchos::tuple<std::string>(
"classical", 
"distance laplacian"), 
"aggregation: drop scheme")));
 
  464 #undef  SET_VALID_ENTRY 
  465     validParamList->set< 
bool >                  (
"lightweight wrap",            
true, 
"Experimental option for lightweight graph access");
 
  467     validParamList->set< RCP<const FactoryBase> >(
"A",                  Teuchos::null, 
"Generating factory of the matrix A");
 
  468     validParamList->set< RCP<const FactoryBase> >(
"UnAmalgamationInfo", Teuchos::null, 
"Generating factory for UnAmalgamationInfo");
 
  469     validParamList->set< RCP<const FactoryBase> >(
"Coordinates",        Teuchos::null, 
"Generating factory for Coordinates");
 
  471     return validParamList;
 
  474   template <
class Scalar,
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
  475   void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level ¤tLevel)
 const {
 
  476     Input(currentLevel, 
"A");
 
  477     Input(currentLevel, 
"UnAmalgamationInfo");
 
  479     const ParameterList& pL = GetParameterList();
 
  480     if (pL.get<
bool>(
"lightweight wrap") == 
true) {
 
  481       if (pL.get<std::string>(
"aggregation: drop scheme") == 
"distance laplacian")
 
  482         Input(currentLevel, 
"Coordinates");
 
  486   template <
class Scalar,
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
  487   void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
 
  488   Build(Level& currentLevel)
 const {
 
  489     FactoryMonitor m(*
this, 
"Build", currentLevel);
 
  492     const SC zero    = STS::zero();
 
  494     auto A         = Get< RCP<Matrix> >(currentLevel, 
"A");
 
  495     LO   blkSize   = A->GetFixedBlockSize();
 
  497     auto amalInfo = Get< RCP<AmalgamationInfo_kokkos> >(currentLevel, 
"UnAmalgamationInfo");
 
  499     const ParameterList& pL = GetParameterList();
 
  501     bool doLightWeightWrap = pL.get<
bool>(
"lightweight wrap");
 
  502     GetOStream(
Warnings0) << 
"lightweight wrap is deprecated" << std::endl;
 
  504                                "MueLu KokkosRefactor only supports \"lightweight wrap\"=\"true\"");
 
  506     std::string algo = pL.get<std::string>(
"aggregation: drop scheme");
 
  508     double threshold = pL.get<
double>(
"aggregation: drop tol");
 
  509     GetOStream(
Runtime0) << 
"algorithm = \"" << algo << 
"\": threshold = " << threshold
 
  510                          << 
", blocksize = " << A->GetFixedBlockSize() << std::endl;
 
  512     const typename STS::magnitudeType dirichletThreshold =
 
  513         STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
 
  515     GO numDropped = 0, numTotal = 0;
 
  517     RCP<LWGraph_kokkos> graph;
 
  520     typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type;
 
  521     boundary_nodes_type boundaryNodes;
 
  523     RCP<Matrix> filteredA;
 
  524     if (blkSize == 1 && threshold == zero) {
 
  531       graph = 
rcp(
new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(), 
"graph of A"));
 
  532       graph->SetBoundaryNodeMap(boundaryNodes);
 
  534       numTotal = A->getNodeNumEntries();
 
  539     } 
else if (blkSize == 1 && threshold != zero) {
 
  542       typedef typename Matrix::local_matrix_type          local_matrix_type;
 
  543       typedef typename LWGraph_kokkos::local_graph_type   kokkos_graph_type;
 
  544       typedef typename kokkos_graph_type::row_map_type::non_const_type    rows_type;
 
  545       typedef typename kokkos_graph_type::entries_type::non_const_type    cols_type;
 
  546       typedef typename local_matrix_type::values_type::non_const_type     vals_type;
 
  548       LO   numRows      = A->getNodeNumRows();
 
  549       auto kokkosMatrix = A->getLocalMatrix();
 
  550       auto nnzA  = kokkosMatrix.nnz();
 
  551       auto rowsA = kokkosMatrix.graph.row_map;
 
  553       typedef Kokkos::ArithTraits<SC>     ATS;
 
  555       bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
 
  556       bool lumping    = pL.get<
bool>(
"filtered matrix: use lumping");
 
  558         GetOStream(
Runtime0) << 
"Lumping dropped entries" << std::endl;
 
  561       rows_type rows   (
"FA_rows",     numRows+1);
 
  562       cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing(
"FA_aux_cols"), nnzA);
 
  565         SubFactoryMonitor m2(*
this, 
"CopyMatrix", currentLevel);
 
  568         filteredA = MatrixFactory::Build(A->getCrsGraph());
 
  571         RCP<ParameterList> fillCompleteParams(
new ParameterList);
 
  572         fillCompleteParams->set(
"No Nonlocal Changes", 
true);
 
  573         filteredA->fillComplete(fillCompleteParams);
 
  576         valsAux = filteredA->getLocalMatrix().values;
 
  580         valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing(
"FA_aux_vals"), nnzA);
 
  583       typename boundary_nodes_type::non_const_type bndNodes(Kokkos::ViewAllocateWithoutInitializing(
"boundaryNodes"), numRows);
 
  587         if (algo == 
"classical") {
 
  589           RCP<Vector> ghostedDiag;
 
  591             SubFactoryMonitor m2(*
this, 
"Ghosted diag construction", currentLevel);
 
  592             ghostedDiag     = Utilities_kokkos::GetMatrixOverlappedDiagonal(*A);
 
  597             SubFactoryMonitor m2(*
this, 
"MainLoop", currentLevel);
 
  599             auto ghostedDiagView = ghostedDiag->template getLocalView<DeviceType>();
 
  601             CoalesceDrop_Kokkos_Details::ClassicalDropFunctor<LO, decltype(ghostedDiagView)> dropFunctor(ghostedDiagView, threshold);
 
  602             CoalesceDrop_Kokkos_Details::ScalarFunctor<SC, LO, local_matrix_type, decltype(bndNodes), decltype(dropFunctor)>
 
  603                 scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold);
 
  606                                     scalarFunctor, nnzFA);
 
  609         } 
else if (algo == 
"distance laplacian") {
 
  611           auto coords = Get<RCP<doubleMultiVector> >(currentLevel, 
"Coordinates");
 
  613           auto uniqueMap    = A->getRowMap();
 
  614           auto nonUniqueMap = A->getColMap();
 
  617           RCP<const Import> importer;
 
  619             SubFactoryMonitor m2(*
this, 
"Coords Import construction", currentLevel);
 
  620             importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
 
  622           RCP<doubleMultiVector> ghostedCoords;
 
  624             SubFactoryMonitor m2(*
this, 
"Ghosted coords construction", currentLevel);
 
  629           auto ghostedCoordsView = ghostedCoords->template getLocalView<DeviceType>();
 
  630           CoalesceDrop_Kokkos_Details::DistanceFunctor<LO, decltype(ghostedCoordsView)> distFunctor(ghostedCoordsView);
 
  633           RCP<Vector> localLaplDiag;
 
  635             SubFactoryMonitor m2(*
this, 
"Local Laplacian diag construction", currentLevel);
 
  637             localLaplDiag = VectorFactory::Build(uniqueMap);
 
  639             auto localLaplDiagView = localLaplDiag->template getLocalView<DeviceType>();
 
  640             auto kokkosGraph = kokkosMatrix.graph;
 
  642             Kokkos::parallel_for(
"MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", range_type(0,numRows),
 
  643               KOKKOS_LAMBDA(
const LO row) {
 
  644                 auto rowView = kokkosGraph.rowConst(row);
 
  645                 auto length  = rowView.length;
 
  648                 for (decltype(length) colID = 0; colID < length; colID++) {
 
  649                   auto col = rowView(colID);
 
  651                     d += ATS::one()/distFunctor.distance2(row, col);
 
  653                 localLaplDiagView(row,0) = d;
 
  658           RCP<Vector> ghostedLaplDiag;
 
  660             SubFactoryMonitor m2(*
this, 
"Ghosted Laplacian diag construction", currentLevel);
 
  661             ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
 
  662             ghostedLaplDiag->doImport(*localLaplDiag, *importer, 
Xpetra::INSERT);
 
  667             SubFactoryMonitor m2(*
this, 
"MainLoop", currentLevel);
 
  669             auto ghostedLaplDiagView = ghostedLaplDiag->template getLocalView<DeviceType>();
 
  671             CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor<LO, decltype(ghostedLaplDiagView), decltype(distFunctor)>
 
  672                 dropFunctor(ghostedLaplDiagView, distFunctor, threshold);
 
  673             CoalesceDrop_Kokkos_Details::ScalarFunctor<SC, LO, local_matrix_type, decltype(bndNodes), decltype(dropFunctor)>
 
  674                 scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold);
 
  677                                     scalarFunctor, nnzFA);
 
  682       numDropped = nnzA - nnzFA;
 
  684       boundaryNodes = bndNodes;
 
  687         SubFactoryMonitor m2(*
this, 
"CompressRows", currentLevel);
 
  690         Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:compress_rows", range_type(0,numRows+1),
 
  691           KOKKOS_LAMBDA(
const LO i, LO& update, 
const bool& final_pass) {
 
  703       cols_type cols(Kokkos::ViewAllocateWithoutInitializing(
"FA_cols"), nnzFA);
 
  707         SubFactoryMonitor m2(*
this, 
"CompressColsAndVals", currentLevel);
 
  710           KOKKOS_LAMBDA(
const LO i) {
 
  712             LO rowStart  = rows(i);
 
  713             LO rowAStart = rowsA(i);
 
  714             size_t rownnz = rows(i+1) - rows(i);
 
  715             for (
size_t j = 0; j < rownnz; j++)
 
  716               cols(rowStart+j) = colsAux(rowAStart+j);
 
  720         SubFactoryMonitor m2(*
this, 
"CompressColsAndVals", currentLevel);
 
  722         vals = vals_type(Kokkos::ViewAllocateWithoutInitializing(
"FA_vals"), nnzFA);
 
  725           KOKKOS_LAMBDA(
const LO i) {
 
  726             LO rowStart  = rows(i);
 
  727             LO rowAStart = rowsA(i);
 
  728             size_t rownnz = rows(i+1) - rows(i);
 
  729             for (
size_t j = 0; j < rownnz; j++) {
 
  730               cols(rowStart+j) = colsAux(rowAStart+j);
 
  731               vals(rowStart+j) = colsAux(rowAStart+j);
 
  736       kokkos_graph_type kokkosGraph(cols, rows);
 
  739         SubFactoryMonitor m2(*
this, 
"LWGraph construction", currentLevel);
 
  741         graph = 
rcp(
new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(), 
"filtered graph of A"));
 
  742         graph->SetBoundaryNodeMap(boundaryNodes);
 
  745       numTotal = A->getNodeNumEntries();
 
  750         SubFactoryMonitor m2(*
this, 
"LocalMatrix+FillComplete", currentLevel);
 
  752         local_matrix_type localFA = local_matrix_type(
"A", numRows, kokkosMatrix.numCols(), nnzFA, vals, rows, cols);
 
  756         auto filteredACrs = CrsMatrixFactory::Build(A->getRowMap(), A->getColMap(), localFA);
 
  757         filteredACrs->resumeFill();  
 
  758         filteredACrs->expertStaticFillComplete(A->getDomainMap(), A->getRangeMap());
 
  760         filteredA = 
rcp(
new CrsMatrixWrap(filteredACrs));
 
  763       filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
 
  765       if (pL.get<
bool>(
"filtered matrix: reuse eigenvalue")) {
 
  770         filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
 
  775     } 
else if (blkSize > 1 && threshold == zero) {
 
  784       const RCP<const Map> rowMap = A->getRowMap();
 
  785       const RCP<const Map> colMap = A->getColMap();
 
  792       const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
 
  793       const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
 
  794       Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); 
 
  795       Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
 
  798       LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
 
  800       id_translation_type rowTranslation(
"dofId2nodeId",rowTranslationArray.size());
 
  801       id_translation_type colTranslation(
"ov_dofId2nodeId",colTranslationArray.size());
 
  804       for (decltype(rowTranslationArray.size()) i = 0; i < rowTranslationArray.size(); ++i)
 
  805         rowTranslation(i) = rowTranslationArray[i];
 
  806       for (decltype(colTranslationArray.size()) i = 0; i < colTranslationArray.size(); ++i)
 
  807         colTranslation(i) = colTranslationArray[i];
 
  809       blkSize = A->GetFixedBlockSize();  
 
  812       if(A->IsView(
"stridedMaps") == 
true) {
 
  813         const RCP<const Map> myMap = A->getRowMap(
"stridedMaps");
 
  814         const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<
const StridedMap>(myMap);
 
  816         blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
 
  817         blkId   = strMap->getStridedBlockId();
 
  819           blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
 
  821       auto kokkosMatrix = A->getLocalMatrix(); 
 
  824       typedef typename LWGraph_kokkos::local_graph_type   kokkos_graph_type;
 
  825       typedef typename kokkos_graph_type::row_map_type    row_map_type;
 
  827       typedef typename kokkos_graph_type::entries_type    entries_type;
 
  830       typename row_map_type::non_const_type dofNnz(
"nnz_map", numNodes + 1);
 
  832       CoalesceDrop_Kokkos_Details::Stage1aVectorFunctor<decltype(kokkosMatrix), decltype(dofNnz), decltype(blkPartSize)> stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize);
 
  833       Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0,numNodes), stage1aFunctor, numDofCols);
 
  835       CoalesceDrop_Kokkos_Details::ScanFunctor<LO,decltype(dofNnz)> scanFunctor(dofNnz);
 
  836       Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanFunctor);
 
  838       typename entries_type::non_const_type dofcols(
"dofcols", numDofCols); 
 
  839       CoalesceDrop_Kokkos_Details::Stage1bVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(blkPartSize), decltype(dofcols)> stage1bFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols);
 
  840       Kokkos::parallel_for(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1b", range_type(0,numNodes), stage1bFunctor);
 
  844       typename row_map_type::non_const_type rows(
"nnz_nodemap", numNodes + 1);
 
  845       typename boundary_nodes_type::non_const_type bndNodes(
"boundaryNodes", numNodes);
 
  846       CoalesceDrop_Kokkos_Details::Stage1cVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(dofcols), decltype(colTranslation), decltype(bndNodes)> stage1cFunctor(dofNnz, dofcols, colTranslation,rows,bndNodes);
 
  847       Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1cFunctor,numNodeCols);
 
  850       CoalesceDrop_Kokkos_Details::ScanFunctor<LO,decltype(rows)> scanNodeFunctor(rows);
 
  851       Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanNodeFunctor);
 
  854       typename entries_type::non_const_type cols(
"nodecols", numNodeCols);
 
  857       CoalesceDrop_Kokkos_Details::Stage1dVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(dofcols), decltype(rows), decltype(cols)> stage1dFunctor(dofcols, dofNnz, cols, rows);
 
  858       Kokkos::parallel_for(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1dFunctor);
 
  859       kokkos_graph_type kokkosGraph(cols, rows);
 
  862       graph = 
rcp(
new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap, 
"amalgamated graph of A"));
 
  864       boundaryNodes = bndNodes;
 
  865       graph->SetBoundaryNodeMap(boundaryNodes);
 
  866       numTotal = A->getNodeNumEntries();
 
  868       dofsPerNode = blkSize;
 
  873       TEUCHOS_TEST_FOR_EXCEPTION(
true, Exceptions::RuntimeError, 
"MueLu: CoalesceDropFactory_kokkos: Block filtering is not implemented");
 
  877       GO numLocalBoundaryNodes  = 0;
 
  878       GO numGlobalBoundaryNodes = 0;
 
  881         KOKKOS_LAMBDA(
const LO i, GO& n) {
 
  882           if (boundaryNodes(i))
 
  884         }, numLocalBoundaryNodes);
 
  886       auto comm = A->getRowMap()->getComm();
 
  887       MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
 
  888       GetOStream(
Statistics1) << 
"Detected " << numGlobalBoundaryNodes << 
" Dirichlet nodes" << std::endl;
 
  891     if ((GetVerbLevel() & 
Statistics1) && threshold != zero) {
 
  892       auto comm = A->getRowMap()->getComm();
 
  894       GO numGlobalTotal, numGlobalDropped;
 
  898       if (numGlobalTotal != 0) {
 
  899         GetOStream(
Statistics1) << 
"Number of dropped entries: " 
  900             << numGlobalDropped << 
"/" << numGlobalTotal
 
  901             << 
" (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << 
"%)" << std::endl;
 
  905     Set(currentLevel, 
"DofsPerNode",  dofsPerNode);
 
  906     Set(currentLevel, 
"Graph",        graph);
 
  907     Set(currentLevel, 
"A",            filteredA);
 
  910 #endif // HAVE_MUELU_KOKKOS_REFACTOR 
  911 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 
Important warning messages (one line) 
 
#define MueLu_sumAll(rcpComm, in, out)
 
MueLu::DefaultLocalOrdinal LocalOrdinal
 
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
 
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
 
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
 
One-liner description of what is happening. 
 
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
 
Exception throws to report errors in the internal logical of the program. 
 
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
 
#define SET_VALID_ENTRY(name)