46 #ifndef MUELU_TENTATIVEPFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_TENTATIVEPFACTORY_KOKKOS_DEF_HPP
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
55 #include "MueLu_Aggregates_kokkos.hpp"
56 #include "MueLu_AmalgamationFactory_kokkos.hpp"
57 #include "MueLu_AmalgamationInfo_kokkos.hpp"
58 #include "MueLu_CoarseMapFactory_kokkos.hpp"
60 #include "MueLu_NullspaceFactory_kokkos.hpp"
61 #include "MueLu_PerfUtils.hpp"
63 #include "MueLu_Utilities_kokkos.hpp"
69 template<
class LocalOrdinal,
class View>
70 class ReduceMaxFunctor{
72 ReduceMaxFunctor(View view) : view_(view) { }
74 KOKKOS_INLINE_FUNCTION
80 KOKKOS_INLINE_FUNCTION
87 KOKKOS_INLINE_FUNCTION
96 template<
class LOType,
class GOType,
class SCType,
class DeviceType,
class NspType,
class aggRowsType,
class maxAggDofSizeType,
class agg2RowMapLOType,
class statusType,
class rowsType,
class rowsAuxType,
class colsAuxType,
class valsAuxType>
97 class LocalQRDecompFunctor {
103 typedef typename DeviceType::execution_space execution_space;
104 typedef Kokkos::ArithTraits<SC> ATS;
105 typedef typename ATS::magnitudeType Magnitude;
115 maxAggDofSizeType maxAggDofSize;
116 agg2RowMapLOType agg2RowMapLO;
117 statusType statusAtomic;
124 LocalQRDecompFunctor(NspType fineNS_, NspType coarseNS_, aggRowsType aggRows_, maxAggDofSizeType maxAggDofSize_, agg2RowMapLOType agg2RowMapLO_, statusType statusAtomic_, rowsType rows_, rowsAuxType rowsAux_, colsAuxType colsAux_, valsAuxType valsAux_,
bool doQRStep_) :
128 maxAggDofSize(maxAggDofSize_),
129 agg2RowMapLO(agg2RowMapLO_),
130 statusAtomic(statusAtomic_),
138 KOKKOS_INLINE_FUNCTION
140 auto agg = thread.league_rank();
143 auto aggSize = aggRows(agg+1) - aggRows(agg);
145 const SC one = ATS::one();
146 const SC two = one + one;
147 const SC zero = ATS::zero();
148 const auto zeroM = ATS::magnitude(zero);
151 int n = fineNS.extent(1);
154 Xpetra::global_size_t offset = agg * n;
159 shared_matrix r(thread.team_shmem(), m, n);
160 for (
int j = 0; j < n; j++)
161 for (
int k = 0; k < m; k++)
162 r(k,j) = fineNS(agg2RowMapLO(aggRows(agg)+k),j);
165 for (
int i = 0; i < m; i++) {
166 for (
int j = 0; j < n; j++)
167 printf(
" %5.3lf ", r(i,j));
173 shared_matrix q(thread.team_shmem(), m, m);
175 bool isSingular =
false;
179 for (
int i = 0; i < m; i++) {
180 for (
int j = 0; j < m; j++)
185 for (
int k = 0; k < n; k++) {
187 Magnitude s = zeroM, norm, norm_x;
188 for (
int i = k+1; i < m; i++)
189 s +=
pow(ATS::magnitude(r(i,k)), 2);
190 norm =
sqrt(
pow(ATS::magnitude(r(k,k)), 2) + s);
199 norm_x =
sqrt(
pow(ATS::magnitude(r(k,k)), 2) + s);
200 if (norm_x == zeroM) {
208 for (
int i = k; i < m; i++)
212 for (
int j = k+1; j < n; j++) {
215 for (
int i = k; i < m; i++)
216 si += r(i,k) * r(i,j);
217 for (
int i = k; i < m; i++)
218 r(i,j) -= two*si * r(i,k);
222 for (
int j = k; j < m; j++) {
225 for (
int i = k; i < m; i++)
226 si += r(i,k) * qt(i,j);
227 for (
int i = k; i < m; i++)
228 qt(i,j) -= two*si * r(i,k);
233 for (
int i = k+1; i < m; i++)
239 for (
int i = 0; i < m; i++)
240 for (
int j = 0; j < i; j++) {
248 for (
int j = 0; j < n; j++)
249 for (
int k = 0; k <= j; k++)
250 coarseNS(offset+k,j) = r(k,j);
253 statusAtomic(1) =
true;
288 for (
int j = 0; j < n; j++)
289 for (
int k = 0; k < n; k++)
291 coarseNS(offset+k,j) = r(k,j);
293 coarseNS(offset+k,j) = (k == j ? one : zero);
296 for (
int i = 0; i < m; i++)
297 for (
int j = 0; j < n; j++)
298 q(i,j) = (j == i ? one : zero);
302 for (
int j = 0; j < m; j++) {
303 LO localRow = agg2RowMapLO(aggRows(agg)+j);
304 size_t rowStart = rowsAux(localRow);
306 for (
int k = 0; k < n; k++) {
308 if (q(j,k) != zero) {
309 colsAux(rowStart+lnnz) = offset + k;
310 valsAux(rowStart+lnnz) = q(j,k);
314 rows(localRow+1) = lnnz;
320 for (
int i = 0; i < m; i++) {
321 for (
int j = 0; j < n; j++)
322 printf(
" %5.3lf ", coarseNS(i,j));
327 for (
int i = 0; i < aggSize; i++) {
328 for (
int j = 0; j < aggSize; j++)
329 printf(
" %5.3lf ", q(i,j));
343 for (
int j = 0; j < m; j++) {
344 LO localRow = agg2RowMapLO(aggRows(agg)+j);
345 size_t rowStart = rowsAux(localRow);
347 for (
int k = 0; k < n; k++) {
348 const SC qr_jk = fineNS(localRow,k);
351 colsAux(rowStart+lnnz) = offset + k;
352 valsAux(rowStart+lnnz) = qr_jk;
356 rows(localRow+1) = lnnz;
360 for (
int j = 0; j < n; j++)
361 coarseNS(offset+j,j) = one;
368 size_t team_shmem_size(
int )
const {
370 int m = maxAggDofSize;
371 int n = fineNS.extent(1);
372 return shared_matrix::shmem_size(m, n) +
373 shared_matrix::shmem_size(m, m);
381 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
382 RCP<const ParameterList> TentativePFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList()
const {
383 RCP<ParameterList> validParamList =
rcp(
new ParameterList());
385 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
388 #undef SET_VALID_ENTRY
390 validParamList->set< RCP<const FactoryBase> >(
"A", Teuchos::null,
"Generating factory of the matrix A");
391 validParamList->set< RCP<const FactoryBase> >(
"Aggregates", Teuchos::null,
"Generating factory of the aggregates");
392 validParamList->set< RCP<const FactoryBase> >(
"Nullspace", Teuchos::null,
"Generating factory of the nullspace");
393 validParamList->set< RCP<const FactoryBase> >(
"Scaled Nullspace", Teuchos::null,
"Generating factory of the scaled nullspace");
394 validParamList->set< RCP<const FactoryBase> >(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory of UnAmalgamationInfo");
395 validParamList->set< RCP<const FactoryBase> >(
"CoarseMap", Teuchos::null,
"Generating factory of the coarse map");
396 validParamList->set< RCP<const FactoryBase> >(
"Coordinates", Teuchos::null,
"Generating factory of the coordinates");
399 ParameterList norecurse;
400 norecurse.disableRecursiveValidation();
401 validParamList->set<ParameterList> (
"matrixmatrix: kernel params", norecurse,
"MatrixMatrix kernel parameters");
403 return validParamList;
406 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
407 void TentativePFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level& fineLevel, Level& )
const {
409 const ParameterList& pL = GetParameterList();
411 std::string nspName =
"Nullspace";
412 if(pL.isParameter(
"Nullspace name")) nspName = pL.get<std::string>(
"Nullspace name");
414 Input(fineLevel,
"A");
415 Input(fineLevel,
"Aggregates");
416 Input(fineLevel, nspName);
417 Input(fineLevel,
"UnAmalgamationInfo");
418 Input(fineLevel,
"CoarseMap");
419 if( fineLevel.GetLevelID() == 0 &&
421 pL.get<
bool>(
"tentative: build coarse coordinates") ) {
422 bTransferCoordinates_ =
true;
423 Input(fineLevel,
"Coordinates");
424 }
else if (bTransferCoordinates_) {
425 Input(fineLevel,
"Coordinates");
429 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
430 void TentativePFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::Build(Level& fineLevel, Level& coarseLevel)
const {
431 return BuildP(fineLevel, coarseLevel);
434 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
435 void TentativePFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::BuildP(Level& fineLevel, Level& coarseLevel)
const {
436 FactoryMonitor m(*
this,
"Build", coarseLevel);
439 typedef Xpetra::MultiVectorFactory<coordinate_type,LO,GO,NO> RealValuedMultiVectorFactory;
440 const ParameterList& pL = GetParameterList();
441 std::string nspName =
"Nullspace";
442 if(pL.isParameter(
"Nullspace name")) nspName = pL.get<std::string>(
"Nullspace name");
444 auto A = Get< RCP<Matrix> > (fineLevel,
"A");
445 auto aggregates = Get< RCP<Aggregates_kokkos> > (fineLevel,
"Aggregates");
446 auto amalgInfo = Get< RCP<AmalgamationInfo_kokkos> > (fineLevel,
"UnAmalgamationInfo");
447 auto fineNullspace = Get< RCP<MultiVector> > (fineLevel, nspName);
448 auto coarseMap = Get< RCP<const Map> > (fineLevel,
"CoarseMap");
449 RCP<RealValuedMultiVector> fineCoords;
450 if(bTransferCoordinates_) {
451 fineCoords = Get< RCP<RealValuedMultiVector> >(fineLevel,
"Coordinates");
454 RCP<Matrix> Ptentative;
455 RCP<MultiVector> coarseNullspace;
456 RCP<RealValuedMultiVector> coarseCoords;
458 if(bTransferCoordinates_) {
459 ArrayView<const GO> elementAList = coarseMap->getNodeElementList();
460 GO indexBase = coarseMap->getIndexBase();
463 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null)
464 blkSize = rcp_dynamic_cast<
const StridedMap>(coarseMap)->getFixedBlockSize();
466 Array<GO> elementList;
467 ArrayView<const GO> elementListView;
471 elementListView = elementAList;
474 auto numElements = elementAList.size() / blkSize;
476 elementList.resize(numElements);
479 for (LO i = 0; i < Teuchos::as<LO>(numElements); i++)
480 elementList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase;
482 elementListView = elementList;
485 auto uniqueMap = fineCoords->getMap();
487 elementListView, indexBase, coarseMap->getComm());
488 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordMap, fineCoords->getNumVectors());
491 RCP<RealValuedMultiVector> ghostedCoords = fineCoords;
492 if (aggregates->AggregatesCrossProcessors()) {
493 auto nonUniqueMap = aggregates->GetMap();
494 auto importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
496 ghostedCoords = RealValuedMultiVectorFactory::Build(nonUniqueMap, fineCoords->getNumVectors());
497 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
502 auto aggGraph = aggregates->GetGraph();
503 auto numAggs = aggGraph.numRows();
505 auto fineCoordsView = fineCoords ->template getLocalView<DeviceType>();
506 auto coarseCoordsView = coarseCoords->template getLocalView<DeviceType>();
510 SubFactoryMonitor m2(*
this,
"AverageCoords", coarseLevel);
512 const auto dim = fineCoords->getNumVectors();
514 typename AppendTrait<decltype(fineCoordsView), Kokkos::RandomAccess>::type fineCoordsRandomView = fineCoordsView;
515 for (
size_t j = 0; j < dim; j++) {
517 KOKKOS_LAMBDA(
const LO i) {
521 auto aggregate = aggGraph.rowConst(i);
523 coordinate_type sum = 0.0;
524 for (
size_t colID = 0; colID < static_cast<size_t>(aggregate.length); colID++)
525 sum += fineCoordsRandomView(aggregate(colID),j);
527 coarseCoordsView(i,j) = sum / aggregate.length;
533 if (!aggregates->AggregatesCrossProcessors())
534 BuildPuncoupled(coarseLevel, A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
536 BuildPcoupled (A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
546 if (A->IsView(
"stridedMaps") ==
true)
547 Ptentative->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), coarseMap);
549 Ptentative->CreateView(
"stridedMaps", Ptentative->getRangeMap(), coarseMap);
551 if(bTransferCoordinates_) {
552 Set(coarseLevel,
"Coordinates", coarseCoords);
554 Set(coarseLevel,
"Nullspace", coarseNullspace);
555 Set(coarseLevel,
"P", Ptentative);
558 RCP<ParameterList> params =
rcp(
new ParameterList());
559 params->set(
"printLoadBalancingInfo",
true);
564 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
565 void TentativePFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
566 BuildPuncoupled(Level& coarseLevel, RCP<Matrix> A, RCP<Aggregates_kokkos> aggregates,
567 RCP<AmalgamationInfo_kokkos> amalgInfo, RCP<MultiVector> fineNullspace,
568 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative,
569 RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
570 auto rowMap = A->getRowMap();
571 auto colMap = A->getColMap();
573 const size_t numRows = rowMap->getNodeNumElements();
574 const size_t NSDim = fineNullspace->getNumVectors();
576 typedef Kokkos::ArithTraits<SC> ATS;
577 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
578 const SC zero = ATS::zero(), one = ATS::one();
582 typename Aggregates_kokkos::local_graph_type aggGraph;
584 SubFactoryMonitor m2(*
this,
"Get Aggregates graph", coarseLevel);
585 aggGraph = aggregates->GetGraph();
587 auto aggRows = aggGraph.row_map;
588 auto aggCols = aggGraph.entries;
595 SubFactoryMonitor m2(*
this,
"Check good map", coarseLevel);
596 goodMap = isGoodMap(*rowMap, *colMap);
600 "MueLu: TentativePFactory_kokkos: for now works only with good maps "
601 "(i.e. \"matching\" row and column maps)");
610 LO fullBlockSize, blockID, stridingOffset, stridedBlockSize;
612 amalgInfo->GetStridingInformation(fullBlockSize, blockID, stridingOffset, stridedBlockSize, indexBase);
613 GO globalOffset = amalgInfo->GlobalOffset();
616 auto procWinner = aggregates->GetProcWinner() ->template getLocalView<DeviceType>();
617 auto vertex2AggId = aggregates->GetVertex2AggId()->template getLocalView<DeviceType>();
618 const size_t numAggregates = aggregates->GetNumAggregates();
620 int myPID = aggregates->GetMap()->getComm()->getRank();
625 typedef typename Aggregates_kokkos::aggregates_sizes_type::non_const_type AggSizeType;
626 AggSizeType aggDofSizes;
628 if (stridedBlockSize == 1) {
629 SubFactoryMonitor m2(*
this,
"Calc AggSizes", coarseLevel);
632 aggDofSizes = AggSizeType(
"agg_dof_sizes", numAggregates+1);
634 auto sizesConst = aggregates->ComputeAggregateSizes();
638 SubFactoryMonitor m2(*
this,
"Calc AggSizes", coarseLevel);
641 aggDofSizes = AggSizeType(
"agg_dof_sizes", numAggregates + 1);
643 auto nodeMap = aggregates->GetMap()->getLocalMap();
644 auto dofMap = colMap->getLocalMap();
647 KOKKOS_LAMBDA(
const LO agg) {
648 auto aggRowView = aggGraph.rowConst(agg);
651 for (LO colID = 0; colID < aggRowView.length; colID++) {
652 GO nodeGID = nodeMap.getGlobalElement(aggRowView(colID));
654 for (LO k = 0; k < stridedBlockSize; k++) {
655 GO dofGID = (nodeGID - indexBase) * fullBlockSize + k + indexBase + globalOffset + stridingOffset;
657 if (dofMap.getLocalElement(dofGID) != INVALID)
661 aggDofSizes(agg+1) = size;
668 ReduceMaxFunctor<LO,decltype(aggDofSizes)> reduceMax(aggDofSizes);
669 Kokkos::parallel_reduce(
"MueLu:TentativePF:Build:max_agg_size", range_type(0, aggDofSizes.extent(0)), reduceMax, maxAggSize);
673 Kokkos::parallel_scan(
"MueLu:TentativePF:Build:aggregate_sizes:stage1_scan", range_type(0,numAggregates+1),
674 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
675 update += aggDofSizes(i);
677 aggDofSizes(i) = update;
684 SubFactoryMonitor m2(*
this,
"Create Agg2RowMap", coarseLevel);
686 AggSizeType aggOffsets(Kokkos::ViewAllocateWithoutInitializing(
"aggOffsets"), numAggregates);
689 Kokkos::parallel_for(
"MueLu:TentativePF:Build:createAgg2RowMap", range_type(0, vertex2AggId.extent(0)),
690 KOKKOS_LAMBDA(
const LO lnode) {
691 if (procWinner(lnode, 0) == myPID) {
693 auto aggID = vertex2AggId(lnode,0);
695 auto offset = Kokkos::atomic_fetch_add( &aggOffsets(aggID), stridedBlockSize );
699 for (LO k = 0; k < stridedBlockSize; k++)
700 agg2RowMapLO(offset + k) = lnode*stridedBlockSize + k;
707 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
710 auto fineNS = fineNullspace ->template getLocalView<DeviceType>();
711 auto coarseNS = coarseNullspace->template getLocalView<DeviceType>();
715 typedef typename Xpetra::Matrix<SC,LO,GO,NO>::local_matrix_type local_matrix_type;
716 typedef typename local_matrix_type::row_map_type::non_const_type rows_type;
717 typedef typename local_matrix_type::index_type::non_const_type cols_type;
718 typedef typename local_matrix_type::values_type::non_const_type vals_type;
723 status_type status(
"status");
725 typename AppendTrait<decltype(fineNS), Kokkos::RandomAccess>::type fineNSRandom = fineNS;
726 typename AppendTrait<status_type, Kokkos::Atomic> ::type statusAtomic = status;
728 const ParameterList& pL = GetParameterList();
729 const bool& doQRStep = pL.get<
bool>(
"tentative: calculate qr");
731 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
733 GetOStream(
Warnings0) <<
"TentativePFactor : for nontrivial nullspace, this may degrade performance" << std::endl;
736 size_t nnzEstimate = numRows * NSDim;
737 rows_type rowsAux(Kokkos::ViewAllocateWithoutInitializing(
"Ptent_aux_rows"), numRows+1);
738 cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing(
"Ptent_aux_cols"), nnzEstimate);
739 vals_type valsAux(
"Ptent_aux_vals", nnzEstimate);
740 rows_type rows(
"Ptent_rows", numRows+1);
743 SubFactoryMonitor m2(*
this,
"Stage 0 (InitViews)", coarseLevel);
748 KOKKOS_LAMBDA(
const LO row) {
749 rowsAux(row) = row*NSDim;
752 KOKKOS_LAMBDA(
const LO j) {
753 colsAux(j) = INVALID;
763 SubFactoryMonitor m2(*
this,
"Stage 1 (LocalQR)", coarseLevel);
774 auto agg = thread.league_rank();
777 LO aggSize = aggRows(agg+1) - aggRows(agg);
782 auto norm = impl_ATS::magnitude(zero);
787 for (decltype(aggSize) k = 0; k < aggSize; k++) {
788 auto dnorm = impl_ATS::magnitude(fineNSRandom(agg2RowMapLO(aggRows(agg)+k),0));
795 statusAtomic(1) =
true;
800 coarseNS(agg, 0) = norm;
803 for (decltype(aggSize) k = 0; k < aggSize; k++) {
804 LO localRow = agg2RowMapLO(aggRows(agg)+k);
805 SC localVal = fineNSRandom(agg2RowMapLO(aggRows(agg)+k),0) / norm;
807 rows(localRow+1) = 1;
808 colsAux(localRow) = agg;
809 valsAux(localRow) = localVal;
814 typename status_type::HostMirror statusHost = Kokkos::create_mirror_view(status);
816 for (decltype(statusHost.size()) i = 0; i < statusHost.size(); i++)
818 std::ostringstream oss;
819 oss <<
"MueLu::TentativePFactory::MakeTentative: ";
821 case 0: oss <<
"!goodMap is not implemented";
break;
822 case 1: oss <<
"fine level NS part has a zero column";
break;
824 throw Exceptions::RuntimeError(oss.str());
830 auto agg = thread.league_rank();
833 LO aggSize = aggRows(agg+1) - aggRows(agg);
836 coarseNS(agg, 0) = one;
839 for (decltype(aggSize) k = 0; k < aggSize; k++) {
840 LO localRow = agg2RowMapLO(aggRows(agg)+k);
841 SC localVal = fineNSRandom(agg2RowMapLO(aggRows(agg)+k),0);
843 rows(localRow+1) = 1;
844 colsAux(localRow) = agg;
845 valsAux(localRow) = localVal;
852 KOKKOS_LAMBDA(
const LO i,
size_t &nnz_count) {
853 nnz_count += rows(i);
866 SubFactoryMonitor m2 = SubFactoryMonitor(*
this, doQRStep ?
"Stage 1 (LocalQR)" :
"Stage 1 (Fill coarse nullspace and tentative P)", coarseLevel);
872 decltype(aggDofSizes ), decltype(maxAggSize), decltype(agg2RowMapLO),
873 decltype(statusAtomic), decltype(rows), decltype(rowsAux), decltype(colsAux),
875 localQRFunctor(fineNSRandom, coarseNS, aggDofSizes, maxAggSize, agg2RowMapLO, statusAtomic,
876 rows, rowsAux, colsAux, valsAux, doQRStep);
880 typename status_type::HostMirror statusHost = Kokkos::create_mirror_view(status);
882 for (decltype(statusHost.size()) i = 0; i < statusHost.size(); i++)
884 std::ostringstream oss;
885 oss <<
"MueLu::TentativePFactory::MakeTentative: ";
887 case 0: oss <<
"!goodMap is not implemented";
break;
888 case 1: oss <<
"fine level NS part has a zero column";
break;
890 throw Exceptions::RuntimeError(oss.str());
901 if (nnz != nnzEstimate) {
904 SubFactoryMonitor m2(*
this,
"Stage 2 (CompressRows)", coarseLevel);
906 Kokkos::parallel_scan(
"MueLu:TentativePF:Build:compress_rows", range_type(0,numRows+1),
907 KOKKOS_LAMBDA(
const LO i, LO& upd,
const bool&
final) {
915 SubFactoryMonitor m2(*
this,
"Stage 2 (CompressCols)", coarseLevel);
917 cols = cols_type(
"Ptent_cols", nnz);
918 vals = vals_type(
"Ptent_vals", nnz);
924 KOKKOS_LAMBDA(
const LO i) {
925 LO rowStart = rows(i);
928 for (
auto j = rowsAux(i); j < rowsAux(i+1); j++)
929 if (colsAux(j) != INVALID) {
930 cols(rowStart+lnnz) = colsAux(j);
931 vals(rowStart+lnnz) = valsAux(j);
943 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates do not cross process boundaries" << std::endl;
947 SubFactoryMonitor m2(*
this,
"Stage 3 (LocalMatrix+FillComplete)", coarseLevel);
949 local_matrix_type lclMatrix = local_matrix_type(
"A", numRows, coarseMap->getNodeNumElements(), nnz, vals, rows, cols);
952 RCP<ParameterList> FCparams;
953 if (pL.isSublist(
"matrixmatrix: kernel params"))
954 FCparams =
rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
956 FCparams =
rcp(
new ParameterList);
959 FCparams->set(
"compute global constants", FCparams->get(
"compute global constants",
false));
960 FCparams->set(
"Timer Label", std::string(
"MueLu::TentativeP-") +
toString(levelID));
962 auto PtentCrs = CrsMatrixFactory::Build(lclMatrix, rowMap, coarseMap, coarseMap, A->getDomainMap());
963 Ptentative =
rcp(
new CrsMatrixWrap(PtentCrs));
967 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
968 void TentativePFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
969 BuildPcoupled(RCP<Matrix> , RCP<Aggregates_kokkos> ,
970 RCP<AmalgamationInfo_kokkos> , RCP<MultiVector> ,
971 RCP<const Map> , RCP<Matrix>& ,
972 RCP<MultiVector>& )
const {
973 throw Exceptions::RuntimeError(
"MueLu: Construction of coupled tentative P is not implemented");
976 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
977 bool TentativePFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
978 isGoodMap(
const Map& rowMap,
const Map& colMap)
const {
979 auto rowLocalMap = rowMap.getLocalMap();
980 auto colLocalMap = colMap.getLocalMap();
982 const size_t numRows = rowLocalMap.getNodeNumElements();
983 const size_t numCols = colLocalMap.getNodeNumElements();
985 if (numCols < numRows)
990 KOKKOS_LAMBDA(
const LO i,
size_t &diff) {
991 diff += (rowLocalMap.getGlobalElement(i) != colLocalMap.getGlobalElement(i));
994 return (numDiff == 0);
999 #define MUELU_TENTATIVEPFACTORY_KOKKOS_SHORT
1000 #endif // HAVE_MUELU_KOKKOS_REFACTOR
1001 #endif // MUELU_TENTATIVEPFACTORY_KOKKOS_DEF_HPP
Important warning messages (one line)
MueLu::DefaultLocalOrdinal LocalOrdinal
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
KOKKOS_FORCEINLINE_FUNCTION constexpr pair< T1, T2 > make_pair(T1 x, T2 y)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
KOKKOS_INLINE_FUNCTION Kokkos::complex< RealType > pow(const complex< RealType > &x, const RealType &e)
static const NoFactory * get()
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename std::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=nullptr)
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
KOKKOS_INLINE_FUNCTION Kokkos::complex< RealType > sqrt(const complex< RealType > &x)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
Description of what is happening (more verbose)
#define SET_VALID_ENTRY(name)