MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_SemiCoarsenPFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
12 
13 #include <stdlib.h>
14 
15 #include <Kokkos_Core.hpp>
16 
17 #include <KokkosBatched_LU_Decl.hpp>
18 #include <KokkosBatched_LU_Serial_Impl.hpp>
19 #include <KokkosBatched_Trsm_Decl.hpp>
20 #include <KokkosBatched_Trsm_Serial_Impl.hpp>
21 #include <KokkosBatched_Util.hpp>
22 #include <KokkosSparse_CrsMatrix.hpp>
23 
24 #include <Xpetra_CrsMatrixWrap.hpp>
25 #include <Xpetra_ImportFactory.hpp>
26 #include <Xpetra_Matrix.hpp>
27 #include <Xpetra_MultiVectorFactory.hpp>
28 #include <Xpetra_VectorFactory.hpp>
29 
31 #include "MueLu_MasterList.hpp"
32 #include "MueLu_Monitor.hpp"
34 
35 namespace MueLu {
36 
37 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
38 
39 RCP<const ParameterList>
42  RCP<ParameterList> validParamList = rcp(new ParameterList());
43 
44  std::string name = "semicoarsen: coarsen rate";
45  validParamList->setEntry(name, MasterList::getEntry(name));
46  validParamList->set<RCP<const FactoryBase>>(
47  "A", Teuchos::null, "Generating factory of the matrix A");
48  validParamList->set<RCP<const FactoryBase>>(
49  "Nullspace", Teuchos::null, "Generating factory of the nullspace");
50  validParamList->set<RCP<const FactoryBase>>(
51  "Coordinates", Teuchos::null, "Generating factory for coordinates");
52 
53  validParamList->set<RCP<const FactoryBase>>(
54  "LineDetection_VertLineIds", Teuchos::null,
55  "Generating factory for LineDetection vertical line ids");
56  validParamList->set<RCP<const FactoryBase>>(
57  "LineDetection_Layers", Teuchos::null,
58  "Generating factory for LineDetection layer ids");
59  validParamList->set<RCP<const FactoryBase>>(
60  "CoarseNumZLayers", Teuchos::null,
61  "Generating factory for number of coarse z-layers");
62 
63  return validParamList;
64 }
65 
66 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
67  class Node>
70  Node>::
71  DeclareInput(Level &fineLevel, Level & /* coarseLevel */) const {
72  Input(fineLevel, "A");
73  Input(fineLevel, "Nullspace");
74 
75  Input(fineLevel, "LineDetection_VertLineIds");
76  Input(fineLevel, "LineDetection_Layers");
77  Input(fineLevel, "CoarseNumZLayers");
78 
79  // check whether fine level coordinate information is available.
80  // If yes, request the fine level coordinates and generate coarse coordinates
81  // during the Build call
82  if (fineLevel.GetLevelID() == 0) {
83  if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
84  fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
85  bTransferCoordinates_ = true;
86  }
87  } else if (bTransferCoordinates_ == true) {
88  // on coarser levels we check the default factory providing "Coordinates"
89  // or the factory declared to provide "Coordinates"
90  // first, check which factory is providing coordinate information
91  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
92  if (myCoordsFact == Teuchos::null) {
93  myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
94  }
95  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
96  fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
97  }
98  }
99 }
100 
101 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
102  class Node>
104  Build(Level &fineLevel,
105  Level &coarseLevel)
106  const {
107  return BuildP(fineLevel, coarseLevel);
108 }
109 
110 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
111  class Node>
113  BuildP(Level &fineLevel,
114  Level &coarseLevel)
115  const {
116  FactoryMonitor m(*this, "Build", coarseLevel);
117 
118  // obtain general variables
119  RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
120  RCP<MultiVector> fineNullspace =
121  Get<RCP<MultiVector>>(fineLevel, "Nullspace");
122 
123  // get user-provided coarsening rate parameter (constant over all levels)
124  const ParameterList &pL = GetParameterList();
125  LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
127  CoarsenRate < 2, Exceptions::RuntimeError,
128  "semicoarsen: coarsen rate must be greater than 1");
129 
130  // collect general input data
131  LO BlkSize = A->GetFixedBlockSize();
132  RCP<const Map> rowMap = A->getRowMap();
133  LO Ndofs = rowMap->getLocalNumElements();
134  LO Nnodes = Ndofs / BlkSize;
135 
136  // collect line detection information generated by the LineDetectionFactory
137  // instance
138  LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
139  Teuchos::ArrayRCP<LO> TVertLineId =
140  Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_VertLineIds");
141  Teuchos::ArrayRCP<LO> TLayerId =
142  Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_Layers");
143 
144  // compute number of coarse layers
146  "Cannot coarsen further");
147  using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
148  LO CoarseNumZLayers =
149  (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
150  if (CoarseNumZLayers < 1)
151  CoarseNumZLayers = 1;
152 
153  // generate transfer operator with semicoarsening
154  RCP<Matrix> P;
155  RCP<MultiVector> coarseNullspace;
156  BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
157  CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
158  coarseNullspace);
159 
160  // Store number of coarse z-layers on the coarse level container
161  // This information is used by the LineDetectionAlgorithm
162  // TODO get rid of the NoFactory
163  coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
165 
166  // store semicoarsening transfer on coarse level
167  Set(coarseLevel, "P", P);
168  Set(coarseLevel, "Nullspace", coarseNullspace);
169 
170  // transfer coordinates
171  if (bTransferCoordinates_) {
172  SubFactoryMonitor m2(*this, "TransferCoordinates", coarseLevel);
173  typedef Xpetra::MultiVector<
175  xdMV;
176  RCP<xdMV> fineCoords = Teuchos::null;
177  if (fineLevel.GetLevelID() == 0 &&
178  fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
179  fineCoords = fineLevel.Get<RCP<xdMV>>("Coordinates", NoFactory::get());
180  } else {
181  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
182  if (myCoordsFact == Teuchos::null) {
183  myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
184  }
185  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
186  fineCoords =
187  fineLevel.Get<RCP<xdMV>>("Coordinates", myCoordsFact.get());
188  }
189  }
190 
191  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
193  "No Coordinates found provided by the user.");
194 
195  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
197  "Three coordinates arrays must be supplied if "
198  "line detection orientation not given.");
200  fineCoords->getDataNonConst(0);
202  fineCoords->getDataNonConst(1);
204  fineCoords->getDataNonConst(2);
205 
206  // determine the maximum and minimum z coordinate value on the current
207  // processor.
208  typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
210  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
212  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
213  typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
215  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
217  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
218  for (auto it = z.begin(); it != z.end(); ++it) {
219  if (*it > zval_max)
220  zval_max = *it;
221  if (*it < zval_min)
222  zval_min = *it;
223  }
224 
225  LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
226 
228  myZLayerCoords = Teuchos::arcp<
229  typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
230  myCoarseZLayers);
231  if (myCoarseZLayers == 1) {
232  myZLayerCoords[0] = zval_min;
233  } else {
234  typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
235  (zval_max - zval_min) / (myCoarseZLayers - 1);
236  for (LO k = 0; k < myCoarseZLayers; ++k) {
237  myZLayerCoords[k] = k * dz;
238  }
239  }
240 
241  // Note, that the coarse level node coordinates have to be in vertical
242  // ordering according to the numbering of the vertical lines
243 
244  // number of vertical lines on current node:
245  LO numVertLines = Nnodes / FineNumZLayers;
246  LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
247 
248  RCP<const Map> coarseCoordMap = MapFactory::Build(
249  fineCoords->getMap()->lib(),
251  Teuchos::as<size_t>(numLocalCoarseNodes),
252  fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
253  RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
254  typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
255  NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
256  coarseCoords->putScalar(-1.0);
258  coarseCoords->getDataNonConst(0);
260  coarseCoords->getDataNonConst(1);
262  coarseCoords->getDataNonConst(2);
263 
264  // loop over all vert line indices (stop as soon as possible)
265  LO cntCoarseNodes = 0;
266  for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
267  // vertical line id in *vt
268  LO curVertLineId = TVertLineId[vt];
269 
270  if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
271  cy[curVertLineId * myCoarseZLayers] == -1.0) {
272  // loop over all local myCoarseZLayers
273  for (LO n = 0; n < myCoarseZLayers; ++n) {
274  cx[curVertLineId * myCoarseZLayers + n] = x[vt];
275  cy[curVertLineId * myCoarseZLayers + n] = y[vt];
276  cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
277  }
278  cntCoarseNodes += myCoarseZLayers;
279  }
280 
281  TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
283  "number of coarse nodes is inconsistent.");
284  if (cntCoarseNodes == numLocalCoarseNodes)
285  break;
286  }
287 
288  // set coarse level coordinates
289  Set(coarseLevel, "Coordinates", coarseCoords);
290  } /* end bool bTransferCoordinates */
291 }
292 
293 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
294  class Node>
297  Node>::
298  BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes,
299  const LO DofsPerNode, const LO NFLayers,
300  const LO NCLayers, const ArrayRCP<LO> LayerId,
301  const ArrayRCP<LO> VertLineId, const RCP<Matrix> &Amat,
302  const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
303  RCP<MultiVector> &coarseNullspace) const {
304  SubFactoryMonitor m2(*this, "BuildSemiCoarsenP", coarseLevel);
305 #if KOKKOS_VERSION >= 40799
306  using impl_SC = typename KokkosKernels::ArithTraits<SC>::val_type;
307 #else
308  using impl_SC = typename Kokkos::ArithTraits<SC>::val_type;
309 #endif
310 #if KOKKOS_VERSION >= 40799
311  using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
312 #else
313  using impl_ATS = Kokkos::ArithTraits<impl_SC>;
314 #endif
315  using LOView1D = Kokkos::View<LO *, DeviceType>;
316  using LOView2D = Kokkos::View<LO **, DeviceType>;
317 
318  // Construct a map from fine level column to layer ids (including ghost nodes)
319  // Note: this is needed to sum all couplings within a layer
320  const auto FCol2LayerVector =
322  const auto localTemp =
323  Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
324  RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
325  if (importer == Teuchos::null)
326  importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
327  {
328  // Fill local temp with layer ids and fill ghost nodes
329  const auto localTempHost = localTemp->getLocalViewHost(Xpetra::Access::ReadWrite);
330  for (int row = 0; row < NFRows; row++)
331  localTempHost(row, 0) = LayerId[row / DofsPerNode];
332  const auto localTempView = localTemp->getLocalViewDevice(Xpetra::Access::ReadWrite);
333  Kokkos::deep_copy(localTempView, localTempHost);
334  FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
335  }
336  const auto FCol2LayerView = FCol2LayerVector->getLocalViewDevice(Xpetra::Access::ReadOnly);
337  const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
338 
339  // Construct a map from fine level column to local dof per node id (including
340  // ghost nodes) Note: this is needed to sum all couplings within a layer
341  const auto FCol2DofVector =
343  {
344  // Fill local temp with local dof per node ids and fill ghost nodes
345  const auto localTempHost = localTemp->getLocalViewHost(Xpetra::Access::ReadWrite);
346  for (int row = 0; row < NFRows; row++)
347  localTempHost(row, 0) = row % DofsPerNode;
348  const auto localTempView = localTemp->getLocalViewDevice(Xpetra::Access::ReadWrite);
349  Kokkos::deep_copy(localTempView, localTempHost);
350  FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
351  }
352  const auto FCol2DofView = FCol2DofVector->getLocalViewDevice(Xpetra::Access::ReadOnly);
353  const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
354 
355  // Compute NVertLines
356  // TODO: Read this from line detection factory
357  int NVertLines = -1;
358  if (NFNodes != 0)
359  NVertLines = VertLineId[0];
360  for (int node = 1; node < NFNodes; ++node)
361  if (VertLineId[node] > NVertLines)
362  NVertLines = VertLineId[node];
363  NVertLines++;
364 
365  // Construct a map from Line, Layer ids to fine level node
366  LOView2D LineLayer2Node("LineLayer2Node", NVertLines, NFLayers);
367  typename LOView2D::host_mirror_type LineLayer2NodeHost =
368  Kokkos::create_mirror_view(LineLayer2Node);
369  for (int node = 0; node < NFNodes; ++node)
370  LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
371  Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
372 
373  // Construct a map from coarse layer id to fine layer id
374  LOView1D CLayer2FLayer("CLayer2FLayer", NCLayers);
375  typename LOView1D::host_mirror_type CLayer2FLayerHost =
376  Kokkos::create_mirror_view(CLayer2FLayer);
377  using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
378  const LO FirstStride =
379  (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
380  const coordT RestStride =
381  ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
382  const LO NCpts =
383  (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
385  "sizes do not match.");
386  coordT stride = (coordT)FirstStride;
387  for (int clayer = 0; clayer < NCLayers; ++clayer) {
388  CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
389  stride += RestStride;
390  }
391  Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
392 
393  // Compute start layer and stencil sizes for layer interpolation at each
394  // coarse layer
395  int MaxStencilSize = 1;
396  LOView1D CLayer2StartLayer("CLayer2StartLayer", NCLayers);
397  LOView1D CLayer2StencilSize("CLayer2StencilSize", NCLayers);
398  typename LOView1D::host_mirror_type CLayer2StartLayerHost =
399  Kokkos::create_mirror_view(CLayer2StartLayer);
400  typename LOView1D::host_mirror_type CLayer2StencilSizeHost =
401  Kokkos::create_mirror_view(CLayer2StencilSize);
402  for (int clayer = 0; clayer < NCLayers; ++clayer) {
403  const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
404  const int stencilSize = (clayer < NCLayers - 1)
405  ? CLayer2FLayerHost(clayer + 1) - startLayer
406  : NFLayers - startLayer;
407 
408  if (MaxStencilSize < stencilSize)
409  MaxStencilSize = stencilSize;
410  CLayer2StartLayerHost(clayer) = startLayer;
411  CLayer2StencilSizeHost(clayer) = stencilSize;
412  }
413  Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
414  Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
415 
416  // Allocate storage for the coarse layer interpolation matrices on all
417  // vertical lines Note: Contributions to each matrix are collapsed to vertical
418  // lines. Thus, each vertical line gives rise to a block tridiagonal matrix.
419  // Here we store the full matrix to be compatible with kokkos kernels batch LU
420  // and tringular solve.
421  int Nmax = MaxStencilSize * DofsPerNode;
422  Kokkos::View<impl_SC ***, DeviceType> BandMat(
423  "BandMat", NVertLines, Nmax, Nmax);
424  Kokkos::View<impl_SC ***, DeviceType> BandSol(
425  "BandSol", NVertLines, Nmax, DofsPerNode);
426 
427  // Precompute number of nonzeros in prolongation matrix and allocate P views
428  // Note: Each coarse dof (NVertLines*NCLayers*DofsPerNode) contributes an
429  // interpolation stencil (StencilSize*DofsPerNode)
430  int NnzP = 0;
431  for (int clayer = 0; clayer < NCLayers; ++clayer)
432  NnzP += CLayer2StencilSizeHost(clayer);
433  NnzP *= NVertLines * DofsPerNode * DofsPerNode;
434  Kokkos::View<impl_SC *, DeviceType> Pvals("Pvals", NnzP);
435  Kokkos::View<LO *, DeviceType> Pcols("Pcols", NnzP);
436 
437  // Precompute Pptr
438  // Note: Each coarse layer stencil dof contributes DofsPerNode to the
439  // corresponding row in P
440  Kokkos::View<size_t *, DeviceType> Pptr("Pptr", NFRows + 1);
441  typename Kokkos::View<size_t *, DeviceType>::host_mirror_type PptrHost =
442  Kokkos::create_mirror_view(Pptr);
443  Kokkos::deep_copy(PptrHost, 0);
444  for (int line = 0; line < NVertLines; ++line) {
445  for (int clayer = 0; clayer < NCLayers; ++clayer) {
446  const int stencilSize = CLayer2StencilSizeHost(clayer);
447  const int startLayer = CLayer2StartLayerHost(clayer);
448  for (int snode = 0; snode < stencilSize; ++snode) {
449  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
450  const int layer = startLayer + snode;
451  const int AmatBlkRow = LineLayer2NodeHost(line, layer);
452  const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
453  PptrHost(AmatRow + 1) += DofsPerNode;
454  }
455  }
456  }
457  }
458  for (int i = 2; i < NFRows + 1; ++i)
459  PptrHost(i) += PptrHost(i - 1);
460  TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (int)PptrHost(NFRows),
462  "Number of nonzeros in P does not match");
463  Kokkos::deep_copy(Pptr, PptrHost);
464 
465  // Precompute Pptr offsets
466  // Note: These are used to determine the nonzero index in Pvals and Pcols
467  Kokkos::View<LO *, Kokkos::DefaultHostExecutionSpace> layerBuckets(
468  "layerBuckets", NFLayers);
469  Kokkos::deep_copy(layerBuckets, 0);
470  LOView2D CLayerSNode2PptrOffset("CLayerSNode2PptrOffset", NCLayers,
471  MaxStencilSize);
472  typename LOView2D::host_mirror_type CLayerSNode2PptrOffsetHost =
473  Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
474  for (int clayer = 0; clayer < NCLayers; ++clayer) {
475  const int stencilSize = CLayer2StencilSizeHost(clayer);
476  const int startLayer = CLayer2StartLayerHost(clayer);
477  for (int snode = 0; snode < stencilSize; ++snode) {
478  const int layer = startLayer + snode;
479  CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
480  layerBuckets(layer)++;
481  }
482  }
483  Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
484 
485  { // Fill P - fill and solve each block tridiagonal system and fill P views
486  SubFactoryMonitor m3(*this, "Fill P", coarseLevel);
487 
488  const auto localAmat = Amat->getLocalMatrixDevice();
489  const auto zero = impl_ATS::zero();
490  const auto one = impl_ATS::one();
491 
492  using range_policy = Kokkos::RangePolicy<execution_space>;
493  Kokkos::parallel_for(
494  "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
495  range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
496  for (int clayer = 0; clayer < NCLayers; ++clayer) {
497  // Initialize BandSol
498  auto bandSol =
499  Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
500  for (int row = 0; row < Nmax; ++row)
501  for (int dof = 0; dof < DofsPerNode; ++dof)
502  bandSol(row, dof) = zero;
503 
504  // Initialize BandMat (set unused row diagonal to 1.0)
505  const int stencilSize = CLayer2StencilSize(clayer);
506  const int N = stencilSize * DofsPerNode;
507  auto bandMat =
508  Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
509  for (int row = 0; row < Nmax; ++row)
510  for (int col = 0; col < Nmax; ++col)
511  bandMat(row, col) =
512  (row == col && row >= N) ? one : zero;
513 
514  // Loop over layers in stencil and fill banded matrix and rhs
515  const int flayer = CLayer2FLayer(clayer);
516  const int startLayer = CLayer2StartLayer(clayer);
517  for (int snode = 0; snode < stencilSize; ++snode) {
518  const int layer = startLayer + snode;
519  if (layer == flayer) { // If layer in stencil is a coarse layer
520  for (int dof = 0; dof < DofsPerNode; ++dof) {
521  const int row = snode * DofsPerNode + dof;
522  bandMat(row, row) = one;
523  bandSol(row, dof) = one;
524  }
525  } else { // Not a coarse layer
526  const int AmatBlkRow = LineLayer2Node(line, layer);
527  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
528  // Get Amat row info
529  const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
530  const auto localAmatRow = localAmat.rowConst(AmatRow);
531  const int AmatRowLeng = localAmatRow.length;
532 
533  const int row = snode * DofsPerNode + dofi;
534  for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
535  const int col = snode * DofsPerNode + dofj;
536 
537  // Sum values along row which correspond to stencil
538  // layer/dof and fill bandMat
539  auto val = zero;
540  for (int i = 0; i < AmatRowLeng; ++i) {
541  const int colidx = localAmatRow.colidx(i);
542  if (FCol2Layer(colidx) == layer &&
543  FCol2Dof(colidx) == dofj)
544  val += localAmatRow.value(i);
545  }
546  bandMat(row, col) = val;
547 
548  if (snode > 0) {
549  // Sum values along row which correspond to stencil
550  // layer/dof below and fill bandMat
551  val = zero;
552  for (int i = 0; i < AmatRowLeng; ++i) {
553  const int colidx = localAmatRow.colidx(i);
554  if (FCol2Layer(colidx) == layer - 1 &&
555  FCol2Dof(colidx) == dofj)
556  val += localAmatRow.value(i);
557  }
558  bandMat(row, col - DofsPerNode) = val;
559  }
560 
561  if (snode < stencilSize - 1) {
562  // Sum values along row which correspond to stencil
563  // layer/dof above and fill bandMat
564  val = zero;
565  for (int i = 0; i < AmatRowLeng; ++i) {
566  const int colidx = localAmatRow.colidx(i);
567  if (FCol2Layer(colidx) == layer + 1 &&
568  FCol2Dof(colidx) == dofj)
569  val += localAmatRow.value(i);
570  }
571  bandMat(row, col + DofsPerNode) = val;
572  }
573  }
574  }
575  }
576  }
577 
578  // Batch LU and triangular solves
579  namespace KB = KokkosBatched;
580  using lu_type = typename KB::SerialLU<KB::Algo::LU::Unblocked>;
581  lu_type::invoke(bandMat);
582  using trsv_l_type =
583  typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
584  KB::Trans::NoTranspose, KB::Diag::Unit,
585  KB::Algo::Trsm::Unblocked>;
586  trsv_l_type::invoke(one, bandMat, bandSol);
587  using trsv_u_type = typename KB::SerialTrsm<
588  KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
589  KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
590  trsv_u_type::invoke(one, bandMat, bandSol);
591 
592  // Fill prolongation views with solution
593  for (int snode = 0; snode < stencilSize; ++snode) {
594  for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
595  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
596  const int layer = startLayer + snode;
597  const int AmatBlkRow = LineLayer2Node(line, layer);
598  const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
599 
600  const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
601  const int pptr =
602  Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
603 
604  const int col =
605  line * NCLayers + clayer; // coarse node (block row) index
606  Pcols(pptr) = col * DofsPerNode + dofj;
607  Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
608  }
609  }
610  }
611  }
612  });
613  } // Fill P
614 
615  // Build P
616  RCP<const Map> rowMap = Amat->getRowMap();
617  Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
618  Xpetra::global_size_t itemp = GNdofs / NFLayers;
619  std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
620  RCP<const Map> coarseMap = StridedMapFactory::Build(
621  rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
622  stridingInfo_, rowMap->getComm(), -1, 0);
623  P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
624  RCP<CrsMatrix> PCrs = toCrsMatrix(P);
625  PCrs->setAllValues(Pptr, Pcols, Pvals);
626  PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
627 
628  // set StridingInformation of P
629  if (Amat->IsView("stridedMaps") == true)
630  P->CreateView("stridedMaps", Amat->getRowMap("stridedMaps"), coarseMap);
631  else
632  P->CreateView("stridedMaps", P->getRangeMap(), coarseMap);
633 
634  // Construct coarse nullspace and inject fine nullspace
635  coarseNullspace =
636  MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
637  const int numVectors = fineNullspace->getNumVectors();
638  const auto fineNullspaceView = fineNullspace->getLocalViewDevice(Xpetra::Access::ReadOnly);
639  const auto coarseNullspaceView = coarseNullspace->getLocalViewDevice(Xpetra::Access::ReadWrite);
640  using range_policy = Kokkos::RangePolicy<execution_space>;
641  Kokkos::parallel_for(
642  "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
643  range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
644  for (int clayer = 0; clayer < NCLayers; ++clayer) {
645  const int layer = CLayer2FLayer(clayer);
646  const int AmatBlkRow =
647  LineLayer2Node(line, layer); // fine node (block row) index
648  const int col =
649  line * NCLayers + clayer; // coarse node (block row) index
650  for (int k = 0; k < numVectors; ++k) {
651  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
652  const int fRow = AmatBlkRow * DofsPerNode + dofi;
653  const int cRow = col * DofsPerNode + dofi;
654  coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);
655  }
656  }
657  }
658  });
659 }
660 
661 } // namespace MueLu
662 
663 #define MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT
664 #endif // MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
ParameterList & setEntry(const std::string &name, U &&entry)
MueLu::DefaultLocalOrdinal LocalOrdinal
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
GlobalOrdinal GO
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LocalOrdinal LO
T * get() const
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
MueLu::DefaultNode Node
static const NoFactory * get()
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
size_t global_size_t
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
static const Teuchos::ParameterEntry & getEntry(const std::string &name)
Returns default entry from the &quot;master&quot; list corresponding to the specified name. ...
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:73
Node NO
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
Exception throws to report errors in the internal logical of the program.
iterator end() const
Prolongator factory performing semi-coarsening.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
iterator begin() const
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.