MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_SemiCoarsenPFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
12 
13 #include <stdlib.h>
14 
15 #include <Kokkos_Core.hpp>
16 
17 #include <KokkosBatched_LU_Decl.hpp>
18 #include <KokkosBatched_LU_Serial_Impl.hpp>
19 #include <KokkosBatched_Trsm_Decl.hpp>
20 #include <KokkosBatched_Trsm_Serial_Impl.hpp>
21 #include <KokkosBatched_Util.hpp>
22 #include <KokkosSparse_CrsMatrix.hpp>
23 
24 #include <Xpetra_CrsMatrixWrap.hpp>
25 #include <Xpetra_ImportFactory.hpp>
26 #include <Xpetra_Matrix.hpp>
27 #include <Xpetra_MultiVectorFactory.hpp>
28 #include <Xpetra_VectorFactory.hpp>
29 
31 #include "MueLu_MasterList.hpp"
32 #include "MueLu_Monitor.hpp"
34 
35 namespace MueLu {
36 
37 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
38 
39 RCP<const ParameterList>
42  RCP<ParameterList> validParamList = rcp(new ParameterList());
43 
44  std::string name = "semicoarsen: coarsen rate";
45  validParamList->setEntry(name, MasterList::getEntry(name));
46  validParamList->set<RCP<const FactoryBase>>(
47  "A", Teuchos::null, "Generating factory of the matrix A");
48  validParamList->set<RCP<const FactoryBase>>(
49  "Nullspace", Teuchos::null, "Generating factory of the nullspace");
50  validParamList->set<RCP<const FactoryBase>>(
51  "Coordinates", Teuchos::null, "Generating factory for coordinates");
52 
53  validParamList->set<RCP<const FactoryBase>>(
54  "LineDetection_VertLineIds", Teuchos::null,
55  "Generating factory for LineDetection vertical line ids");
56  validParamList->set<RCP<const FactoryBase>>(
57  "LineDetection_Layers", Teuchos::null,
58  "Generating factory for LineDetection layer ids");
59  validParamList->set<RCP<const FactoryBase>>(
60  "CoarseNumZLayers", Teuchos::null,
61  "Generating factory for number of coarse z-layers");
62 
63  return validParamList;
64 }
65 
66 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
67  class Node>
70  Node>::
71  DeclareInput(Level &fineLevel, Level & /* coarseLevel */) const {
72  Input(fineLevel, "A");
73  Input(fineLevel, "Nullspace");
74 
75  Input(fineLevel, "LineDetection_VertLineIds");
76  Input(fineLevel, "LineDetection_Layers");
77  Input(fineLevel, "CoarseNumZLayers");
78 
79  // check whether fine level coordinate information is available.
80  // If yes, request the fine level coordinates and generate coarse coordinates
81  // during the Build call
82  if (fineLevel.GetLevelID() == 0) {
83  if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
84  fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
85  bTransferCoordinates_ = true;
86  }
87  } else if (bTransferCoordinates_ == true) {
88  // on coarser levels we check the default factory providing "Coordinates"
89  // or the factory declared to provide "Coordinates"
90  // first, check which factory is providing coordinate information
91  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
92  if (myCoordsFact == Teuchos::null) {
93  myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
94  }
95  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
96  fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
97  }
98  }
99 }
100 
101 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
102  class Node>
104  Build(Level &fineLevel,
105  Level &coarseLevel)
106  const {
107  return BuildP(fineLevel, coarseLevel);
108 }
109 
110 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
111  class Node>
113  BuildP(Level &fineLevel,
114  Level &coarseLevel)
115  const {
116  FactoryMonitor m(*this, "Build", coarseLevel);
117 
118  // obtain general variables
119  RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
120  RCP<MultiVector> fineNullspace =
121  Get<RCP<MultiVector>>(fineLevel, "Nullspace");
122 
123  // get user-provided coarsening rate parameter (constant over all levels)
124  const ParameterList &pL = GetParameterList();
125  LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
127  CoarsenRate < 2, Exceptions::RuntimeError,
128  "semicoarsen: coarsen rate must be greater than 1");
129 
130  // collect general input data
131  LO BlkSize = A->GetFixedBlockSize();
132  RCP<const Map> rowMap = A->getRowMap();
133  LO Ndofs = rowMap->getLocalNumElements();
134  LO Nnodes = Ndofs / BlkSize;
135 
136  // collect line detection information generated by the LineDetectionFactory
137  // instance
138  LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
139  Teuchos::ArrayRCP<LO> TVertLineId =
140  Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_VertLineIds");
141  Teuchos::ArrayRCP<LO> TLayerId =
142  Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_Layers");
143 
144  // compute number of coarse layers
146  "Cannot coarsen further");
147  using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
148  LO CoarseNumZLayers =
149  (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
150  if (CoarseNumZLayers < 1)
151  CoarseNumZLayers = 1;
152 
153  // generate transfer operator with semicoarsening
154  RCP<Matrix> P;
155  RCP<MultiVector> coarseNullspace;
156  BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
157  CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
158  coarseNullspace);
159 
160  // Store number of coarse z-layers on the coarse level container
161  // This information is used by the LineDetectionAlgorithm
162  // TODO get rid of the NoFactory
163  coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
165 
166  // store semicoarsening transfer on coarse level
167  Set(coarseLevel, "P", P);
168  Set(coarseLevel, "Nullspace", coarseNullspace);
169 
170  // transfer coordinates
171  if (bTransferCoordinates_) {
172  SubFactoryMonitor m2(*this, "TransferCoordinates", coarseLevel);
173  typedef Xpetra::MultiVector<
175  xdMV;
176  RCP<xdMV> fineCoords = Teuchos::null;
177  if (fineLevel.GetLevelID() == 0 &&
178  fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
179  fineCoords = fineLevel.Get<RCP<xdMV>>("Coordinates", NoFactory::get());
180  } else {
181  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
182  if (myCoordsFact == Teuchos::null) {
183  myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
184  }
185  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
186  fineCoords =
187  fineLevel.Get<RCP<xdMV>>("Coordinates", myCoordsFact.get());
188  }
189  }
190 
191  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
193  "No Coordinates found provided by the user.");
194 
195  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
197  "Three coordinates arrays must be supplied if "
198  "line detection orientation not given.");
200  fineCoords->getDataNonConst(0);
202  fineCoords->getDataNonConst(1);
204  fineCoords->getDataNonConst(2);
205 
206  // determine the maximum and minimum z coordinate value on the current
207  // processor.
208  typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
210  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
212  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
213  typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
215  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
217  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
218  for (auto it = z.begin(); it != z.end(); ++it) {
219  if (*it > zval_max)
220  zval_max = *it;
221  if (*it < zval_min)
222  zval_min = *it;
223  }
224 
225  LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
226 
228  myZLayerCoords = Teuchos::arcp<
229  typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
230  myCoarseZLayers);
231  if (myCoarseZLayers == 1) {
232  myZLayerCoords[0] = zval_min;
233  } else {
234  typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
235  (zval_max - zval_min) / (myCoarseZLayers - 1);
236  for (LO k = 0; k < myCoarseZLayers; ++k) {
237  myZLayerCoords[k] = k * dz;
238  }
239  }
240 
241  // Note, that the coarse level node coordinates have to be in vertical
242  // ordering according to the numbering of the vertical lines
243 
244  // number of vertical lines on current node:
245  LO numVertLines = Nnodes / FineNumZLayers;
246  LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
247 
248  RCP<const Map> coarseCoordMap = MapFactory::Build(
249  fineCoords->getMap()->lib(),
251  Teuchos::as<size_t>(numLocalCoarseNodes),
252  fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
253  RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
254  typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
255  NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
256  coarseCoords->putScalar(-1.0);
258  coarseCoords->getDataNonConst(0);
260  coarseCoords->getDataNonConst(1);
262  coarseCoords->getDataNonConst(2);
263 
264  // loop over all vert line indices (stop as soon as possible)
265  LO cntCoarseNodes = 0;
266  for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
267  // vertical line id in *vt
268  LO curVertLineId = TVertLineId[vt];
269 
270  if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
271  cy[curVertLineId * myCoarseZLayers] == -1.0) {
272  // loop over all local myCoarseZLayers
273  for (LO n = 0; n < myCoarseZLayers; ++n) {
274  cx[curVertLineId * myCoarseZLayers + n] = x[vt];
275  cy[curVertLineId * myCoarseZLayers + n] = y[vt];
276  cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
277  }
278  cntCoarseNodes += myCoarseZLayers;
279  }
280 
281  TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
283  "number of coarse nodes is inconsistent.");
284  if (cntCoarseNodes == numLocalCoarseNodes)
285  break;
286  }
287 
288  // set coarse level coordinates
289  Set(coarseLevel, "Coordinates", coarseCoords);
290  } /* end bool bTransferCoordinates */
291 }
292 
293 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
294  class Node>
297  Node>::
298  BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes,
299  const LO DofsPerNode, const LO NFLayers,
300  const LO NCLayers, const ArrayRCP<LO> LayerId,
301  const ArrayRCP<LO> VertLineId, const RCP<Matrix> &Amat,
302  const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
303  RCP<MultiVector> &coarseNullspace) const {
304  SubFactoryMonitor m2(*this, "BuildSemiCoarsenP", coarseLevel);
305  using impl_SC = typename Kokkos::ArithTraits<SC>::val_type;
306  using impl_ATS = Kokkos::ArithTraits<impl_SC>;
307  using LOView1D = Kokkos::View<LO *, DeviceType>;
308  using LOView2D = Kokkos::View<LO **, DeviceType>;
309 
310  // Construct a map from fine level column to layer ids (including ghost nodes)
311  // Note: this is needed to sum all couplings within a layer
312  const auto FCol2LayerVector =
314  const auto localTemp =
315  Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
316  RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
317  if (importer == Teuchos::null)
318  importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
319  {
320  // Fill local temp with layer ids and fill ghost nodes
321  const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
322  for (int row = 0; row < NFRows; row++)
323  localTempHost(row, 0) = LayerId[row / DofsPerNode];
324  const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
325  Kokkos::deep_copy(localTempView, localTempHost);
326  FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
327  }
328  const auto FCol2LayerView = FCol2LayerVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
329  const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
330 
331  // Construct a map from fine level column to local dof per node id (including
332  // ghost nodes) Note: this is needed to sum all couplings within a layer
333  const auto FCol2DofVector =
335  {
336  // Fill local temp with local dof per node ids and fill ghost nodes
337  const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
338  for (int row = 0; row < NFRows; row++)
339  localTempHost(row, 0) = row % DofsPerNode;
340  const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
341  Kokkos::deep_copy(localTempView, localTempHost);
342  FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
343  }
344  const auto FCol2DofView = FCol2DofVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
345  const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
346 
347  // Compute NVertLines
348  // TODO: Read this from line detection factory
349  int NVertLines = -1;
350  if (NFNodes != 0)
351  NVertLines = VertLineId[0];
352  for (int node = 1; node < NFNodes; ++node)
353  if (VertLineId[node] > NVertLines)
354  NVertLines = VertLineId[node];
355  NVertLines++;
356 
357  // Construct a map from Line, Layer ids to fine level node
358  LOView2D LineLayer2Node("LineLayer2Node", NVertLines, NFLayers);
359  typename LOView2D::HostMirror LineLayer2NodeHost =
360  Kokkos::create_mirror_view(LineLayer2Node);
361  for (int node = 0; node < NFNodes; ++node)
362  LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
363  Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
364 
365  // Construct a map from coarse layer id to fine layer id
366  LOView1D CLayer2FLayer("CLayer2FLayer", NCLayers);
367  typename LOView1D::HostMirror CLayer2FLayerHost =
368  Kokkos::create_mirror_view(CLayer2FLayer);
369  using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
370  const LO FirstStride =
371  (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
372  const coordT RestStride =
373  ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
374  const LO NCpts =
375  (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
377  "sizes do not match.");
378  coordT stride = (coordT)FirstStride;
379  for (int clayer = 0; clayer < NCLayers; ++clayer) {
380  CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
381  stride += RestStride;
382  }
383  Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
384 
385  // Compute start layer and stencil sizes for layer interpolation at each
386  // coarse layer
387  int MaxStencilSize = 1;
388  LOView1D CLayer2StartLayer("CLayer2StartLayer", NCLayers);
389  LOView1D CLayer2StencilSize("CLayer2StencilSize", NCLayers);
390  typename LOView1D::HostMirror CLayer2StartLayerHost =
391  Kokkos::create_mirror_view(CLayer2StartLayer);
392  typename LOView1D::HostMirror CLayer2StencilSizeHost =
393  Kokkos::create_mirror_view(CLayer2StencilSize);
394  for (int clayer = 0; clayer < NCLayers; ++clayer) {
395  const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
396  const int stencilSize = (clayer < NCLayers - 1)
397  ? CLayer2FLayerHost(clayer + 1) - startLayer
398  : NFLayers - startLayer;
399 
400  if (MaxStencilSize < stencilSize)
401  MaxStencilSize = stencilSize;
402  CLayer2StartLayerHost(clayer) = startLayer;
403  CLayer2StencilSizeHost(clayer) = stencilSize;
404  }
405  Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
406  Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
407 
408  // Allocate storage for the coarse layer interpolation matrices on all
409  // vertical lines Note: Contributions to each matrix are collapsed to vertical
410  // lines. Thus, each vertical line gives rise to a block tridiagonal matrix.
411  // Here we store the full matrix to be compatible with kokkos kernels batch LU
412  // and tringular solve.
413  int Nmax = MaxStencilSize * DofsPerNode;
414  Kokkos::View<impl_SC ***, DeviceType> BandMat(
415  "BandMat", NVertLines, Nmax, Nmax);
416  Kokkos::View<impl_SC ***, DeviceType> BandSol(
417  "BandSol", NVertLines, Nmax, DofsPerNode);
418 
419  // Precompute number of nonzeros in prolongation matrix and allocate P views
420  // Note: Each coarse dof (NVertLines*NCLayers*DofsPerNode) contributes an
421  // interpolation stencil (StencilSize*DofsPerNode)
422  int NnzP = 0;
423  for (int clayer = 0; clayer < NCLayers; ++clayer)
424  NnzP += CLayer2StencilSizeHost(clayer);
425  NnzP *= NVertLines * DofsPerNode * DofsPerNode;
426  Kokkos::View<impl_SC *, DeviceType> Pvals("Pvals", NnzP);
427  Kokkos::View<LO *, DeviceType> Pcols("Pcols", NnzP);
428 
429  // Precompute Pptr
430  // Note: Each coarse layer stencil dof contributes DofsPerNode to the
431  // corresponding row in P
432  Kokkos::View<size_t *, DeviceType> Pptr("Pptr", NFRows + 1);
433  typename Kokkos::View<size_t *, DeviceType>::HostMirror PptrHost =
434  Kokkos::create_mirror_view(Pptr);
435  Kokkos::deep_copy(PptrHost, 0);
436  for (int line = 0; line < NVertLines; ++line) {
437  for (int clayer = 0; clayer < NCLayers; ++clayer) {
438  const int stencilSize = CLayer2StencilSizeHost(clayer);
439  const int startLayer = CLayer2StartLayerHost(clayer);
440  for (int snode = 0; snode < stencilSize; ++snode) {
441  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
442  const int layer = startLayer + snode;
443  const int AmatBlkRow = LineLayer2NodeHost(line, layer);
444  const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
445  PptrHost(AmatRow + 1) += DofsPerNode;
446  }
447  }
448  }
449  }
450  for (int i = 2; i < NFRows + 1; ++i)
451  PptrHost(i) += PptrHost(i - 1);
452  TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (int)PptrHost(NFRows),
454  "Number of nonzeros in P does not match");
455  Kokkos::deep_copy(Pptr, PptrHost);
456 
457  // Precompute Pptr offsets
458  // Note: These are used to determine the nonzero index in Pvals and Pcols
459  Kokkos::View<LO *, Kokkos::DefaultHostExecutionSpace> layerBuckets(
460  "layerBuckets", NFLayers);
461  Kokkos::deep_copy(layerBuckets, 0);
462  LOView2D CLayerSNode2PptrOffset("CLayerSNode2PptrOffset", NCLayers,
463  MaxStencilSize);
464  typename LOView2D::HostMirror CLayerSNode2PptrOffsetHost =
465  Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
466  for (int clayer = 0; clayer < NCLayers; ++clayer) {
467  const int stencilSize = CLayer2StencilSizeHost(clayer);
468  const int startLayer = CLayer2StartLayerHost(clayer);
469  for (int snode = 0; snode < stencilSize; ++snode) {
470  const int layer = startLayer + snode;
471  CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
472  layerBuckets(layer)++;
473  }
474  }
475  Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
476 
477  { // Fill P - fill and solve each block tridiagonal system and fill P views
478  SubFactoryMonitor m3(*this, "Fill P", coarseLevel);
479 
480  const auto localAmat = Amat->getLocalMatrixDevice();
481  const auto zero = impl_ATS::zero();
482  const auto one = impl_ATS::one();
483 
484  using range_policy = Kokkos::RangePolicy<execution_space>;
485  Kokkos::parallel_for(
486  "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
487  range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
488  for (int clayer = 0; clayer < NCLayers; ++clayer) {
489  // Initialize BandSol
490  auto bandSol =
491  Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
492  for (int row = 0; row < Nmax; ++row)
493  for (int dof = 0; dof < DofsPerNode; ++dof)
494  bandSol(row, dof) = zero;
495 
496  // Initialize BandMat (set unused row diagonal to 1.0)
497  const int stencilSize = CLayer2StencilSize(clayer);
498  const int N = stencilSize * DofsPerNode;
499  auto bandMat =
500  Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
501  for (int row = 0; row < Nmax; ++row)
502  for (int col = 0; col < Nmax; ++col)
503  bandMat(row, col) =
504  (row == col && row >= N) ? one : zero;
505 
506  // Loop over layers in stencil and fill banded matrix and rhs
507  const int flayer = CLayer2FLayer(clayer);
508  const int startLayer = CLayer2StartLayer(clayer);
509  for (int snode = 0; snode < stencilSize; ++snode) {
510  const int layer = startLayer + snode;
511  if (layer == flayer) { // If layer in stencil is a coarse layer
512  for (int dof = 0; dof < DofsPerNode; ++dof) {
513  const int row = snode * DofsPerNode + dof;
514  bandMat(row, row) = one;
515  bandSol(row, dof) = one;
516  }
517  } else { // Not a coarse layer
518  const int AmatBlkRow = LineLayer2Node(line, layer);
519  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
520  // Get Amat row info
521  const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
522  const auto localAmatRow = localAmat.rowConst(AmatRow);
523  const int AmatRowLeng = localAmatRow.length;
524 
525  const int row = snode * DofsPerNode + dofi;
526  for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
527  const int col = snode * DofsPerNode + dofj;
528 
529  // Sum values along row which correspond to stencil
530  // layer/dof and fill bandMat
531  auto val = zero;
532  for (int i = 0; i < AmatRowLeng; ++i) {
533  const int colidx = localAmatRow.colidx(i);
534  if (FCol2Layer(colidx) == layer &&
535  FCol2Dof(colidx) == dofj)
536  val += localAmatRow.value(i);
537  }
538  bandMat(row, col) = val;
539 
540  if (snode > 0) {
541  // Sum values along row which correspond to stencil
542  // layer/dof below and fill bandMat
543  val = zero;
544  for (int i = 0; i < AmatRowLeng; ++i) {
545  const int colidx = localAmatRow.colidx(i);
546  if (FCol2Layer(colidx) == layer - 1 &&
547  FCol2Dof(colidx) == dofj)
548  val += localAmatRow.value(i);
549  }
550  bandMat(row, col - DofsPerNode) = val;
551  }
552 
553  if (snode < stencilSize - 1) {
554  // Sum values along row which correspond to stencil
555  // layer/dof above and fill bandMat
556  val = zero;
557  for (int i = 0; i < AmatRowLeng; ++i) {
558  const int colidx = localAmatRow.colidx(i);
559  if (FCol2Layer(colidx) == layer + 1 &&
560  FCol2Dof(colidx) == dofj)
561  val += localAmatRow.value(i);
562  }
563  bandMat(row, col + DofsPerNode) = val;
564  }
565  }
566  }
567  }
568  }
569 
570  // Batch LU and triangular solves
571  namespace KB = KokkosBatched;
572  using lu_type = typename KB::SerialLU<KB::Algo::LU::Unblocked>;
573  lu_type::invoke(bandMat);
574  using trsv_l_type =
575  typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
576  KB::Trans::NoTranspose, KB::Diag::Unit,
577  KB::Algo::Trsm::Unblocked>;
578  trsv_l_type::invoke(one, bandMat, bandSol);
579  using trsv_u_type = typename KB::SerialTrsm<
580  KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
581  KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
582  trsv_u_type::invoke(one, bandMat, bandSol);
583 
584  // Fill prolongation views with solution
585  for (int snode = 0; snode < stencilSize; ++snode) {
586  for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
587  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
588  const int layer = startLayer + snode;
589  const int AmatBlkRow = LineLayer2Node(line, layer);
590  const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
591 
592  const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
593  const int pptr =
594  Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
595 
596  const int col =
597  line * NCLayers + clayer; // coarse node (block row) index
598  Pcols(pptr) = col * DofsPerNode + dofj;
599  Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
600  }
601  }
602  }
603  }
604  });
605  } // Fill P
606 
607  // Build P
608  RCP<const Map> rowMap = Amat->getRowMap();
609  Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
610  Xpetra::global_size_t itemp = GNdofs / NFLayers;
611  std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
612  RCP<const Map> coarseMap = StridedMapFactory::Build(
613  rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
614  stridingInfo_, rowMap->getComm(), -1, 0);
615  P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
616  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
617  PCrs->setAllValues(Pptr, Pcols, Pvals);
618  PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
619 
620  // set StridingInformation of P
621  if (Amat->IsView("stridedMaps") == true)
622  P->CreateView("stridedMaps", Amat->getRowMap("stridedMaps"), coarseMap);
623  else
624  P->CreateView("stridedMaps", P->getRangeMap(), coarseMap);
625 
626  // Construct coarse nullspace and inject fine nullspace
627  coarseNullspace =
628  MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
629  const int numVectors = fineNullspace->getNumVectors();
630  const auto fineNullspaceView = fineNullspace->getDeviceLocalView(Xpetra::Access::ReadOnly);
631  const auto coarseNullspaceView = coarseNullspace->getDeviceLocalView(Xpetra::Access::ReadWrite);
632  using range_policy = Kokkos::RangePolicy<execution_space>;
633  Kokkos::parallel_for(
634  "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
635  range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
636  for (int clayer = 0; clayer < NCLayers; ++clayer) {
637  const int layer = CLayer2FLayer(clayer);
638  const int AmatBlkRow =
639  LineLayer2Node(line, layer); // fine node (block row) index
640  const int col =
641  line * NCLayers + clayer; // coarse node (block row) index
642  for (int k = 0; k < numVectors; ++k) {
643  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
644  const int fRow = AmatBlkRow * DofsPerNode + dofi;
645  const int cRow = col * DofsPerNode + dofi;
646  coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);
647  }
648  }
649  }
650  });
651 }
652 
653 } // namespace MueLu
654 
655 #define MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT
656 #endif // MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
ParameterList & setEntry(const std::string &name, U &&entry)
MueLu::DefaultLocalOrdinal LocalOrdinal
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
GlobalOrdinal GO
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LocalOrdinal LO
T * get() const
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
MueLu::DefaultNode Node
static const NoFactory * get()
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
size_t global_size_t
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
static const Teuchos::ParameterEntry & getEntry(const std::string &name)
Returns default entry from the &quot;master&quot; list corresponding to the specified name. ...
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:73
Node NO
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
Exception throws to report errors in the internal logical of the program.
iterator end() const
Prolongator factory performing semi-coarsening.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
iterator begin() const
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.