MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_SemiCoarsenPFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
48 
49 #include <stdlib.h>
50 
51 #include <Kokkos_Core.hpp>
52 
53 #include <KokkosBatched_LU_Decl.hpp>
54 #include <KokkosBatched_LU_Serial_Impl.hpp>
55 #include <KokkosBatched_Trsm_Decl.hpp>
56 #include <KokkosBatched_Trsm_Serial_Impl.hpp>
57 #include <KokkosBatched_Util.hpp>
58 #include <KokkosSparse_CrsMatrix.hpp>
59 
60 #include <Xpetra_CrsMatrixWrap.hpp>
61 #include <Xpetra_ImportFactory.hpp>
62 #include <Xpetra_Matrix.hpp>
63 #include <Xpetra_MultiVectorFactory.hpp>
64 #include <Xpetra_VectorFactory.hpp>
65 
67 #include "MueLu_MasterList.hpp"
68 #include "MueLu_Monitor.hpp"
70 
71 namespace MueLu {
72 
73 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
74 
75 RCP<const ParameterList>
78  RCP<ParameterList> validParamList = rcp(new ParameterList());
79 
80  std::string name = "semicoarsen: coarsen rate";
81  validParamList->setEntry(name, MasterList::getEntry(name));
82  validParamList->set<RCP<const FactoryBase>>(
83  "A", Teuchos::null, "Generating factory of the matrix A");
84  validParamList->set<RCP<const FactoryBase>>(
85  "Nullspace", Teuchos::null, "Generating factory of the nullspace");
86  validParamList->set<RCP<const FactoryBase>>(
87  "Coordinates", Teuchos::null, "Generating factory for coordinates");
88 
89  validParamList->set<RCP<const FactoryBase>>(
90  "LineDetection_VertLineIds", Teuchos::null,
91  "Generating factory for LineDetection vertical line ids");
92  validParamList->set<RCP<const FactoryBase>>(
93  "LineDetection_Layers", Teuchos::null,
94  "Generating factory for LineDetection layer ids");
95  validParamList->set<RCP<const FactoryBase>>(
96  "CoarseNumZLayers", Teuchos::null,
97  "Generating factory for number of coarse z-layers");
98 
99  return validParamList;
100 }
101 
102 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
103  class Node>
106  Node>::
107  DeclareInput(Level &fineLevel, Level & /* coarseLevel */) const {
108  Input(fineLevel, "A");
109  Input(fineLevel, "Nullspace");
110 
111  Input(fineLevel, "LineDetection_VertLineIds");
112  Input(fineLevel, "LineDetection_Layers");
113  Input(fineLevel, "CoarseNumZLayers");
114 
115  // check whether fine level coordinate information is available.
116  // If yes, request the fine level coordinates and generate coarse coordinates
117  // during the Build call
118  if (fineLevel.GetLevelID() == 0) {
119  if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
120  fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
121  bTransferCoordinates_ = true;
122  }
123  } else if (bTransferCoordinates_ == true) {
124  // on coarser levels we check the default factory providing "Coordinates"
125  // or the factory declared to provide "Coordinates"
126  // first, check which factory is providing coordinate information
127  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
128  if (myCoordsFact == Teuchos::null) {
129  myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
130  }
131  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
132  fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
133  }
134  }
135 }
136 
137 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
138  class Node>
140  Build(Level &fineLevel,
141  Level &coarseLevel)
142  const {
143  return BuildP(fineLevel, coarseLevel);
144 }
145 
146 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
147  class Node>
149  BuildP(Level &fineLevel,
150  Level &coarseLevel)
151  const {
152  FactoryMonitor m(*this, "Build", coarseLevel);
153 
154  // obtain general variables
155  RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
156  RCP<MultiVector> fineNullspace =
157  Get<RCP<MultiVector>>(fineLevel, "Nullspace");
158 
159  // get user-provided coarsening rate parameter (constant over all levels)
160  const ParameterList &pL = GetParameterList();
161  LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
163  CoarsenRate < 2, Exceptions::RuntimeError,
164  "semicoarsen: coarsen rate must be greater than 1");
165 
166  // collect general input data
167  LO BlkSize = A->GetFixedBlockSize();
168  RCP<const Map> rowMap = A->getRowMap();
169  LO Ndofs = rowMap->getLocalNumElements();
170  LO Nnodes = Ndofs / BlkSize;
171 
172  // collect line detection information generated by the LineDetectionFactory
173  // instance
174  LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
175  Teuchos::ArrayRCP<LO> TVertLineId =
176  Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_VertLineIds");
177  Teuchos::ArrayRCP<LO> TLayerId =
178  Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_Layers");
179 
180  // compute number of coarse layers
182  "Cannot coarsen further");
183  using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
184  LO CoarseNumZLayers =
185  (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
186  if (CoarseNumZLayers < 1)
187  CoarseNumZLayers = 1;
188 
189  // generate transfer operator with semicoarsening
190  RCP<Matrix> P;
191  RCP<MultiVector> coarseNullspace;
192  BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
193  CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
194  coarseNullspace);
195 
196  // Store number of coarse z-layers on the coarse level container
197  // This information is used by the LineDetectionAlgorithm
198  // TODO get rid of the NoFactory
199  coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
201 
202  // store semicoarsening transfer on coarse level
203  Set(coarseLevel, "P", P);
204  Set(coarseLevel, "Nullspace", coarseNullspace);
205 
206  // transfer coordinates
207  if (bTransferCoordinates_) {
208  SubFactoryMonitor m2(*this, "TransferCoordinates", coarseLevel);
209  typedef Xpetra::MultiVector<
211  xdMV;
212  RCP<xdMV> fineCoords = Teuchos::null;
213  if (fineLevel.GetLevelID() == 0 &&
214  fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
215  fineCoords = fineLevel.Get<RCP<xdMV>>("Coordinates", NoFactory::get());
216  } else {
217  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
218  if (myCoordsFact == Teuchos::null) {
219  myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
220  }
221  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
222  fineCoords =
223  fineLevel.Get<RCP<xdMV>>("Coordinates", myCoordsFact.get());
224  }
225  }
226 
227  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
229  "No Coordinates found provided by the user.");
230 
231  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
233  "Three coordinates arrays must be supplied if "
234  "line detection orientation not given.");
236  fineCoords->getDataNonConst(0);
238  fineCoords->getDataNonConst(1);
240  fineCoords->getDataNonConst(2);
241 
242  // determine the maximum and minimum z coordinate value on the current
243  // processor.
244  typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
246  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
248  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
249  typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
251  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
253  typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
254  for (auto it = z.begin(); it != z.end(); ++it) {
255  if (*it > zval_max)
256  zval_max = *it;
257  if (*it < zval_min)
258  zval_min = *it;
259  }
260 
261  LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
262 
264  myZLayerCoords = Teuchos::arcp<
265  typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
266  myCoarseZLayers);
267  if (myCoarseZLayers == 1) {
268  myZLayerCoords[0] = zval_min;
269  } else {
270  typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
271  (zval_max - zval_min) / (myCoarseZLayers - 1);
272  for (LO k = 0; k < myCoarseZLayers; ++k) {
273  myZLayerCoords[k] = k * dz;
274  }
275  }
276 
277  // Note, that the coarse level node coordinates have to be in vertical
278  // ordering according to the numbering of the vertical lines
279 
280  // number of vertical lines on current node:
281  LO numVertLines = Nnodes / FineNumZLayers;
282  LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
283 
284  RCP<const Map> coarseCoordMap = MapFactory::Build(
285  fineCoords->getMap()->lib(),
287  Teuchos::as<size_t>(numLocalCoarseNodes),
288  fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
289  RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
290  typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
291  NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
292  coarseCoords->putScalar(-1.0);
294  coarseCoords->getDataNonConst(0);
296  coarseCoords->getDataNonConst(1);
298  coarseCoords->getDataNonConst(2);
299 
300  // loop over all vert line indices (stop as soon as possible)
301  LO cntCoarseNodes = 0;
302  for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
303  // vertical line id in *vt
304  LO curVertLineId = TVertLineId[vt];
305 
306  if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
307  cy[curVertLineId * myCoarseZLayers] == -1.0) {
308  // loop over all local myCoarseZLayers
309  for (LO n = 0; n < myCoarseZLayers; ++n) {
310  cx[curVertLineId * myCoarseZLayers + n] = x[vt];
311  cy[curVertLineId * myCoarseZLayers + n] = y[vt];
312  cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
313  }
314  cntCoarseNodes += myCoarseZLayers;
315  }
316 
317  TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
319  "number of coarse nodes is inconsistent.");
320  if (cntCoarseNodes == numLocalCoarseNodes)
321  break;
322  }
323 
324  // set coarse level coordinates
325  Set(coarseLevel, "Coordinates", coarseCoords);
326  } /* end bool bTransferCoordinates */
327 }
328 
329 template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
330  class Node>
333  Node>::
334  BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes,
335  const LO DofsPerNode, const LO NFLayers,
336  const LO NCLayers, const ArrayRCP<LO> LayerId,
337  const ArrayRCP<LO> VertLineId, const RCP<Matrix> &Amat,
338  const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
339  RCP<MultiVector> &coarseNullspace) const {
340  SubFactoryMonitor m2(*this, "BuildSemiCoarsenP", coarseLevel);
341  using impl_SC = typename Kokkos::ArithTraits<SC>::val_type;
342  using impl_ATS = Kokkos::ArithTraits<impl_SC>;
343  using LOView1D = Kokkos::View<LO *, DeviceType>;
344  using LOView2D = Kokkos::View<LO **, DeviceType>;
345 
346  // Construct a map from fine level column to layer ids (including ghost nodes)
347  // Note: this is needed to sum all couplings within a layer
348  const auto FCol2LayerVector =
350  const auto localTemp =
351  Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
352  RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
353  if (importer == Teuchos::null)
354  importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
355  {
356  // Fill local temp with layer ids and fill ghost nodes
357  const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
358  for (int row = 0; row < NFRows; row++)
359  localTempHost(row, 0) = LayerId[row / DofsPerNode];
360  const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
361  Kokkos::deep_copy(localTempView, localTempHost);
362  FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
363  }
364  const auto FCol2LayerView = FCol2LayerVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
365  const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
366 
367  // Construct a map from fine level column to local dof per node id (including
368  // ghost nodes) Note: this is needed to sum all couplings within a layer
369  const auto FCol2DofVector =
371  {
372  // Fill local temp with local dof per node ids and fill ghost nodes
373  const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
374  for (int row = 0; row < NFRows; row++)
375  localTempHost(row, 0) = row % DofsPerNode;
376  const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
377  Kokkos::deep_copy(localTempView, localTempHost);
378  FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
379  }
380  const auto FCol2DofView = FCol2DofVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
381  const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
382 
383  // Compute NVertLines
384  // TODO: Read this from line detection factory
385  int NVertLines = -1;
386  if (NFNodes != 0)
387  NVertLines = VertLineId[0];
388  for (int node = 1; node < NFNodes; ++node)
389  if (VertLineId[node] > NVertLines)
390  NVertLines = VertLineId[node];
391  NVertLines++;
392 
393  // Construct a map from Line, Layer ids to fine level node
394  LOView2D LineLayer2Node("LineLayer2Node", NVertLines, NFLayers);
395  typename LOView2D::HostMirror LineLayer2NodeHost =
396  Kokkos::create_mirror_view(LineLayer2Node);
397  for (int node = 0; node < NFNodes; ++node)
398  LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
399  Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
400 
401  // Construct a map from coarse layer id to fine layer id
402  LOView1D CLayer2FLayer("CLayer2FLayer", NCLayers);
403  typename LOView1D::HostMirror CLayer2FLayerHost =
404  Kokkos::create_mirror_view(CLayer2FLayer);
405  using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
406  const LO FirstStride =
407  (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
408  const coordT RestStride =
409  ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
410  const LO NCpts =
411  (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
413  "sizes do not match.");
414  coordT stride = (coordT)FirstStride;
415  for (int clayer = 0; clayer < NCLayers; ++clayer) {
416  CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
417  stride += RestStride;
418  }
419  Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
420 
421  // Compute start layer and stencil sizes for layer interpolation at each
422  // coarse layer
423  int MaxStencilSize = 1;
424  LOView1D CLayer2StartLayer("CLayer2StartLayer", NCLayers);
425  LOView1D CLayer2StencilSize("CLayer2StencilSize", NCLayers);
426  typename LOView1D::HostMirror CLayer2StartLayerHost =
427  Kokkos::create_mirror_view(CLayer2StartLayer);
428  typename LOView1D::HostMirror CLayer2StencilSizeHost =
429  Kokkos::create_mirror_view(CLayer2StencilSize);
430  for (int clayer = 0; clayer < NCLayers; ++clayer) {
431  const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
432  const int stencilSize = (clayer < NCLayers - 1)
433  ? CLayer2FLayerHost(clayer + 1) - startLayer
434  : NFLayers - startLayer;
435 
436  if (MaxStencilSize < stencilSize)
437  MaxStencilSize = stencilSize;
438  CLayer2StartLayerHost(clayer) = startLayer;
439  CLayer2StencilSizeHost(clayer) = stencilSize;
440  }
441  Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
442  Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
443 
444  // Allocate storage for the coarse layer interpolation matrices on all
445  // vertical lines Note: Contributions to each matrix are collapsed to vertical
446  // lines. Thus, each vertical line gives rise to a block tridiagonal matrix.
447  // Here we store the full matrix to be compatible with kokkos kernels batch LU
448  // and tringular solve.
449  int Nmax = MaxStencilSize * DofsPerNode;
450  Kokkos::View<impl_SC ***, DeviceType> BandMat(
451  "BandMat", NVertLines, Nmax, Nmax);
452  Kokkos::View<impl_SC ***, DeviceType> BandSol(
453  "BandSol", NVertLines, Nmax, DofsPerNode);
454 
455  // Precompute number of nonzeros in prolongation matrix and allocate P views
456  // Note: Each coarse dof (NVertLines*NCLayers*DofsPerNode) contributes an
457  // interpolation stencil (StencilSize*DofsPerNode)
458  int NnzP = 0;
459  for (int clayer = 0; clayer < NCLayers; ++clayer)
460  NnzP += CLayer2StencilSizeHost(clayer);
461  NnzP *= NVertLines * DofsPerNode * DofsPerNode;
462  Kokkos::View<impl_SC *, DeviceType> Pvals("Pvals", NnzP);
463  Kokkos::View<LO *, DeviceType> Pcols("Pcols", NnzP);
464 
465  // Precompute Pptr
466  // Note: Each coarse layer stencil dof contributes DofsPerNode to the
467  // corresponding row in P
468  Kokkos::View<size_t *, DeviceType> Pptr("Pptr", NFRows + 1);
469  typename Kokkos::View<size_t *, DeviceType>::HostMirror PptrHost =
470  Kokkos::create_mirror_view(Pptr);
471  Kokkos::deep_copy(PptrHost, 0);
472  for (int line = 0; line < NVertLines; ++line) {
473  for (int clayer = 0; clayer < NCLayers; ++clayer) {
474  const int stencilSize = CLayer2StencilSizeHost(clayer);
475  const int startLayer = CLayer2StartLayerHost(clayer);
476  for (int snode = 0; snode < stencilSize; ++snode) {
477  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
478  const int layer = startLayer + snode;
479  const int AmatBlkRow = LineLayer2NodeHost(line, layer);
480  const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
481  PptrHost(AmatRow + 1) += DofsPerNode;
482  }
483  }
484  }
485  }
486  for (int i = 2; i < NFRows + 1; ++i)
487  PptrHost(i) += PptrHost(i - 1);
488  TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (int)PptrHost(NFRows),
490  "Number of nonzeros in P does not match");
491  Kokkos::deep_copy(Pptr, PptrHost);
492 
493  // Precompute Pptr offsets
494  // Note: These are used to determine the nonzero index in Pvals and Pcols
495  Kokkos::View<LO *, Kokkos::DefaultHostExecutionSpace> layerBuckets(
496  "layerBuckets", NFLayers);
497  Kokkos::deep_copy(layerBuckets, 0);
498  LOView2D CLayerSNode2PptrOffset("CLayerSNode2PptrOffset", NCLayers,
499  MaxStencilSize);
500  typename LOView2D::HostMirror CLayerSNode2PptrOffsetHost =
501  Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
502  for (int clayer = 0; clayer < NCLayers; ++clayer) {
503  const int stencilSize = CLayer2StencilSizeHost(clayer);
504  const int startLayer = CLayer2StartLayerHost(clayer);
505  for (int snode = 0; snode < stencilSize; ++snode) {
506  const int layer = startLayer + snode;
507  CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
508  layerBuckets(layer)++;
509  }
510  }
511  Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
512 
513  { // Fill P - fill and solve each block tridiagonal system and fill P views
514  SubFactoryMonitor m3(*this, "Fill P", coarseLevel);
515 
516  const auto localAmat = Amat->getLocalMatrixDevice();
517  const auto zero = impl_ATS::zero();
518  const auto one = impl_ATS::one();
519 
520  using range_policy = Kokkos::RangePolicy<execution_space>;
521  Kokkos::parallel_for(
522  "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
523  range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
524  for (int clayer = 0; clayer < NCLayers; ++clayer) {
525  // Initialize BandSol
526  auto bandSol =
527  Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
528  for (int row = 0; row < Nmax; ++row)
529  for (int dof = 0; dof < DofsPerNode; ++dof)
530  bandSol(row, dof) = zero;
531 
532  // Initialize BandMat (set unused row diagonal to 1.0)
533  const int stencilSize = CLayer2StencilSize(clayer);
534  const int N = stencilSize * DofsPerNode;
535  auto bandMat =
536  Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
537  for (int row = 0; row < Nmax; ++row)
538  for (int col = 0; col < Nmax; ++col)
539  bandMat(row, col) =
540  (row == col && row >= N) ? one : zero;
541 
542  // Loop over layers in stencil and fill banded matrix and rhs
543  const int flayer = CLayer2FLayer(clayer);
544  const int startLayer = CLayer2StartLayer(clayer);
545  for (int snode = 0; snode < stencilSize; ++snode) {
546  const int layer = startLayer + snode;
547  if (layer == flayer) { // If layer in stencil is a coarse layer
548  for (int dof = 0; dof < DofsPerNode; ++dof) {
549  const int row = snode * DofsPerNode + dof;
550  bandMat(row, row) = one;
551  bandSol(row, dof) = one;
552  }
553  } else { // Not a coarse layer
554  const int AmatBlkRow = LineLayer2Node(line, layer);
555  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
556  // Get Amat row info
557  const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
558  const auto localAmatRow = localAmat.rowConst(AmatRow);
559  const int AmatRowLeng = localAmatRow.length;
560 
561  const int row = snode * DofsPerNode + dofi;
562  for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
563  const int col = snode * DofsPerNode + dofj;
564 
565  // Sum values along row which correspond to stencil
566  // layer/dof and fill bandMat
567  auto val = zero;
568  for (int i = 0; i < AmatRowLeng; ++i) {
569  const int colidx = localAmatRow.colidx(i);
570  if (FCol2Layer(colidx) == layer &&
571  FCol2Dof(colidx) == dofj)
572  val += localAmatRow.value(i);
573  }
574  bandMat(row, col) = val;
575 
576  if (snode > 0) {
577  // Sum values along row which correspond to stencil
578  // layer/dof below and fill bandMat
579  val = zero;
580  for (int i = 0; i < AmatRowLeng; ++i) {
581  const int colidx = localAmatRow.colidx(i);
582  if (FCol2Layer(colidx) == layer - 1 &&
583  FCol2Dof(colidx) == dofj)
584  val += localAmatRow.value(i);
585  }
586  bandMat(row, col - DofsPerNode) = val;
587  }
588 
589  if (snode < stencilSize - 1) {
590  // Sum values along row which correspond to stencil
591  // layer/dof above and fill bandMat
592  val = zero;
593  for (int i = 0; i < AmatRowLeng; ++i) {
594  const int colidx = localAmatRow.colidx(i);
595  if (FCol2Layer(colidx) == layer + 1 &&
596  FCol2Dof(colidx) == dofj)
597  val += localAmatRow.value(i);
598  }
599  bandMat(row, col + DofsPerNode) = val;
600  }
601  }
602  }
603  }
604  }
605 
606  // Batch LU and triangular solves
607  namespace KB = KokkosBatched;
608  using lu_type = typename KB::SerialLU<KB::Algo::LU::Unblocked>;
609  lu_type::invoke(bandMat);
610  using trsv_l_type =
611  typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
612  KB::Trans::NoTranspose, KB::Diag::Unit,
613  KB::Algo::Trsm::Unblocked>;
614  trsv_l_type::invoke(one, bandMat, bandSol);
615  using trsv_u_type = typename KB::SerialTrsm<
616  KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
617  KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
618  trsv_u_type::invoke(one, bandMat, bandSol);
619 
620  // Fill prolongation views with solution
621  for (int snode = 0; snode < stencilSize; ++snode) {
622  for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
623  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
624  const int layer = startLayer + snode;
625  const int AmatBlkRow = LineLayer2Node(line, layer);
626  const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
627 
628  const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
629  const int pptr =
630  Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
631 
632  const int col =
633  line * NCLayers + clayer; // coarse node (block row) index
634  Pcols(pptr) = col * DofsPerNode + dofj;
635  Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
636  }
637  }
638  }
639  }
640  });
641  } // Fill P
642 
643  // Build P
644  RCP<const Map> rowMap = Amat->getRowMap();
645  Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
646  Xpetra::global_size_t itemp = GNdofs / NFLayers;
647  std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
648  RCP<const Map> coarseMap = StridedMapFactory::Build(
649  rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
650  stridingInfo_, rowMap->getComm(), -1, 0);
651  P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
652  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
653  PCrs->setAllValues(Pptr, Pcols, Pvals);
654  PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
655 
656  // set StridingInformation of P
657  if (Amat->IsView("stridedMaps") == true)
658  P->CreateView("stridedMaps", Amat->getRowMap("stridedMaps"), coarseMap);
659  else
660  P->CreateView("stridedMaps", P->getRangeMap(), coarseMap);
661 
662  // Construct coarse nullspace and inject fine nullspace
663  coarseNullspace =
664  MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
665  const int numVectors = fineNullspace->getNumVectors();
666  const auto fineNullspaceView = fineNullspace->getDeviceLocalView(Xpetra::Access::ReadOnly);
667  const auto coarseNullspaceView = coarseNullspace->getDeviceLocalView(Xpetra::Access::ReadWrite);
668  using range_policy = Kokkos::RangePolicy<execution_space>;
669  Kokkos::parallel_for(
670  "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
671  range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
672  for (int clayer = 0; clayer < NCLayers; ++clayer) {
673  const int layer = CLayer2FLayer(clayer);
674  const int AmatBlkRow =
675  LineLayer2Node(line, layer); // fine node (block row) index
676  const int col =
677  line * NCLayers + clayer; // coarse node (block row) index
678  for (int k = 0; k < numVectors; ++k) {
679  for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
680  const int fRow = AmatBlkRow * DofsPerNode + dofi;
681  const int cRow = col * DofsPerNode + dofi;
682  coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);
683  }
684  }
685  }
686  });
687 }
688 
689 } // namespace MueLu
690 
691 #define MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT
692 #endif // MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
ParameterList & setEntry(const std::string &name, U &&entry)
MueLu::DefaultLocalOrdinal LocalOrdinal
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
GlobalOrdinal GO
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LocalOrdinal LO
T * get() const
MueLu::DefaultNode Node
static const NoFactory * get()
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
size_t global_size_t
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
static const Teuchos::ParameterEntry & getEntry(const std::string &name)
Returns default entry from the &quot;master&quot; list corresponding to the specified name. ...
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:96
Node NO
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
Exception throws to report errors in the internal logical of the program.
iterator end() const
Prolongator factory performing semi-coarsening.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
iterator begin() const
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.