Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_MultiVectorLocalGatherScatter.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_DETAILS_MULTIVECTORLOCALGATHERSCATTER_HPP
11 #define IFPACK2_DETAILS_MULTIVECTORLOCALGATHERSCATTER_HPP
12 
16 
17 #include "Tpetra_MultiVector.hpp"
18 #include "Tpetra_Map.hpp"
19 
20 namespace Ifpack2 {
21 namespace Details {
22 
51 template<class MV_in, class MV_out>
53 public:
54  typedef typename MV_in::scalar_type InScalar;
55  typedef typename MV_out::scalar_type OutScalar;
56  typedef typename MV_in::local_ordinal_type LO;
57  typedef typename MV_in::global_ordinal_type GO;
58  typedef typename MV_in::node_type NO;
59 
60  /**************/
61  /* MV <==> MV */
62  /**************/
63  void
64  gather (MV_out& X_out,
65  const MV_in& X_in,
66  const Teuchos::ArrayView<const LO> perm) const
67  {
68  using Teuchos::ArrayRCP;
69  const size_t numRows = X_out.getLocalLength ();
70  const size_t numVecs = X_in.getNumVectors ();
71  Kokkos::fence();
72  for (size_t j = 0; j < numVecs; ++j) {
73  ArrayRCP<const InScalar> X_in_j = X_in.getData(j);
74  ArrayRCP<OutScalar> X_out_j = X_out.getDataNonConst(j);
75  for (size_t i = 0; i < numRows; ++i) {
76  const size_t i_perm = perm[i];
77  X_out_j[i] = X_in_j[i_perm];
78  }
79  }
80  X_out.modify_host();
81  }
82 
83  //Gather blocks (contiguous groups of blockSize rows)
84  //X_out and X_in are point indexed, but perm uses block indices.
85  //So X_out.getLocalLength() / blockSize gives the number of blocks.
86  void
87  gatherBlock (
88  MV_out& X_out,
89  const MV_in& X_in,
91  LO blockSize) const
92  {
93  using Teuchos::ArrayRCP;
94  const size_t numBlocks = X_out.getLocalLength() / blockSize;
95  const size_t numVecs = X_in.getNumVectors ();
96  Kokkos::fence();
97  for (size_t j = 0; j < numVecs; ++j) {
98  ArrayRCP<const InScalar> X_in_j = X_in.getData(j);
99  ArrayRCP<OutScalar> X_out_j = X_out.getDataNonConst(j);
100  for (size_t i = 0; i < numBlocks; ++i) {
101  const size_t i_perm = perm[i];
102  for (LO k = 0; k < blockSize; k++) {
103  X_out_j[i * blockSize + k] = X_in_j[i_perm * blockSize + k];
104  }
105  }
106  }
107  X_out.modify_host();
108  }
109 
110  void
111  scatter (MV_in& X_in,
112  const MV_out& X_out,
113  const Teuchos::ArrayView<const LO> perm) const
114  {
115  using Teuchos::ArrayRCP;
116  const size_t numRows = X_out.getLocalLength();
117  const size_t numVecs = X_in.getNumVectors();
118  Kokkos::fence();
119  for (size_t j = 0; j < numVecs; ++j) {
120  ArrayRCP<InScalar> X_in_j = X_in.getDataNonConst(j);
121  ArrayRCP<const OutScalar> X_out_j = X_out.getData(j);
122  for (size_t i = 0; i < numRows; ++i) {
123  const size_t i_perm = perm[i];
124  X_in_j[i_perm] = X_out_j[i];
125  }
126  }
127  X_out.modify_host();
128  }
129 
130  void
131  scatterBlock (
132  MV_in& X_in,
133  const MV_out& X_out,
134  const Teuchos::ArrayView<const LO> perm,
135  LO blockSize) const
136  {
137  using Teuchos::ArrayRCP;
138  const size_t numBlocks = X_out.getLocalLength() / blockSize;
139  const size_t numVecs = X_in.getNumVectors ();
140  Kokkos::fence();
141  for (size_t j = 0; j < numVecs; ++j) {
142  ArrayRCP<const InScalar> X_in_j = X_in.getData(j);
143  ArrayRCP<OutScalar> X_out_j = X_out.getDataNonConst(j);
144  for (size_t i = 0; i < numBlocks; ++i) {
145  const size_t i_perm = perm[i];
146  for (LO k = 0; k < blockSize; k++) {
147  X_in_j[i_perm * blockSize + k] = X_out_j[i * blockSize + k];
148  }
149  }
150  }
151  X_out.modify_host();
152  }
153 
154  /******************/
155  /* View <==> View */
156  /******************/
157  template<typename InView, typename OutView>
158  void gatherViewToView(OutView X_out,
159  const InView X_in,
160  const Teuchos::ArrayView<const LO> perm) const
161  {
162  //note: j is col, i is row
163  Kokkos::fence(); // demonstrated via unit test failure
164  for(size_t j = 0; j < X_out.extent(1); ++j) {
165  for(size_t i = 0; i < X_out.extent(0); ++i) {
166  const LO i_perm = perm[i];
167  X_out(i, j) = X_in(i_perm, j);
168  }
169  }
170  }
171 
172  template<typename InView, typename OutView>
173  void scatterViewToView(InView X_in,
174  const OutView X_out,
175  const Teuchos::ArrayView<const LO> perm) const
176  {
177  Kokkos::fence();
178  for(size_t j = 0; j < X_out.extent(1); ++j) {
179  for(size_t i = 0; i < X_out.extent(0); ++i) {
180  const LO i_perm = perm[i];
181  X_in(i_perm, j) = X_out(i, j);
182  }
183  }
184  }
185 
186  template<typename InView, typename OutView>
187  void gatherViewToViewBlock(OutView X_out,
188  const InView X_in,
189  const Teuchos::ArrayView<const LO> perm,
190  LO blockSize) const
191  {
192  //note: j is col, i is row
193  Kokkos::fence();
194  size_t numBlocks = X_out.extent(0) / blockSize;
195  for(size_t j = 0; j < X_out.extent(1); ++j) {
196  for(size_t i = 0; i < numBlocks; ++i) {
197  const LO i_perm = perm[i];
198  for(LO k = 0; k < blockSize; k++) {
199  X_out(i * blockSize + k, j) = X_in(i_perm * blockSize + k, j);
200  }
201  }
202  }
203  }
204 
205  template<typename InView, typename OutView>
206  void scatterViewToViewBlock(InView X_in,
207  const OutView X_out,
208  const Teuchos::ArrayView<const LO> perm,
209  LO blockSize) const
210  {
211  //note: j is col, i is row
212  Kokkos::fence();
213  size_t numBlocks = X_out.extent(0) / blockSize;
214  for(size_t j = 0; j < X_out.extent(1); ++j) {
215  for(size_t i = 0; i < numBlocks; ++i) {
216  const LO i_perm = perm[i];
217  for(LO k = 0; k < blockSize; k++) {
218  X_in(i_perm * blockSize + k, j) = X_out(i * blockSize + k, j);
219  }
220  }
221  }
222  }
223 
224  /*******************************/
225  /* MV <==> View specialization */
226  /*******************************/
227  template<typename InView>
228  void gatherMVtoView(MV_out X_out,
229  InView X_in,
230  const Teuchos::ArrayView<const LO> perm) const
231  {
232  //note: j is col, i is row
233  Kokkos::fence();
234  size_t numRows = X_out.getLocalLength();
235  for(size_t j = 0; j < X_out.getNumVectors(); ++j) {
236  Teuchos::ArrayRCP<OutScalar> X_out_j = X_out.getDataNonConst(j);
237  for(size_t i = 0; i < numRows; ++i) {
238  const LO i_perm = perm[i];
239  X_out_j[i] = X_in(i_perm, j);
240  }
241  }
242  }
243 
244  template<typename InView>
245  void scatterMVtoView(InView X_in,
246  MV_out X_out,
247  const Teuchos::ArrayView<const LO> perm) const
248  {
249  size_t numRows = X_out.getLocalLength();
250  Kokkos::fence();
251  for(size_t j = 0; j < X_in.extent(1); ++j) {
252  Teuchos::ArrayRCP<const OutScalar> X_out_j = X_out.getData(j);
253  for(size_t i = 0; i < numRows; ++i) {
254  const LO i_perm = perm[i];
255  X_in(i_perm, j) = X_out_j[i];
256  }
257  }
258  }
259 
260  template<typename InView>
261  void gatherMVtoViewBlock(MV_out X_out,
262  InView X_in,
263  const Teuchos::ArrayView<const LO> perm,
264  LO blockSize) const
265  {
266  //note: j is col, i is row
267  size_t numBlocks = X_out.getLocalLength() / blockSize;
268  Kokkos::fence();
269  for(size_t j = 0; j < X_out.getNumVectors(); ++j) {
270  Teuchos::ArrayRCP<OutScalar> X_out_j = X_out.getDataNonConst(j);
271  for(size_t i = 0; i < numBlocks; ++i) {
272  const LO i_perm = perm[i];
273  for(LO k = 0; k < blockSize; k++) {
274  X_out_j[i * blockSize + k] = X_in(i_perm * blockSize + k, j);
275  }
276  }
277  }
278  }
279 
280  template<typename InView>
281  void scatterMVtoViewBlock(InView X_in,
282  MV_out X_out,
283  const Teuchos::ArrayView<const LO> perm,
284  LO blockSize) const
285  {
286  size_t numBlocks = X_out.getLocalLength() / blockSize;
287  Kokkos::fence();
288  for(size_t j = 0; j < X_in.extent(1); ++j) {
289  Teuchos::ArrayRCP<const OutScalar> X_out_j = X_out.getData(j);
290  for(size_t i = 0; i < numBlocks; ++i) {
291  const LO i_perm = perm[i];
292  for(LO k = 0; k < blockSize; k++) {
293  X_in(i_perm * blockSize + k, j) = X_out_j[i * blockSize + k];
294  }
295  }
296  }
297  }
298 };
299 
300 } // namespace Details
301 } // namespace Ifpack2
302 
303 #endif // IFPACK2_DETAILS_MULTIVECTORLOCALGATHERSCATTER_HPP
Implementation detail of Ifpack2::Container subclasses.
Definition: Ifpack2_Details_MultiVectorLocalGatherScatter.hpp:52