Collection of Concrete Vector Reduction/Transformation Operator Implementations  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
RTOpPack_TOpLinearCombination_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // RTOp: Interfaces and Support Software for Vector Reduction Transformation
5 // Operations
6 // Copyright (2006) Sandia Corporation
7 //
8 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9 // license for use of this work by or on behalf of the U.S. Government.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov)
39 //
40 // ***********************************************************************
41 // @HEADER
42 
43 #ifndef RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
44 #define RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
45 
46 
47 #include "Teuchos_Workspace.hpp"
48 
49 
50 namespace RTOpPack {
51 
52 
53 template<class Scalar>
55  const ArrayView<const Scalar> &alpha_in,
56  const Scalar &beta_in
57  )
58  :beta_(beta_in)
59 {
60  if (alpha_in.size())
61  this->alpha(alpha_in);
62  this->setOpNameBase("TOpLinearCombination");
63 }
64 
65 
66 
67 template<class Scalar>
69  const ArrayView<const Scalar> &alpha_in )
70 {
71  TEUCHOS_TEST_FOR_EXCEPT( alpha_in.size() == 0 );
72  alpha_ = alpha_in;
73 }
74 
75 
76 template<class Scalar>
77 const ArrayView<const Scalar>
79 { return alpha_; }
80 
81 
82 template<class Scalar>
83 void TOpLinearCombination<Scalar>::beta( const Scalar& beta_in ) { beta_ = beta_in; }
84 
85 
86 template<class Scalar>
87 Scalar TOpLinearCombination<Scalar>::beta() const { return beta_; }
88 
89 
90 template<class Scalar>
91 int TOpLinearCombination<Scalar>::num_vecs() const { return alpha_.size(); }
92 
93 
94 // Overridden from RTOpT
95 
96 
97 template<class Scalar>
99  const ArrayView<const ConstSubVectorView<Scalar> > &sub_vecs,
100  const ArrayView<const SubVectorView<Scalar> > &targ_sub_vecs,
101  const Ptr<ReductTarget> &reduct_obj_inout
102  ) const
103 {
104 
105  using Teuchos::as;
106  using Teuchos::Workspace;
108  typedef typename Teuchos::ArrayRCP<Scalar>::iterator iter_t;
109  typedef typename Teuchos::ArrayRCP<const Scalar>::iterator const_iter_t;
111 
112 #ifdef TEUCHOS_DEBUG
113  validate_apply_op<Scalar>(*this, as<int>(alpha_.size()), 1, false,
114  sub_vecs, targ_sub_vecs, reduct_obj_inout.getConst());
115 #else
116  (void)reduct_obj_inout;
117 #endif
118 
119  const int l_num_vecs = alpha_.size();
120 
121  // Get iterators to local data
122  const RTOpPack::index_type subDim = targ_sub_vecs[0].subDim();
123  iter_t z0_val = targ_sub_vecs[0].values().begin();
124  const ptrdiff_t z0_s = targ_sub_vecs[0].stride();
125  Workspace<const_iter_t> v_val(wss,l_num_vecs);
126  Workspace<ptrdiff_t> v_s(wss,l_num_vecs,false);
127  for( int k = 0; k < l_num_vecs; ++k ) {
128 #ifdef TEUCHOS_DEBUG
129  TEUCHOS_TEST_FOR_EXCEPT( sub_vecs[k].subDim() != subDim );
130  TEUCHOS_TEST_FOR_EXCEPT( sub_vecs[k].globalOffset() != targ_sub_vecs[0].globalOffset() );
131 #endif
132  v_val[k] = sub_vecs[k].values().begin();
133  v_s[k] = sub_vecs[k].stride();
134  }
135 
136  //
137  // Perform the operation and specialize the cases for l_num_vecs = 1 and 2
138  // in order to get good performance.
139  //
140  if( l_num_vecs == 1 ) {
141  //
142  // z0 = alpha*v0 + beta*z0
143  //
144  const Scalar l_alpha = alpha_[0], l_beta = beta_;
145  const_iter_t v0_val = v_val[0];
146  const ptrdiff_t v0_s = v_s[0];
147  if( l_beta==ST::zero() ) {
148  // z0 = alpha*v0
149  if( z0_s==1 && v0_s==1 ) {
150  for( int j = 0; j < subDim; ++j )
151  (*z0_val++) = l_alpha * (*v0_val++);
152  }
153  else {
154  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
155  (*z0_val) = l_alpha * (*v0_val);
156  }
157  }
158  else if( l_beta==ST::one() ) {
159  //
160  // z0 = alpha*v0 + z0
161  //
162  if( z0_s==1 && v0_s==1 ) {
163  for( int j = 0; j < subDim; ++j )
164  (*z0_val++) += l_alpha * (*v0_val++);
165  }
166  else {
167  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
168  (*z0_val) += l_alpha * (*v0_val);
169  }
170  }
171  else {
172  // z0 = alpha*v0 + beta*z0
173  if( z0_s==1 && v0_s==1 ) {
174  for( int j = 0; j < subDim; ++j, ++z0_val )
175  (*z0_val) = l_alpha * (*v0_val++) + l_beta*(*z0_val);
176  }
177  else {
178  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
179  (*z0_val) = l_alpha * (*v0_val) + l_beta*(*z0_val);
180  }
181  }
182  }
183  else if( l_num_vecs == 2 ) {
184  //
185  // z0 = alpha0*v0 + alpha1*v1 + beta*z0
186  //
187  const Scalar alpha0 = alpha_[0], alpha1=alpha_[1], l_beta = beta_;
188  const_iter_t v0_val = v_val[0];
189  const ptrdiff_t v0_s = v_s[0];
190  const_iter_t v1_val = v_val[1];
191  const ptrdiff_t v1_s = v_s[1];
192  if( l_beta==ST::zero() ) {
193  if( alpha0 == ST::one() ) {
194  if( alpha1 == ST::one() ) {
195  // z0 = v0 + v1
196  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
197  for( int j = 0; j < subDim; ++j )
198  (*z0_val++) = (*v0_val++) + (*v1_val++);
199  }
200  else {
201  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
202  (*z0_val) = (*v0_val) + (*v1_val);
203  }
204  }
205  else {
206  // z0 = v0 + alpha1*v1
207  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
208  for( int j = 0; j < subDim; ++j )
209  (*z0_val++) = (*v0_val++) + alpha1*(*v1_val++);
210  }
211  else {
212  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
213  (*z0_val) = (*v0_val) + alpha1*(*v1_val);
214  }
215  }
216  }
217  else {
218  if( alpha1 == ST::one() ) {
219  // z0 = alpha0*v0 + v1
220  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
221  for( int j = 0; j < subDim; ++j )
222  (*z0_val++) = alpha0*(*v0_val++) + (*v1_val++);
223  }
224  else {
225  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
226  (*z0_val) = alpha0*(*v0_val) + (*v1_val);
227  }
228  }
229  else {
230  // z0 = alpha0*v0 + alpha1*v1
231  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
232  for( int j = 0; j < subDim; ++j )
233  (*z0_val++) = alpha0*(*v0_val++) + alpha1*(*v1_val++);
234  }
235  else {
236  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
237  (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val);
238  }
239  }
240  }
241  }
242  else if( l_beta==ST::one() ) {
243  if( alpha0 == ST::one() ) {
244  if( alpha1 == ST::one() ) {
245  // z0 = v0 + v1 + z0
246  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
247  for( int j = 0; j < subDim; ++j, ++z0_val )
248  (*z0_val) += (*v0_val++) + (*v1_val++);
249  }
250  else {
251  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
252  (*z0_val) += (*v0_val) + (*v1_val);
253  }
254  }
255  else {
256  // z0 = v0 + alpha1*v1 + z0
257  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
258  for( int j = 0; j < subDim; ++j, ++z0_val )
259  (*z0_val) += (*v0_val++) + alpha1*(*v1_val++);
260  }
261  else {
262  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
263  (*z0_val) += (*v0_val) + alpha1*(*v1_val);
264  }
265  }
266  }
267  else {
268  if( alpha1 == ST::one() ) {
269  // z0 = alpha0*v0 + v1 + z0
270  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
271  for( int j = 0; j < subDim; ++j, ++z0_val )
272  (*z0_val) += alpha0*(*v0_val++) + (*v1_val++);
273  }
274  else {
275  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
276  (*z0_val) += alpha0*(*v0_val) + (*v1_val);
277  }
278  }
279  else {
280  // z0 = alpha0*v0 + alpha1*v1 + z0
281  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
282  for( int j = 0; j < subDim; ++j, ++z0_val )
283  (*z0_val) += alpha0*(*v0_val++) + alpha1*(*v1_val++);
284  }
285  else {
286  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
287  (*z0_val) += alpha0*(*v0_val) + alpha1*(*v1_val);
288  }
289  }
290  }
291  }
292  else {
293  if( alpha0 == ST::one() ) {
294  if( alpha1 == ST::one() ) {
295  // z0 = v0 + v1 + beta*z0
296  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
297  for( int j = 0; j < subDim; ++j, ++z0_val )
298  (*z0_val) = (*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
299  }
300  else {
301  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
302  (*z0_val) = (*v0_val) + (*v1_val) + l_beta*(*z0_val);
303  }
304  }
305  else {
306  // z0 = v0 + alpha1*v1 + beta*z0
307  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
308  for( int j = 0; j < subDim; ++j, ++z0_val )
309  (*z0_val) = (*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
310  }
311  else {
312  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
313  (*z0_val) = (*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
314  }
315  }
316  }
317  else {
318  if( alpha1 == ST::one() ) {
319  // z0 = alpha0*v0 + v1 + beta*z0
320  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
321  for( int j = 0; j < subDim; ++j, ++z0_val )
322  (*z0_val) = alpha0*(*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
323  }
324  else {
325  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
326  (*z0_val) = alpha0*(*v0_val) + (*v1_val) + l_beta*(*z0_val);
327  }
328  }
329  else {
330  // z0 = alpha0*v0 + alpha1*v1 + beta*z0
331  if( z0_s==1 && v0_s==1 && v1_s==1 ) {
332  for( int j = 0; j < subDim; ++j, ++z0_val )
333  (*z0_val) = alpha0*(*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
334  }
335  else {
336  for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
337  (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
338  }
339  }
340  }
341  }
342  }
343  else {
344  //
345  // Totally general implementation (but least efficient)
346  //
347  // z0 *= beta
348  if( beta_ == ST::zero() ) {
349  for( int j = 0; j < subDim; ++j, z0_val += z0_s )
350  (*z0_val) = ST::zero();
351  }
352  else if( beta_ != ST::one() ) {
353  for( int j = 0; j < subDim; ++j, z0_val += z0_s )
354  (*z0_val) *= beta_;
355  }
356  // z0 += sum( alpha[k]*v[k], k=0...l_num_vecs-1)
357  z0_val = targ_sub_vecs[0].values().begin();
358  for( int j = 0; j < subDim; ++j, z0_val += z0_s ) {
359  for( int k = 0; k < l_num_vecs; ++k ) {
360  const Scalar
361  &alpha_k = alpha_[k],
362  &v_k_val = *v_val[k];
363  (*z0_val) += alpha_k * v_k_val;
364  v_val[k] += v_s[k];
365  }
366  }
367  }
368 }
369 
370 
371 } // namespace RTOpPack
372 
373 
374 #endif // RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
void apply_op_impl(const ArrayView< const ConstSubVectorView< Scalar > > &sub_vecs, const ArrayView< const SubVectorView< Scalar > > &targ_sub_vecs, const Ptr< ReductTarget > &reduct_obj_inout) const
const ArrayView< const Scalar > alpha() const
void setOpNameBase(const std::string &op_name_base)
TypeTo as(const TypeFrom &t)
TOpLinearCombination(const ArrayView< const Scalar > &alpha_in=Teuchos::null, const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero())
#define TEUCHOS_TEST_FOR_EXCEPT(throw_exception_test)
TEUCHOSCORE_LIB_DLL_EXPORT Teuchos::RCP< WorkspaceStore > get_default_workspace_store()