43 #ifndef RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
44 #define RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
47 #include "Teuchos_Workspace.hpp"
53 template<
class Scalar>
55 const ArrayView<const Scalar> &alpha_in,
61 this->
alpha(alpha_in);
67 template<
class Scalar>
69 const ArrayView<const Scalar> &alpha_in )
76 template<
class Scalar>
77 const ArrayView<const Scalar>
82 template<
class Scalar>
86 template<
class Scalar>
90 template<
class Scalar>
97 template<
class Scalar>
101 const Ptr<ReductTarget> &reduct_obj_inout
113 validate_apply_op<Scalar>(*
this, as<int>(alpha_.size()), 1,
false,
114 sub_vecs, targ_sub_vecs, reduct_obj_inout.getConst());
116 (void)reduct_obj_inout;
119 const int l_num_vecs = alpha_.size();
122 const RTOpPack::index_type subDim = targ_sub_vecs[0].subDim();
123 iter_t z0_val = targ_sub_vecs[0].values().begin();
124 const ptrdiff_t z0_s = targ_sub_vecs[0].stride();
125 Workspace<const_iter_t> v_val(wss,l_num_vecs);
126 Workspace<ptrdiff_t> v_s(wss,l_num_vecs,
false);
127 for(
int k = 0; k < l_num_vecs; ++k ) {
132 v_val[k] = sub_vecs[k].values().begin();
133 v_s[k] = sub_vecs[k].stride();
140 if( l_num_vecs == 1 ) {
144 const Scalar l_alpha = alpha_[0], l_beta = beta_;
145 const_iter_t v0_val = v_val[0];
146 const ptrdiff_t v0_s = v_s[0];
147 if( l_beta==ST::zero() ) {
149 if( z0_s==1 && v0_s==1 ) {
150 for(
int j = 0; j < subDim; ++j )
151 (*z0_val++) = l_alpha * (*v0_val++);
154 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
155 (*z0_val) = l_alpha * (*v0_val);
158 else if( l_beta==ST::one() ) {
162 if( z0_s==1 && v0_s==1 ) {
163 for(
int j = 0; j < subDim; ++j )
164 (*z0_val++) += l_alpha * (*v0_val++);
167 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
168 (*z0_val) += l_alpha * (*v0_val);
173 if( z0_s==1 && v0_s==1 ) {
174 for(
int j = 0; j < subDim; ++j, ++z0_val )
175 (*z0_val) = l_alpha * (*v0_val++) + l_beta*(*z0_val);
178 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
179 (*z0_val) = l_alpha * (*v0_val) + l_beta*(*z0_val);
183 else if( l_num_vecs == 2 ) {
187 const Scalar alpha0 = alpha_[0], alpha1=alpha_[1], l_beta = beta_;
188 const_iter_t v0_val = v_val[0];
189 const ptrdiff_t v0_s = v_s[0];
190 const_iter_t v1_val = v_val[1];
191 const ptrdiff_t v1_s = v_s[1];
192 if( l_beta==ST::zero() ) {
193 if( alpha0 == ST::one() ) {
194 if( alpha1 == ST::one() ) {
196 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
197 for(
int j = 0; j < subDim; ++j )
198 (*z0_val++) = (*v0_val++) + (*v1_val++);
201 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
202 (*z0_val) = (*v0_val) + (*v1_val);
207 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
208 for(
int j = 0; j < subDim; ++j )
209 (*z0_val++) = (*v0_val++) + alpha1*(*v1_val++);
212 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
213 (*z0_val) = (*v0_val) + alpha1*(*v1_val);
218 if( alpha1 == ST::one() ) {
220 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
221 for(
int j = 0; j < subDim; ++j )
222 (*z0_val++) = alpha0*(*v0_val++) + (*v1_val++);
225 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
226 (*z0_val) = alpha0*(*v0_val) + (*v1_val);
231 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
232 for(
int j = 0; j < subDim; ++j )
233 (*z0_val++) = alpha0*(*v0_val++) + alpha1*(*v1_val++);
236 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
237 (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val);
242 else if( l_beta==ST::one() ) {
243 if( alpha0 == ST::one() ) {
244 if( alpha1 == ST::one() ) {
246 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
247 for(
int j = 0; j < subDim; ++j, ++z0_val )
248 (*z0_val) += (*v0_val++) + (*v1_val++);
251 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
252 (*z0_val) += (*v0_val) + (*v1_val);
257 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
258 for(
int j = 0; j < subDim; ++j, ++z0_val )
259 (*z0_val) += (*v0_val++) + alpha1*(*v1_val++);
262 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
263 (*z0_val) += (*v0_val) + alpha1*(*v1_val);
268 if( alpha1 == ST::one() ) {
270 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
271 for(
int j = 0; j < subDim; ++j, ++z0_val )
272 (*z0_val) += alpha0*(*v0_val++) + (*v1_val++);
275 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
276 (*z0_val) += alpha0*(*v0_val) + (*v1_val);
281 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
282 for(
int j = 0; j < subDim; ++j, ++z0_val )
283 (*z0_val) += alpha0*(*v0_val++) + alpha1*(*v1_val++);
286 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
287 (*z0_val) += alpha0*(*v0_val) + alpha1*(*v1_val);
293 if( alpha0 == ST::one() ) {
294 if( alpha1 == ST::one() ) {
296 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
297 for(
int j = 0; j < subDim; ++j, ++z0_val )
298 (*z0_val) = (*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
301 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
302 (*z0_val) = (*v0_val) + (*v1_val) + l_beta*(*z0_val);
307 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
308 for(
int j = 0; j < subDim; ++j, ++z0_val )
309 (*z0_val) = (*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
312 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
313 (*z0_val) = (*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
318 if( alpha1 == ST::one() ) {
320 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
321 for(
int j = 0; j < subDim; ++j, ++z0_val )
322 (*z0_val) = alpha0*(*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
325 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
326 (*z0_val) = alpha0*(*v0_val) + (*v1_val) + l_beta*(*z0_val);
331 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
332 for(
int j = 0; j < subDim; ++j, ++z0_val )
333 (*z0_val) = alpha0*(*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
336 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
337 (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
348 if( beta_ == ST::zero() ) {
349 for(
int j = 0; j < subDim; ++j, z0_val += z0_s )
350 (*z0_val) = ST::zero();
352 else if( beta_ != ST::one() ) {
353 for(
int j = 0; j < subDim; ++j, z0_val += z0_s )
357 z0_val = targ_sub_vecs[0].values().begin();
358 for(
int j = 0; j < subDim; ++j, z0_val += z0_s ) {
359 for(
int k = 0; k < l_num_vecs; ++k ) {
361 &alpha_k = alpha_[k],
362 &v_k_val = *v_val[k];
363 (*z0_val) += alpha_k * v_k_val;
374 #endif // RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
void apply_op_impl(const ArrayView< const ConstSubVectorView< Scalar > > &sub_vecs, const ArrayView< const SubVectorView< Scalar > > &targ_sub_vecs, const Ptr< ReductTarget > &reduct_obj_inout) const
const ArrayView< const Scalar > alpha() const
void setOpNameBase(const std::string &op_name_base)
TypeTo as(const TypeFrom &t)
TOpLinearCombination(const ArrayView< const Scalar > &alpha_in=Teuchos::null, const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero())
#define TEUCHOS_TEST_FOR_EXCEPT(throw_exception_test)
TEUCHOSCORE_LIB_DLL_EXPORT Teuchos::RCP< WorkspaceStore > get_default_workspace_store()