11 #ifndef RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
12 #define RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
21 template<
class Scalar>
29 this->
alpha(alpha_in);
35 template<
class Scalar>
44 template<
class Scalar>
50 template<
class Scalar>
54 template<
class Scalar>
58 template<
class Scalar>
65 template<
class Scalar>
81 validate_apply_op<Scalar>(*
this, as<int>(alpha_.size()), 1,
false,
82 sub_vecs, targ_sub_vecs, reduct_obj_inout.
getConst());
84 (void)reduct_obj_inout;
87 const int l_num_vecs = alpha_.size();
91 iter_t z0_val = targ_sub_vecs[0].values().begin();
92 const ptrdiff_t z0_s = targ_sub_vecs[0].stride();
93 Workspace<const_iter_t> v_val(wss,l_num_vecs);
94 Workspace<ptrdiff_t> v_s(wss,l_num_vecs,
false);
95 for(
int k = 0; k < l_num_vecs; ++k ) {
100 v_val[k] = sub_vecs[k].values().begin();
101 v_s[k] = sub_vecs[k].stride();
108 if( l_num_vecs == 1 ) {
112 const Scalar l_alpha = alpha_[0], l_beta = beta_;
113 const_iter_t v0_val = v_val[0];
114 const ptrdiff_t v0_s = v_s[0];
115 if( l_beta==ST::zero() ) {
117 if( z0_s==1 && v0_s==1 ) {
118 for(
int j = 0; j < subDim; ++j )
119 (*z0_val++) = l_alpha * (*v0_val++);
122 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
123 (*z0_val) = l_alpha * (*v0_val);
126 else if( l_beta==ST::one() ) {
130 if( z0_s==1 && v0_s==1 ) {
131 for(
int j = 0; j < subDim; ++j )
132 (*z0_val++) += l_alpha * (*v0_val++);
135 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
136 (*z0_val) += l_alpha * (*v0_val);
141 if( z0_s==1 && v0_s==1 ) {
142 for(
int j = 0; j < subDim; ++j, ++z0_val )
143 (*z0_val) = l_alpha * (*v0_val++) + l_beta*(*z0_val);
146 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
147 (*z0_val) = l_alpha * (*v0_val) + l_beta*(*z0_val);
151 else if( l_num_vecs == 2 ) {
155 const Scalar alpha0 = alpha_[0], alpha1=alpha_[1], l_beta = beta_;
156 const_iter_t v0_val = v_val[0];
157 const ptrdiff_t v0_s = v_s[0];
158 const_iter_t v1_val = v_val[1];
159 const ptrdiff_t v1_s = v_s[1];
160 if( l_beta==ST::zero() ) {
161 if( alpha0 == ST::one() ) {
162 if( alpha1 == ST::one() ) {
164 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
165 for(
int j = 0; j < subDim; ++j )
166 (*z0_val++) = (*v0_val++) + (*v1_val++);
169 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
170 (*z0_val) = (*v0_val) + (*v1_val);
175 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
176 for(
int j = 0; j < subDim; ++j )
177 (*z0_val++) = (*v0_val++) + alpha1*(*v1_val++);
180 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
181 (*z0_val) = (*v0_val) + alpha1*(*v1_val);
186 if( alpha1 == ST::one() ) {
188 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
189 for(
int j = 0; j < subDim; ++j )
190 (*z0_val++) = alpha0*(*v0_val++) + (*v1_val++);
193 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
194 (*z0_val) = alpha0*(*v0_val) + (*v1_val);
199 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
200 for(
int j = 0; j < subDim; ++j )
201 (*z0_val++) = alpha0*(*v0_val++) + alpha1*(*v1_val++);
204 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
205 (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val);
210 else if( l_beta==ST::one() ) {
211 if( alpha0 == ST::one() ) {
212 if( alpha1 == ST::one() ) {
214 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
215 for(
int j = 0; j < subDim; ++j, ++z0_val )
216 (*z0_val) += (*v0_val++) + (*v1_val++);
219 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
220 (*z0_val) += (*v0_val) + (*v1_val);
225 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
226 for(
int j = 0; j < subDim; ++j, ++z0_val )
227 (*z0_val) += (*v0_val++) + alpha1*(*v1_val++);
230 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
231 (*z0_val) += (*v0_val) + alpha1*(*v1_val);
236 if( alpha1 == ST::one() ) {
238 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
239 for(
int j = 0; j < subDim; ++j, ++z0_val )
240 (*z0_val) += alpha0*(*v0_val++) + (*v1_val++);
243 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
244 (*z0_val) += alpha0*(*v0_val) + (*v1_val);
249 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
250 for(
int j = 0; j < subDim; ++j, ++z0_val )
251 (*z0_val) += alpha0*(*v0_val++) + alpha1*(*v1_val++);
254 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
255 (*z0_val) += alpha0*(*v0_val) + alpha1*(*v1_val);
261 if( alpha0 == ST::one() ) {
262 if( alpha1 == ST::one() ) {
264 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
265 for(
int j = 0; j < subDim; ++j, ++z0_val )
266 (*z0_val) = (*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
269 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
270 (*z0_val) = (*v0_val) + (*v1_val) + l_beta*(*z0_val);
275 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
276 for(
int j = 0; j < subDim; ++j, ++z0_val )
277 (*z0_val) = (*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
280 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
281 (*z0_val) = (*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
286 if( alpha1 == ST::one() ) {
288 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
289 for(
int j = 0; j < subDim; ++j, ++z0_val )
290 (*z0_val) = alpha0*(*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
293 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
294 (*z0_val) = alpha0*(*v0_val) + (*v1_val) + l_beta*(*z0_val);
299 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
300 for(
int j = 0; j < subDim; ++j, ++z0_val )
301 (*z0_val) = alpha0*(*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
304 for(
int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
305 (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
316 if( beta_ == ST::zero() ) {
317 for(
int j = 0; j < subDim; ++j, z0_val += z0_s )
318 (*z0_val) = ST::zero();
320 else if( beta_ != ST::one() ) {
321 for(
int j = 0; j < subDim; ++j, z0_val += z0_s )
325 z0_val = targ_sub_vecs[0].values().begin();
326 for(
int j = 0; j < subDim; ++j, z0_val += z0_s ) {
327 for(
int k = 0; k < l_num_vecs; ++k ) {
329 &alpha_k = alpha_[k],
330 &v_k_val = *v_val[k];
331 (*z0_val) += alpha_k * v_k_val;
342 #endif // RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
Class for a changeable sub-vector.
Teuchos_Ordinal index_type
Class for a non-changeable sub-vector.
TypeTo as(const TypeFrom &t)
void apply_op_impl(const ArrayView< const ConstSubVectorView< Scalar > > &sub_vecs, const ArrayView< const SubVectorView< Scalar > > &targ_sub_vecs, const Ptr< ReductTarget > &reduct_obj_inout) const
const ArrayView< const Scalar > alpha() const
void setOpNameBase(const std::string &op_name_base)
Just set the operator name.
TOpLinearCombination(const ArrayView< const Scalar > &alpha_in=Teuchos::null, const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero())
Ptr< const T > getConst() const
#define TEUCHOS_TEST_FOR_EXCEPT(throw_exception_test)
TEUCHOSCORE_LIB_DLL_EXPORT Teuchos::RCP< WorkspaceStore > get_default_workspace_store()