28 #ifndef util_ParallelReduce_hpp
29 #define util_ParallelReduce_hpp
34 #include <util/Parallel.hpp>
35 #include <util/SimpleArrayOps.hpp>
46 const std::string & );
// Declares the all_reduce_sum overload for double values.
// Parameters: a ParallelMachine (unnamed), `local` (pointer to const
// double input), `global` (pointer to double output) and `count`.
// NOTE(review): presumably sums `count` entries of `local` element-wise
// across the processes of the machine into `global`; the definition is
// not visible here -- confirm.
48 void all_reduce_sum( ParallelMachine ,
49 const double * local ,
double * global ,
unsigned count );
// Declares the all_reduce_sum overload for float values; same parameter
// layout as the double overload (machine, const input pointer, output
// pointer, unsigned count).
// NOTE(review): reduction semantics are inferred from the name only;
// the definition is not visible here -- confirm.
51 void all_reduce_sum( ParallelMachine ,
52 const float * local ,
float * global ,
unsigned count );
// Declares the all_reduce_sum overload for int values; same parameter
// layout as the floating-point overloads (machine, const input pointer,
// output pointer, unsigned count).
// NOTE(review): reduction semantics are inferred from the name only;
// the definition is not visible here -- confirm.
54 void all_reduce_sum( ParallelMachine ,
55 const int * local ,
int * global ,
unsigned count );
// Declares all_reduce_bor for unsigned values: takes a ParallelMachine,
// `local` (pointer to const unsigned input), `global` (pointer to
// unsigned output) and `count`.
// NOTE(review): "bor" presumably means bitwise OR across processes;
// the definition is not visible here -- confirm.
57 void all_reduce_bor( ParallelMachine ,
58 const unsigned * local ,
59 unsigned * global ,
unsigned count );
// Function-pointer type for a reduction callback: returns void and takes
// two untyped buffers (`inv`, `outv`), an int* and a ParallelDatatype*.
// Reduce::void_op has this parameter list and is cast to this type in
// Reduce::reduce.
// NOTE(review): the shape resembles an MPI user-defined reduction
// function -- confirm against the ParallelMachine implementation.
80 typedef void (*ParallelReduceOp)
81 (
void * inv ,
void * outv ,
int * , ParallelDatatype * );
// Start of the all_reduce_internal declaration: first parameter is the
// communicator (`arg_comm`), second the reduction callback (`arg_op`).
// The remaining parameters are not visible in this excerpt; the call in
// Reduce::reduce additionally passes an input buffer address, an output
// buffer address and sizeof(BufferType).
84 void all_reduce_internal( ParallelMachine arg_comm ,
85 ParallelReduceOp arg_op ,
// No-op copyin: accepts a BufferType reference and does nothing.
// NOTE(review): appears to be the end-of-chain case that stops the
// recursive m_next.copyin() calls made by Reduce; the enclosing type is
// not visible here -- confirm.
97 void copyin( BufferType & )
const {}
// No-op copyout: accepts a BufferType reference and does nothing.
// NOTE(review): appears to be the end-of-chain counterpart of
// Reduce::copyout; the enclosing type is not visible here -- confirm.
98 void copyout( BufferType & )
const {}
// No-op static reduction step: takes two BufferType references and does
// nothing. NOTE(review): appears to terminate the Next::op recursion used
// by Reduce::op -- confirm.
99 static void op( BufferType & , BufferType & ) {}
// Head of the Reduce template: parameterized on an operator type `Oper`
// and the rest of the chain `Next`, which defaults to ReduceEnd.
// Type is the operator's value type (Oper::type), N its compile-time
// count (Oper::N), and m_next is a member of type Next::BufferType.
// NOTE(review): the class header and the declaration that encloses
// m_next are not visible in this excerpt; m_next presumably belongs to a
// nested BufferType struct -- confirm.
104 template <
class Oper ,
class Next = ReduceEnd >
106 typedef typename Oper::type Type ;
107 enum { N = Oper::N };
111 typename Next::BufferType m_next ;
// Binds this link to an operator by storing arg.ptr in m_ptr, then
// returns m_next (as Next &) so calls can be chained down the list,
// e.g. work.set( op1 ).set( op2 ).
117 Next & set(
const Oper & arg ) { m_ptr = arg.ptr ;
return m_next ; }
// Declaration of the const member reduce(), taking the communicator;
// it is defined out of class further down in this header.
119 void reduce( ParallelMachine comm )
const ;
// Fills buffer `b` for this link, then recurses into the next link.
// Calls Copy<N>( b.m_value , m_ptr ) followed by m_next.copyin( b.m_next ).
// NOTE(review): Copy<N> is defined elsewhere; presumably destination
// first, copying N values from m_ptr into b.m_value -- confirm.
121 void copyin( BufferType & b )
const
122 { Copy<N>( b.m_value , m_ptr ); m_next.copyin( b.m_next ); }
// Mirror of copyin: calls Copy<N>( m_ptr , b.m_value ) followed by
// m_next.copyout( b.m_next ).
// NOTE(review): presumably writes N values from b.m_value back through
// m_ptr (destination first); Copy<N> is defined elsewhere -- confirm.
124 void copyout( BufferType & b )
const
125 { Copy<N>( m_ptr , b.m_value ); m_next.copyout( b.m_next ); }
// Combines two buffers for the whole chain: applies Oper::op to this
// link's values ( dst.m_value , src.m_value ), then delegates the
// remaining links to Next::op on the m_next sub-buffers.
127 static void op( BufferType & dst , BufferType & src )
128 { Oper::op(dst.m_value,src.m_value); Next::op(dst.m_next,src.m_next); }
// Declaration of the untyped static callback: two void* buffers, an int*
// and a ParallelDatatype*, matching the ParallelReduceOp parameter list.
// Defined out of class further down in this header.
130 static void void_op(
void*inv,
void*inoutv,
int*, ParallelDatatype*);
// Out-of-class definition of Reduce<Oper,Next>::void_op.
// Reinterprets both untyped pointers as BufferType* and forwards to op()
// with `inoutv` as the destination and `inv` as the source. The int* and
// ParallelDatatype* arguments are unnamed and unused.
// (The enclosing braces of the body are not visible in this excerpt.)
133 template <
class Oper,
class Next>
134 void Reduce<Oper,Next>::void_op(
void*inv,
void*inoutv,
int*,ParallelDatatype*)
136 op( * reinterpret_cast<BufferType*>( inoutv ) ,
137 * reinterpret_cast<BufferType*>( inv ) );
// Out-of-class definition of Reduce<Oper,Next>::reduce.
// Visible steps: take the address of void_op cast to ParallelReduceOp,
// declare two BufferType objects (inbuf, outbuf), and call
// all_reduce_internal with the communicator, the callback, both buffer
// addresses and sizeof(BufferType).
// NOTE(review): the body braces and any statements around the call are
// not visible in this excerpt; presumably copyin( inbuf ) precedes the
// call and copyout( outbuf ) follows it -- confirm.
140 template <
class Oper,
class Next>
141 void Reduce<Oper,Next>::reduce( ParallelMachine comm )
const
143 ParallelReduceOp f =
reinterpret_cast<ParallelReduceOp
>( & void_op );
144 BufferType inbuf , outbuf ;
146 all_reduce_internal( comm , f , & inbuf , & outbuf ,
sizeof(BufferType) );
// all_reduce overload for a single operator argument (Op1), taking the
// communicator and the operator by const reference.
// The function body is not visible in this excerpt.
158 template <
class Op1 >
160 void all_reduce( ParallelMachine comm ,
const Op1 & op1 )
// all_reduce overload for two operators (Op1, Op2).
// Visible lines show a nested Reduce<...> object `work` whose innermost
// link is Reduce< Op2 >, bound with chained set() calls in argument order.
// The rest of the parameter list and body is not visible in this excerpt.
167 template <
class Op1 ,
class Op2 >
169 void all_reduce( ParallelMachine comm ,
const Op1 & op1 ,
173 Reduce< Op2 > > work ;
174 work.set( op1 ).set( op2 );
// all_reduce overload for three operators (Op1..Op3).
// Visible lines show a nested Reduce<...> object `work` ending in
// Reduce< Op3 >, bound with chained set() calls in argument order.
// The rest of the parameter list and body is not visible in this excerpt.
178 template <
class Op1 ,
class Op2 ,
class Op3 >
180 void all_reduce( ParallelMachine comm ,
const Op1 & op1 ,
186 Reduce< Op3 > > > work ;
187 work.set( op1 ).set( op2 ).set( op3 );
// all_reduce overload for four operators (Op1..Op4).
// Visible lines show a nested Reduce<...> object `work` ending in
// Reduce< Op4 >, bound with chained set() calls in argument order.
// The rest of the parameter list and body is not visible in this excerpt.
191 template <
class Op1 ,
class Op2 ,
class Op3 ,
class Op4 >
193 void all_reduce( ParallelMachine comm ,
const Op1 & op1 ,
201 Reduce< Op4 > > > > work ;
202 work.set( op1 ).set( op2 ).set( op3 ).set( op4 );
// all_reduce overload for five operators: the body uses Reduce< Op5 > and
// set( op5 ), although the visible template header stops at Op4.
// Visible lines show a nested Reduce<...> object `work` bound with
// chained set() calls in argument order.
// The rest of the template header, parameter list and body is not
// visible in this excerpt.
206 template <
class Op1 ,
class Op2 ,
class Op3 ,
class Op4 ,
209 void all_reduce( ParallelMachine comm ,
const Op1 & op1 ,
219 Reduce< Op5 > > > > > work ;
220 work.set( op1 ).set( op2 ).set( op3 ).set( op4 ).set( op5 );
// all_reduce overload for six operators (Op1..Op6).
// Visible lines show a nested Reduce<...> object `work` ending in
// Reduce< Op6 >, bound with chained set() calls in argument order.
// The rest of the parameter list and body is not visible in this excerpt.
224 template <
class Op1 ,
class Op2 ,
class Op3 ,
class Op4 ,
225 class Op5 ,
class Op6 >
227 void all_reduce( ParallelMachine comm ,
const Op1 & op1 ,
239 Reduce< Op6 > > > > > > work ;
240 work.set( op1 ).set( op2 ).set( op3 ).set( op4 ).set( op5 ).set( op6 );
void all_write_string(ParallelMachine, std::ostream &, const std::string &)