ParallelReduce.hpp
/*------------------------------------------------------------------------*/
/* phdMesh : Parallel Heterogeneous Dynamic unstructured Mesh */
/* Copyright (2007) Sandia Corporation */
/* */
/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */
/* license for use of this work by or on behalf of the U.S. Government. */
/* */
/* This library is free software; you can redistribute it and/or modify */
/* it under the terms of the GNU Lesser General Public License as */
/* published by the Free Software Foundation; either version 2.1 of the */
/* License, or (at your option) any later version. */
/* */
/* This library is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
/* Lesser General Public License for more details. */
/* */
/* You should have received a copy of the GNU Lesser General Public */
/* License along with this library; if not, write to the Free Software */
/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 */
/* USA */
/*------------------------------------------------------------------------*/
#ifndef util_ParallelReduce_hpp
#define util_ParallelReduce_hpp

#include <cstddef>
#include <iosfwd>
#include <string>
#include <util/Parallel.hpp>
#include <util/SimpleArrayOps.hpp>

//------------------------------------------------------------------------

namespace phdmesh {

// Parallel collective: write a string from each process to the stream.

void all_write_string( ParallelMachine ,
                       std::ostream & ,
                       const std::string & );

// Parallel collectives: element-wise global sum of 'count' values;
// every process receives the result in 'global'.

void all_reduce_sum( ParallelMachine ,
                     const double * local , double * global , unsigned count );

void all_reduce_sum( ParallelMachine ,
                     const float * local , float * global , unsigned count );

void all_reduce_sum( ParallelMachine ,
                     const int * local , int * global , unsigned count );

// Parallel collective: element-wise global bitwise-OR.

void all_reduce_bor( ParallelMachine ,
                     const unsigned * local ,
                     unsigned * global , unsigned count );

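// Example (illustrative, not part of this header): summing per-process
// partial results.  Here 'comm' is assumed to be an initialized
// ParallelMachine (an MPI communicator in MPI builds).
//
//   double local[3]  = { 1.0 , 2.0 , 3.0 };  // this process's values
//   double global[3] = { 0.0 , 0.0 , 0.0 };  // receives the sums
//   all_reduce_sum( comm , local , global , 3 );
//   // global[i] now holds the sum of local[i] over all processes.
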
}

//----------------------------------------------------------------------
//----------------------------------------------------------------------

namespace phdmesh {

// Signature of a user-defined reduction operation.  In MPI builds this
// matches MPI_User_function, so an operation can be registered with
// the underlying message-passing library directly.

extern "C" {
typedef void (*ParallelReduceOp)
  ( void * inv , void * outv , int * , ParallelDatatype * );
}

void all_reduce_internal( ParallelMachine arg_comm ,
                          ParallelReduceOp arg_op ,
                          void * arg_in ,
                          void * arg_out ,
                          unsigned arg_len );

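// A minimal sketch (an assumption for illustration; the actual
// definition lives in the corresponding source file) of what
// all_reduce_internal could look like in an MPI build.  Because
// ParallelReduceOp matches MPI_User_function, the operation can be
// registered with MPI directly and the buffer reduced as raw bytes:
//
//   void all_reduce_internal( ParallelMachine arg_comm ,
//                             ParallelReduceOp arg_op ,
//                             void * arg_in ,
//                             void * arg_out ,
//                             unsigned arg_len )
//   {
//     MPI_Op mpi_op ;
//     MPI_Op_create( arg_op , 0 /* not assumed commutative */ , & mpi_op );
//     MPI_Allreduce( arg_in , arg_out , (int) arg_len ,
//                    MPI_BYTE , mpi_op , arg_comm );
//     MPI_Op_free( & mpi_op );
//   }
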
namespace {

// Anonymous namespace so that these classes have internal linkage,
// avoiding linker complaints about multiply-defined symbols.

// Terminator for the compile-time list of reduction operations:
// an empty buffer and no-op copy/reduce members.

struct ReduceEnd {
  struct BufferType {};
  void copyin( BufferType & ) const {}
  void copyout( BufferType & ) const {}
  static void op( BufferType & , BufferType & ) {}
};

// Workhorse class for aggregating reduction operations: each Reduce
// holds one operation and chains to the next, and the nested
// BufferType packs every operation's values into one contiguous buffer.

template < class Oper , class Next = ReduceEnd >
struct Reduce {
  typedef typename Oper::type Type ;
  enum { N = Oper::N };

  struct BufferType {
    Type m_value[N];
    typename Next::BufferType m_next ;
  };

  Next m_next ;
  Type * m_ptr ;   // the caller's local values / reduction result

  Next & set( const Oper & arg ) { m_ptr = arg.ptr ; return m_next ; }

  void reduce( ParallelMachine comm ) const ;

  void copyin( BufferType & b ) const
    { Copy<N>( b.m_value , m_ptr ); m_next.copyin( b.m_next ); }

  void copyout( BufferType & b ) const
    { Copy<N>( m_ptr , b.m_value ); m_next.copyout( b.m_next ); }

  static void op( BufferType & dst , BufferType & src )
    { Oper::op(dst.m_value,src.m_value); Next::op(dst.m_next,src.m_next); }

  static void void_op( void*inv, void*inoutv, int*, ParallelDatatype*);
};

template <class Oper, class Next>
void Reduce<Oper,Next>::void_op( void*inv, void*inoutv,int*,ParallelDatatype*)
{
  op( * reinterpret_cast<BufferType*>( inoutv ) ,
      * reinterpret_cast<BufferType*>( inv ) );
}

template <class Oper, class Next>
void Reduce<Oper,Next>::reduce( ParallelMachine comm ) const
{
  ParallelReduceOp f = reinterpret_cast<ParallelReduceOp>( & void_op );
  BufferType inbuf , outbuf ;
  copyin( inbuf );               // gather all local values into one buffer
  all_reduce_internal( comm , f , & inbuf , & outbuf , sizeof(BufferType) );
  copyout( outbuf );             // scatter the reduced values back out
}

} // namespace
} // namespace phdmesh
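
// How the aggregation works: each Reduce<Oper,Next> nests the next
// operation's buffer after its own values, so a compile-time list such
// as
//
//   Reduce< Op1 , Reduce< Op2 > >
//
// has a BufferType laid out, conceptually, as
//
//   struct BufferType {
//     Op1::type m_value[ Op1::N ];      // first operation's values
//     struct {
//       Op2::type m_value[ Op2::N ];    // second operation's values
//       ReduceEnd::BufferType m_next ;  // empty terminator
//     } m_next ;
//   };
//
// A single reduction over sizeof(BufferType) bytes then applies every
// operation in one communication instead of one reduction per operation.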

//----------------------------------------------------------------------
//----------------------------------------------------------------------

namespace phdmesh {

// all_reduce: perform one to six reduction operations, aggregated
// into a single underlying parallel reduction.

template < class Op1 >
inline
void all_reduce( ParallelMachine comm , const Op1 & op1 )
{
  Reduce< Op1 > work ;
  work.set( op1 );
  work.reduce( comm );
}

template < class Op1 , class Op2 >
inline
void all_reduce( ParallelMachine comm , const Op1 & op1 ,
                                        const Op2 & op2 )
{
  Reduce< Op1 ,
  Reduce< Op2 > > work ;
  work.set( op1 ).set( op2 );
  work.reduce( comm );
}

template < class Op1 , class Op2 , class Op3 >
inline
void all_reduce( ParallelMachine comm , const Op1 & op1 ,
                                        const Op2 & op2 ,
                                        const Op3 & op3 )
{
  Reduce< Op1 ,
  Reduce< Op2 ,
  Reduce< Op3 > > > work ;
  work.set( op1 ).set( op2 ).set( op3 );
  work.reduce( comm );
}

template < class Op1 , class Op2 , class Op3 , class Op4 >
inline
void all_reduce( ParallelMachine comm , const Op1 & op1 ,
                                        const Op2 & op2 ,
                                        const Op3 & op3 ,
                                        const Op4 & op4 )
{
  Reduce< Op1 ,
  Reduce< Op2 ,
  Reduce< Op3 ,
  Reduce< Op4 > > > > work ;
  work.set( op1 ).set( op2 ).set( op3 ).set( op4 );
  work.reduce( comm );
}

template < class Op1 , class Op2 , class Op3 , class Op4 ,
           class Op5 >
inline
void all_reduce( ParallelMachine comm , const Op1 & op1 ,
                                        const Op2 & op2 ,
                                        const Op3 & op3 ,
                                        const Op4 & op4 ,
                                        const Op5 & op5 )
{
  Reduce< Op1 ,
  Reduce< Op2 ,
  Reduce< Op3 ,
  Reduce< Op4 ,
  Reduce< Op5 > > > > > work ;
  work.set( op1 ).set( op2 ).set( op3 ).set( op4 ).set( op5 );
  work.reduce( comm );
}

template < class Op1 , class Op2 , class Op3 , class Op4 ,
           class Op5 , class Op6 >
inline
void all_reduce( ParallelMachine comm , const Op1 & op1 ,
                                        const Op2 & op2 ,
                                        const Op3 & op3 ,
                                        const Op4 & op4 ,
                                        const Op5 & op5 ,
                                        const Op6 & op6 )
{
  Reduce< Op1 ,
  Reduce< Op2 ,
  Reduce< Op3 ,
  Reduce< Op4 ,
  Reduce< Op5 ,
  Reduce< Op6 > > > > > > work ;
  work.set( op1 ).set( op2 ).set( op3 ).set( op4 ).set( op5 ).set( op6 );
  work.reduce( comm );
}

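// Example (illustrative sketch, not part of this header): a
// user-defined operation type satisfying the interface Reduce expects:
// a value type, a compile-time length N, a 'ptr' member locating the
// local values, and a static 'op' combining two value arrays.  The
// name 'SumOp' is hypothetical; operation types of this shape are
// expected to come from <util/SimpleArrayOps.hpp>.
//
//   template< unsigned Count >
//   struct SumOp {
//     typedef double type ;
//     enum { N = Count };
//     double * ptr ;                    // local input / global result
//     explicit SumOp( double * p ) : ptr( p ) {}
//     static void op( double * dst , double * src )
//       { for ( unsigned i = 0 ; i < N ; ++i ) { dst[i] += src[i] ; } }
//   };
//
//   double a[3] = { 1 , 2 , 3 };
//   double b[2] = { 4 , 5 };
//   all_reduce( comm , SumOp<3>( a ) , SumOp<2>( b ) );
//   // 'a' and 'b' now hold element-wise sums over all processes,
//   // obtained with one reduction rather than two.
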
}

//----------------------------------------------------------------------

#endif