RTOpPack: Extra C/C++ Code for Vector Reduction/Transformation Operators  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
RTOpToMPI.c
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization
6 // Copyright (2003) Sandia Corporation
7 //
8 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9 // license for use of this work by or on behalf of the U.S. Government.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov)
39 //
40 // ***********************************************************************
41 // @HEADER
42 */
43 
44 #include "RTOpToMPI.h"
45 
46 #include <stdlib.h>
47 
48 #ifdef RTOP_TO_MPI_SHOW_TIMES
49 #include <time.h>
50 #include <stdio.h>
51 #endif
52 
54  const int num_values
55  ,const int num_indexes
56  ,const int num_chars
57  ,int* num_entries
58  ,int block_lengths[]
59  ,MPI_Aint displacements[]
60  ,MPI_Datatype datatypes[]
61  )
62 {
63  int k = 0, off = 0;
64  /* values */
65  block_lengths[k] = 3 + num_values; /* must carry size information */
66  displacements[k] = 0;
67  datatypes[k] = RTOpMPI_VALUE_TYPE;
68  ++k;
69  off += (3 + num_values) * sizeof(RTOp_value_type);
70  /* indexes */
71  if( num_indexes ) {
72  block_lengths[k] = num_indexes;
73  displacements[k] = off;
74  datatypes[k] = RTOpMPI_INDEX_TYPE;
75  ++k;
76  off += num_indexes * sizeof(RTOp_index_type);
77  }
78  /* chars */
79  if( num_chars ) {
80  block_lengths[k] = num_chars;
81  displacements[k] = off;
82  datatypes[k] = RTOpMPI_CHAR_TYPE;
83  ++k;
84  }
85  *num_entries = k;
86 }
87 
89  const struct RTOp_RTOp* op
90  ,RTOp_ReductTarget reduct_obj
91  ,int num_values
92  ,int num_indexes
93  ,int num_chars
94  ,void* reduct_obj_ext
95  )
96 {
97  int err = 0;
98  int
99  num_values_off = 0,
100  num_indexes_off = num_values_off + sizeof(RTOp_value_type),
101  num_chars_off = num_indexes_off + sizeof(RTOp_value_type),
102  values_off = num_chars_off + sizeof(RTOp_value_type),
103  indexes_off = values_off + num_values * sizeof(RTOp_value_type),
104  chars_off = indexes_off + num_indexes * sizeof(RTOp_index_type);
105  *(RTOp_value_type*)((char*)reduct_obj_ext + num_values_off) = num_values;
106  *(RTOp_value_type*)((char*)reduct_obj_ext + num_indexes_off) = num_indexes;
107  *(RTOp_value_type*)((char*)reduct_obj_ext + num_chars_off) = num_chars;
109  op,reduct_obj
110  ,num_values, (RTOp_value_type*)((char*)reduct_obj_ext + values_off)
111  ,num_indexes, (RTOp_index_type*)((char*)reduct_obj_ext + indexes_off)
112  ,num_chars, (RTOp_char_type*)((char*)reduct_obj_ext + chars_off)
113  );
114  return err;
115 }
116 
118  const struct RTOp_RTOp* op
119  ,const void* reduct_obj_ext
120  ,RTOp_ReductTarget reduct_obj
121  )
122 {
123  int err = 0;
124  int
125  num_values_off = 0,
126  num_values = *(RTOp_value_type*)((char*)reduct_obj_ext + num_values_off),
127  num_indexes_off = num_values_off + sizeof(RTOp_value_type),
128  num_indexes = *(RTOp_value_type*)((char*)reduct_obj_ext + num_indexes_off),
129  num_chars_off = num_indexes_off + sizeof(RTOp_value_type),
130  num_chars = *(RTOp_value_type*)((char*)reduct_obj_ext + num_chars_off),
131  values_off = num_chars_off + sizeof(RTOp_value_type),
132  indexes_off = values_off + num_values * sizeof(RTOp_value_type),
133  chars_off = indexes_off + num_indexes * sizeof(RTOp_index_type);
135  op
136  ,num_values, (RTOp_value_type*)((char*)reduct_obj_ext + values_off)
137  ,num_indexes, (RTOp_index_type*)((char*)reduct_obj_ext + indexes_off)
138  ,num_chars, (RTOp_char_type*)((char*) reduct_obj_ext + chars_off)
139  ,reduct_obj
140  );
141  return err;
142 }
143 
145  MPI_Comm comm, const struct RTOp_RTOp* op, int root_rank
146  ,const int num_cols
147  ,const int num_vecs, const struct RTOp_SubVector sub_vecs[]
148  ,const int num_targ_vecs, const struct RTOp_MutableSubVector sub_targ_vecs[]
149  ,RTOp_ReductTarget reduct_objs[]
150  )
151 {
152  int err = 0;
153  int num_reduct_type_values = 0, num_reduct_type_indexes = 0,
154  num_reduct_type_chars = 0, num_reduct_type_entries = 0;
155  RTOp_ReductTarget *i_reduct_objs = NULL;
156  int target_type_block_lengths[3];
157  MPI_Aint target_type_displacements[3];
158  RTOp_Datatype target_type_datatypes[3];
159  MPI_Datatype mpi_reduct_ext_type = MPI_DATATYPE_NULL;
160  int reduct_obj_ext_size = 0;
161  char *i_reduct_objs_ext = NULL;
162  char *i_reduct_objs_tmp = NULL;
163  RTOp_reduct_op_func_ptr_t reduct_op_func_ptr = NULL;
164  MPI_Op mpi_op = MPI_OP_NULL;
165  int rank = -1;
166  int k;
167  int kc;
168 #ifdef RTOP_TO_MPI_SHOW_TIMES
169  const double secs_per_tick = ((double)1.0) / CLOCKS_PER_SEC;
170  clock_t ticks_start_start, ticks_start=0, ticks_end = 0;
171 #endif
172 
173  if( comm == MPI_COMM_NULL ) {
174  /* Just do the reduction on this processor and be done with it! */
175  if( sub_vecs || sub_targ_vecs ) {
176  for( kc = 0; kc < num_cols; ++kc ) {
177  err = RTOp_apply_op(
178  op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
179  ,reduct_objs ? reduct_objs[kc] : RTOp_REDUCT_OBJ_NULL
180  );
181  if (err) goto ERR_LABEL;
182  }
183  }
184  return 0; /* Success! */
185  }
186 
187  /*
188  * Get the description of the datatype of the target object.
189  * We need this in order to map it to an MPI user defined
190  * data type so that the reduction operations can be performed
191  * over all of the of processors.
192  *
193  * Get the number of the entries in the array that describes
194  * target type's datatypes
195  */
197  op, &num_reduct_type_values, &num_reduct_type_indexes, &num_reduct_type_chars );
198  if(err) goto ERR_LABEL;
199  num_reduct_type_entries
200  = (num_reduct_type_values ? 1 : 0)
201  + (num_reduct_type_indexes ? 1 : 0)
202  + (num_reduct_type_chars ? 1 : 0);
203 
204  if( num_reduct_type_entries ) {
205 #ifdef RTOP_TO_MPI_SHOW_TIMES
206  if(RTOp_MPI_apply_op_print_timings) {
207  printf("RTOp_MPI_apply_op(...) : timing various MPI calls and other activities\n");
208  ticks_start_start = clock();
209  }
210 #endif
211  /*
212  * There is a non-null reduction target object so we need to reduce
213  * it across processors
214  *
215  * Allocate the intermediate target object and perform the reduction for the
216  * vector elements on this processor.
217  */
218  i_reduct_objs = malloc( sizeof(RTOp_ReductTarget) * num_cols );
219  for( kc = 0; kc < num_cols; ++kc ) {
220  i_reduct_objs[kc] = RTOp_REDUCT_OBJ_NULL;
221  RTOp_reduct_obj_create( op, i_reduct_objs + kc );
222  }
223 #ifdef RTOP_TO_MPI_SHOW_TIMES
224  if(RTOp_MPI_apply_op_print_timings) {
225  printf("calling RTOp_apply_op(...)");
226  ticks_start = clock();
227  }
228 #endif
229  if( sub_vecs || sub_targ_vecs ) {
230  for( kc = 0; kc < num_cols; ++kc ) {
231  err = RTOp_apply_op(
232  op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
233  ,i_reduct_objs[kc]
234  );
235  if (err) goto ERR_LABEL;
236  }
237  }
238 #ifdef RTOP_TO_MPI_SHOW_TIMES
239  if(RTOp_MPI_apply_op_print_timings) {
240  ticks_end = clock();
241  printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
242  }
243 #endif
244  /* Get the arrays that describe the reduction object datatype */
245  if( num_reduct_type_values == 0 ) ++num_reduct_type_entries; /* Add the block for the sizes */
247  num_reduct_type_values, num_reduct_type_indexes, num_reduct_type_chars
248  ,&num_reduct_type_entries
249  ,target_type_block_lengths, target_type_displacements, target_type_datatypes
250  );
251  /* Translate this target datatype description to an MPI datatype */
252 #ifdef RTOP_TO_MPI_SHOW_TIMES
253  if(RTOp_MPI_apply_op_print_timings) {
254  printf("calling MPI_Type_struct(...)");
255  ticks_start = clock();
256  }
257 #endif
258  err = MPI_Type_struct( num_reduct_type_entries
259  , target_type_block_lengths, target_type_displacements
260  , target_type_datatypes, &mpi_reduct_ext_type );
261 #ifdef RTOP_TO_MPI_SHOW_TIMES
262  if(RTOp_MPI_apply_op_print_timings) {
263  ticks_end = clock();
264  printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
265  }
266 #endif
267  if(err) goto ERR_LABEL;
268 #ifdef RTOP_TO_MPI_SHOW_TIMES
269  if(RTOp_MPI_apply_op_print_timings) {
270  printf("calling MPI_Type_commit(...)");
271  ticks_start = clock();
272  }
273 #endif
274  err = MPI_Type_commit( &mpi_reduct_ext_type );
275 #ifdef RTOP_TO_MPI_SHOW_TIMES
276  if(RTOp_MPI_apply_op_print_timings) {
277  ticks_end = clock();
278  printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
279  }
280 #endif
281  if(err) goto ERR_LABEL;
282  /* */
283  /* Create an external contiguous representation for the intermediate reduction object */
284  /* that MPI can use. This is very low level but at least the user */
285  /* does not have to do it. */
286  /* */
287  reduct_obj_ext_size =
288  (3 + num_reduct_type_values) * sizeof(RTOp_value_type) +
289  num_reduct_type_indexes * sizeof(RTOp_index_type) +
290  num_reduct_type_chars * sizeof(RTOp_char_type);
291  i_reduct_objs_ext = malloc( reduct_obj_ext_size * num_cols );
292  for( kc = 0; kc < num_cols; ++kc ) {
294  op,i_reduct_objs[kc],num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
295  ,i_reduct_objs_ext+kc*reduct_obj_ext_size
296  );
297  }
298  /* */
299  /* Now perform a global reduction over all of the processors. */
300  /* */
301  /* Get the user defined reduction operation for MPI to use */
302  RTOp_get_reduct_op( op, &reduct_op_func_ptr );
303  if( reduct_op_func_ptr != NULL ) {
304  /* */
305  /* The operator object has returned an MPI compatible reduction function so */
306  /* MPI will be of great help in preforming the global reduction :-). */
307  /* */
308  /* Create the MPI reduction operator object */
309  /* */
310 #ifdef RTOP_TO_MPI_SHOW_TIMES
311  if(RTOp_MPI_apply_op_print_timings) {
312  printf("calling MPI_Op_create(...)");
313  ticks_start = clock();
314  }
315 #endif
316  err = MPI_Op_create(
317  reduct_op_func_ptr
318  ,1 /* The op is communitive? yes == 1! */
319  ,&mpi_op
320  );
321 #ifdef RTOP_TO_MPI_SHOW_TIMES
322  if(RTOp_MPI_apply_op_print_timings) {
323  ticks_end = clock();
324  printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
325  }
326 #endif
327  if(err) goto ERR_LABEL;
328  if( root_rank >= 0 ) {
329  MPI_Comm_rank( comm, &rank );
330  /* Apply the reduction operation over all of the processors */
331  /* and reduce to one target object only on the root processor */
332  i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
333  err = MPI_Reduce(
334  i_reduct_objs_ext
335  ,rank == root_rank ? i_reduct_objs_tmp : NULL /* I don't know if this is MPI legal? */
336  ,num_cols, mpi_reduct_ext_type
337  ,mpi_op, root_rank, comm
338  );
339  if(err) goto ERR_LABEL;
340  if( rank == root_rank ) { /* bit-by-bit copy */
341  for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
342  }
343  }
344  else {
345  /* Apply the reduction operation over all of the processors */
346  /* and reduce to one target object on each processor */
347  i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
348 #ifdef RTOP_TO_MPI_SHOW_TIMES
349  if(RTOp_MPI_apply_op_print_timings) {
350  printf("calling MPI_Allreduce(...)");
351  ticks_start = clock();
352  }
353 #endif
354  err = MPI_Allreduce(
355  i_reduct_objs_ext, i_reduct_objs_tmp, num_cols
356  ,mpi_reduct_ext_type, mpi_op, comm
357  );
358 #ifdef RTOP_TO_MPI_SHOW_TIMES
359  if(RTOp_MPI_apply_op_print_timings) {
360  ticks_end = clock();
361  printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
362  }
363 #endif
364  if(err) goto ERR_LABEL;
365  for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
366  }
367  /* Load the updated state of the reduction target object and reduce. */
368  for( kc = 0; kc < num_cols; ++kc ) {
369  RTOp_load_reduct_obj_ext_state( op, i_reduct_objs_ext+kc*reduct_obj_ext_size, i_reduct_objs[kc] );
370  RTOp_reduce_reduct_objs( op, i_reduct_objs[kc], reduct_objs[kc] );
371  }
372  }
373  else {
374  /* */
375  /* We must do without the MPI compatible reduction function :-( We must */
376  /* manually perform the reduction operation ourselves. Note, this will */
377  /* not be as efficient as when MPI would do it but it helps take some */
378  /* of the burden off of the developers of operator classes. */
379  /* */
380  /* What we need to do is to gather all of the intermediate reduction */
381  /* objects to the root process and then do the reduction pair-wise. */
382  /* */
383  assert( reduct_op_func_ptr ); /* ToDo: Implement! */
384  }
385  }
386  else {
387  /* */
388  /* There is a null reduction target object so we don't need to reduce */
389  /* it across processors */
390  /* */
391  if( sub_vecs || sub_targ_vecs ) {
392  for( kc = 0; kc < num_cols; ++kc ) {
393  err = RTOp_apply_op(
394  op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
395  ,RTOp_REDUCT_OBJ_NULL
396  );
397  if (err) goto ERR_LABEL;
398  }
399  }
400  }
401 
402 ERR_LABEL:
403 
404  /* Clean up dynamically allocated memory! */
405 
406  if( i_reduct_objs_tmp != NULL )
407  free( i_reduct_objs_tmp );
408  if( mpi_op != MPI_OP_NULL )
409  MPI_Op_free( &mpi_op );
410  if( i_reduct_objs_ext != NULL )
411  free( i_reduct_objs_ext );
412  if( mpi_reduct_ext_type != MPI_DATATYPE_NULL )
413  MPI_Type_free( &mpi_reduct_ext_type );
414  if( i_reduct_objs != NULL ) {
415  for( kc = 0; kc < num_cols; ++kc )
416  RTOp_reduct_obj_free( op, &i_reduct_objs[0] );
417  free( i_reduct_objs );
418  }
419 
420  return err;
421 }
422 
423 #ifdef RTOP_TO_MPI_SHOW_TIMES
424 int RTOp_MPI_apply_op_print_timings = 0;
425 #endif
int RTOp_get_reduct_type_num_entries(const struct RTOp_RTOp *op, int *num_values, int *num_indexes, int *num_chars)
Definition: RTOp.c:209
int RTOp_load_reduct_obj_state(const struct RTOp_RTOp *op, int num_values, const RTOp_value_type value_data[], int num_indexes, const RTOp_index_type index_data[], int num_chars, const RTOp_char_type char_data[], RTOp_ReductTarget reduct_obj)
Definition: RTOp.c:307
int RTOp_reduct_obj_create(const struct RTOp_RTOp *op, RTOp_ReductTarget *reduct_obj)
Definition: RTOp.c:230
int RTOp_reduce_reduct_objs(const struct RTOp_RTOp *op, RTOp_ReductTarget in_reduct_obj, RTOp_ReductTarget inout_reduct_obj)
Definition: RTOp.c:374
int RTOp_extract_reduct_obj_state(const struct RTOp_RTOp *op, const RTOp_ReductTarget reduct_obj, int num_values, RTOp_value_type value_data[], int num_indexes, RTOp_index_type index_data[], int num_chars, RTOp_char_type char_data[])
Definition: RTOp.c:280
int RTOp_reduct_obj_free(const struct RTOp_RTOp *op, RTOp_ReductTarget *reduct_obj)
Definition: RTOp.c:267
int RTOp_apply_op(const struct RTOp_RTOp *op, const int num_vecs, const struct RTOp_SubVector sub_vecs[], const int num_targ_vecs, const struct RTOp_MutableSubVector targ_sub_vecs[], RTOp_ReductTarget reduct_obj)
Definition: RTOp.c:357
int RTOp_load_reduct_obj_ext_state(const struct RTOp_RTOp *op, const void *reduct_obj_ext, RTOp_ReductTarget reduct_obj)
Definition: RTOpToMPI.c:117
void RTOp_MPI_type_signature(const int num_values, const int num_indexes, const int num_chars, int *num_entries, int block_lengths[], MPI_Aint displacements[], MPI_Datatype datatypes[])
Definition: RTOpToMPI.c:53
int RTOp_MPI_apply_op(MPI_Comm comm, const struct RTOp_RTOp *op, int root_rank, const int num_cols, const int num_vecs, const struct RTOp_SubVector sub_vecs[], const int num_targ_vecs, const struct RTOp_MutableSubVector sub_targ_vecs[], RTOp_ReductTarget reduct_objs[])
Definition: RTOpToMPI.c:144
int RTOp_get_reduct_op(const struct RTOp_RTOp *op, RTOp_reduct_op_func_ptr_t *reduct_op_func_ptr)
Definition: RTOp.c:385
int RTOp_extract_reduct_obj_ext_state(const struct RTOp_RTOp *op, RTOp_ReductTarget reduct_obj, int num_values, int num_indexes, int num_chars, void *reduct_obj_ext)
Definition: RTOpToMPI.c:88