44 #include "RTOpToMPI.h"
48 #ifdef RTOP_TO_MPI_SHOW_TIMES
55 ,
const int num_indexes
59 ,MPI_Aint displacements[]
60 ,MPI_Datatype datatypes[]
65 block_lengths[k] = 3 + num_values;
67 datatypes[k] = RTOpMPI_VALUE_TYPE;
69 off += (3 + num_values) *
sizeof(RTOp_value_type);
72 block_lengths[k] = num_indexes;
73 displacements[k] = off;
74 datatypes[k] = RTOpMPI_INDEX_TYPE;
76 off += num_indexes *
sizeof(RTOp_index_type);
80 block_lengths[k] = num_chars;
81 displacements[k] = off;
82 datatypes[k] = RTOpMPI_CHAR_TYPE;
90 ,RTOp_ReductTarget reduct_obj
100 num_indexes_off = num_values_off +
sizeof(RTOp_value_type),
101 num_chars_off = num_indexes_off +
sizeof(RTOp_value_type),
102 values_off = num_chars_off +
sizeof(RTOp_value_type),
103 indexes_off = values_off + num_values *
sizeof(RTOp_value_type),
104 chars_off = indexes_off + num_indexes *
sizeof(RTOp_index_type);
105 *(RTOp_value_type*)((
char*)reduct_obj_ext + num_values_off) = num_values;
106 *(RTOp_value_type*)((
char*)reduct_obj_ext + num_indexes_off) = num_indexes;
107 *(RTOp_value_type*)((
char*)reduct_obj_ext + num_chars_off) = num_chars;
110 ,num_values, (RTOp_value_type*)((
char*)reduct_obj_ext + values_off)
111 ,num_indexes, (RTOp_index_type*)((
char*)reduct_obj_ext + indexes_off)
112 ,num_chars, (RTOp_char_type*)((
char*)reduct_obj_ext + chars_off)
119 ,
const void* reduct_obj_ext
120 ,RTOp_ReductTarget reduct_obj
126 num_values = *(RTOp_value_type*)((
char*)reduct_obj_ext + num_values_off),
127 num_indexes_off = num_values_off +
sizeof(RTOp_value_type),
128 num_indexes = *(RTOp_value_type*)((
char*)reduct_obj_ext + num_indexes_off),
129 num_chars_off = num_indexes_off +
sizeof(RTOp_value_type),
130 num_chars = *(RTOp_value_type*)((
char*)reduct_obj_ext + num_chars_off),
131 values_off = num_chars_off +
sizeof(RTOp_value_type),
132 indexes_off = values_off + num_values *
sizeof(RTOp_value_type),
133 chars_off = indexes_off + num_indexes *
sizeof(RTOp_index_type);
136 ,num_values, (RTOp_value_type*)((
char*)reduct_obj_ext + values_off)
137 ,num_indexes, (RTOp_index_type*)((
char*)reduct_obj_ext + indexes_off)
138 ,num_chars, (RTOp_char_type*)((
char*) reduct_obj_ext + chars_off)
145 MPI_Comm comm,
const struct RTOp_RTOp* op,
int root_rank
149 ,RTOp_ReductTarget reduct_objs[]
153 int num_reduct_type_values = 0, num_reduct_type_indexes = 0,
154 num_reduct_type_chars = 0, num_reduct_type_entries = 0;
155 RTOp_ReductTarget *i_reduct_objs = NULL;
156 int target_type_block_lengths[3];
157 MPI_Aint target_type_displacements[3];
158 RTOp_Datatype target_type_datatypes[3];
159 MPI_Datatype mpi_reduct_ext_type = MPI_DATATYPE_NULL;
160 int reduct_obj_ext_size = 0;
161 char *i_reduct_objs_ext = NULL;
162 char *i_reduct_objs_tmp = NULL;
163 RTOp_reduct_op_func_ptr_t reduct_op_func_ptr = NULL;
164 MPI_Op mpi_op = MPI_OP_NULL;
168 #ifdef RTOP_TO_MPI_SHOW_TIMES
169 const double secs_per_tick = ((double)1.0) / CLOCKS_PER_SEC;
170 clock_t ticks_start_start, ticks_start=0, ticks_end = 0;
173 if( comm == MPI_COMM_NULL ) {
175 if( sub_vecs || sub_targ_vecs ) {
176 for( kc = 0; kc < num_cols; ++kc ) {
178 op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
179 ,reduct_objs ? reduct_objs[kc] : RTOp_REDUCT_OBJ_NULL
181 if (err)
goto ERR_LABEL;
197 op, &num_reduct_type_values, &num_reduct_type_indexes, &num_reduct_type_chars );
198 if(err)
goto ERR_LABEL;
199 num_reduct_type_entries
200 = (num_reduct_type_values ? 1 : 0)
201 + (num_reduct_type_indexes ? 1 : 0)
202 + (num_reduct_type_chars ? 1 : 0);
204 if( num_reduct_type_entries ) {
205 #ifdef RTOP_TO_MPI_SHOW_TIMES
206 if(RTOp_MPI_apply_op_print_timings) {
207 printf(
"RTOp_MPI_apply_op(...) : timing various MPI calls and other activities\n");
208 ticks_start_start = clock();
218 i_reduct_objs = malloc(
sizeof(RTOp_ReductTarget) * num_cols );
219 for( kc = 0; kc < num_cols; ++kc ) {
220 i_reduct_objs[kc] = RTOp_REDUCT_OBJ_NULL;
223 #ifdef RTOP_TO_MPI_SHOW_TIMES
224 if(RTOp_MPI_apply_op_print_timings) {
225 printf(
"calling RTOp_apply_op(...)");
226 ticks_start = clock();
229 if( sub_vecs || sub_targ_vecs ) {
230 for( kc = 0; kc < num_cols; ++kc ) {
232 op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
235 if (err)
goto ERR_LABEL;
238 #ifdef RTOP_TO_MPI_SHOW_TIMES
239 if(RTOp_MPI_apply_op_print_timings) {
241 printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
245 if( num_reduct_type_values == 0 ) ++num_reduct_type_entries;
247 num_reduct_type_values, num_reduct_type_indexes, num_reduct_type_chars
248 ,&num_reduct_type_entries
249 ,target_type_block_lengths, target_type_displacements, target_type_datatypes
252 #ifdef RTOP_TO_MPI_SHOW_TIMES
253 if(RTOp_MPI_apply_op_print_timings) {
254 printf(
"calling MPI_Type_struct(...)");
255 ticks_start = clock();
258 err = MPI_Type_struct( num_reduct_type_entries
259 , target_type_block_lengths, target_type_displacements
260 , target_type_datatypes, &mpi_reduct_ext_type );
261 #ifdef RTOP_TO_MPI_SHOW_TIMES
262 if(RTOp_MPI_apply_op_print_timings) {
264 printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
267 if(err)
goto ERR_LABEL;
268 #ifdef RTOP_TO_MPI_SHOW_TIMES
269 if(RTOp_MPI_apply_op_print_timings) {
270 printf(
"calling MPI_Type_commit(...)");
271 ticks_start = clock();
274 err = MPI_Type_commit( &mpi_reduct_ext_type );
275 #ifdef RTOP_TO_MPI_SHOW_TIMES
276 if(RTOp_MPI_apply_op_print_timings) {
278 printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
281 if(err)
goto ERR_LABEL;
287 reduct_obj_ext_size =
288 (3 + num_reduct_type_values) *
sizeof(RTOp_value_type) +
289 num_reduct_type_indexes *
sizeof(RTOp_index_type) +
290 num_reduct_type_chars *
sizeof(RTOp_char_type);
291 i_reduct_objs_ext = malloc( reduct_obj_ext_size * num_cols );
292 for( kc = 0; kc < num_cols; ++kc ) {
294 op,i_reduct_objs[kc],num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
295 ,i_reduct_objs_ext+kc*reduct_obj_ext_size
303 if( reduct_op_func_ptr != NULL ) {
310 #ifdef RTOP_TO_MPI_SHOW_TIMES
311 if(RTOp_MPI_apply_op_print_timings) {
312 printf(
"calling MPI_Op_create(...)");
313 ticks_start = clock();
321 #ifdef RTOP_TO_MPI_SHOW_TIMES
322 if(RTOp_MPI_apply_op_print_timings) {
324 printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
327 if(err)
goto ERR_LABEL;
328 if( root_rank >= 0 ) {
329 MPI_Comm_rank( comm, &rank );
332 i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
335 ,rank == root_rank ? i_reduct_objs_tmp : NULL
336 ,num_cols, mpi_reduct_ext_type
337 ,mpi_op, root_rank, comm
339 if(err)
goto ERR_LABEL;
340 if( rank == root_rank ) {
341 for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
347 i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
348 #ifdef RTOP_TO_MPI_SHOW_TIMES
349 if(RTOp_MPI_apply_op_print_timings) {
350 printf(
"calling MPI_Allreduce(...)");
351 ticks_start = clock();
355 i_reduct_objs_ext, i_reduct_objs_tmp, num_cols
356 ,mpi_reduct_ext_type, mpi_op, comm
358 #ifdef RTOP_TO_MPI_SHOW_TIMES
359 if(RTOp_MPI_apply_op_print_timings) {
361 printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
364 if(err)
goto ERR_LABEL;
365 for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
368 for( kc = 0; kc < num_cols; ++kc ) {
383 assert( reduct_op_func_ptr );
391 if( sub_vecs || sub_targ_vecs ) {
392 for( kc = 0; kc < num_cols; ++kc ) {
394 op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
395 ,RTOp_REDUCT_OBJ_NULL
397 if (err)
goto ERR_LABEL;
406 if( i_reduct_objs_tmp != NULL )
407 free( i_reduct_objs_tmp );
408 if( mpi_op != MPI_OP_NULL )
409 MPI_Op_free( &mpi_op );
410 if( i_reduct_objs_ext != NULL )
411 free( i_reduct_objs_ext );
412 if( mpi_reduct_ext_type != MPI_DATATYPE_NULL )
413 MPI_Type_free( &mpi_reduct_ext_type );
414 if( i_reduct_objs != NULL ) {
415 for( kc = 0; kc < num_cols; ++kc )
417 free( i_reduct_objs );
423 #ifdef RTOP_TO_MPI_SHOW_TIMES
424 int RTOp_MPI_apply_op_print_timings = 0;
int RTOp_get_reduct_type_num_entries(const struct RTOp_RTOp *op, int *num_values, int *num_indexes, int *num_chars)
int RTOp_load_reduct_obj_state(const struct RTOp_RTOp *op, int num_values, const RTOp_value_type value_data[], int num_indexes, const RTOp_index_type index_data[], int num_chars, const RTOp_char_type char_data[], RTOp_ReductTarget reduct_obj)
int RTOp_reduct_obj_create(const struct RTOp_RTOp *op, RTOp_ReductTarget *reduct_obj)
int RTOp_reduce_reduct_objs(const struct RTOp_RTOp *op, RTOp_ReductTarget in_reduct_obj, RTOp_ReductTarget inout_reduct_obj)
int RTOp_extract_reduct_obj_state(const struct RTOp_RTOp *op, const RTOp_ReductTarget reduct_obj, int num_values, RTOp_value_type value_data[], int num_indexes, RTOp_index_type index_data[], int num_chars, RTOp_char_type char_data[])
int RTOp_reduct_obj_free(const struct RTOp_RTOp *op, RTOp_ReductTarget *reduct_obj)
int RTOp_apply_op(const struct RTOp_RTOp *op, const int num_vecs, const struct RTOp_SubVector sub_vecs[], const int num_targ_vecs, const struct RTOp_MutableSubVector targ_sub_vecs[], RTOp_ReductTarget reduct_obj)
int RTOp_load_reduct_obj_ext_state(const struct RTOp_RTOp *op, const void *reduct_obj_ext, RTOp_ReductTarget reduct_obj)
void RTOp_MPI_type_signature(const int num_values, const int num_indexes, const int num_chars, int *num_entries, int block_lengths[], MPI_Aint displacements[], MPI_Datatype datatypes[])
int RTOp_MPI_apply_op(MPI_Comm comm, const struct RTOp_RTOp *op, int root_rank, const int num_cols, const int num_vecs, const struct RTOp_SubVector sub_vecs[], const int num_targ_vecs, const struct RTOp_MutableSubVector sub_targ_vecs[], RTOp_ReductTarget reduct_objs[])
int RTOp_get_reduct_op(const struct RTOp_RTOp *op, RTOp_reduct_op_func_ptr_t *reduct_op_func_ptr)
int RTOp_extract_reduct_obj_ext_state(const struct RTOp_RTOp *op, RTOp_ReductTarget reduct_obj, int num_values, int num_indexes, int num_chars, void *reduct_obj_ext)