48 #ifdef RTOP_TO_MPI_SHOW_TIMES
55 ,
const int num_indexes
65 block_lengths[k] = 3 + num_values;
72 block_lengths[k] = num_indexes;
73 displacements[k] = off;
80 block_lengths[k] = num_chars;
81 displacements[k] = off;
105 *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_values_off) = num_values;
106 *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_indexes_off) = num_indexes;
107 *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_chars_off) = num_chars;
119 ,
const void* reduct_obj_ext
126 num_values = *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_values_off),
128 num_indexes = *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_indexes_off),
130 num_chars = *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_chars_off),
153 int num_reduct_type_values = 0, num_reduct_type_indexes = 0,
154 num_reduct_type_chars = 0, num_reduct_type_entries = 0;
156 int target_type_block_lengths[3];
157 MPI_Aint target_type_displacements[3];
160 int reduct_obj_ext_size = 0;
161 char *i_reduct_objs_ext = NULL;
162 char *i_reduct_objs_tmp = NULL;
163 RTOp_reduct_op_func_ptr_t reduct_op_func_ptr = NULL;
168 #ifdef RTOP_TO_MPI_SHOW_TIMES
169 const double secs_per_tick = ((double)1.0) / CLOCKS_PER_SEC;
170 clock_t ticks_start_start, ticks_start=0, ticks_end = 0;
175 if( sub_vecs || sub_targ_vecs ) {
176 for( kc = 0; kc < num_cols; ++kc ) {
178 op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
181 if (err)
goto ERR_LABEL;
197 op, &num_reduct_type_values, &num_reduct_type_indexes, &num_reduct_type_chars );
198 if(err)
goto ERR_LABEL;
199 num_reduct_type_entries
200 = (num_reduct_type_values ? 1 : 0)
201 + (num_reduct_type_indexes ? 1 : 0)
202 + (num_reduct_type_chars ? 1 : 0);
204 if( num_reduct_type_entries ) {
205 #ifdef RTOP_TO_MPI_SHOW_TIMES
207 printf(
"RTOp_MPI_apply_op(...) : timing various MPI calls and other activities\n");
208 ticks_start_start = clock();
219 for( kc = 0; kc < num_cols; ++kc ) {
223 #ifdef RTOP_TO_MPI_SHOW_TIMES
225 printf(
"calling RTOp_apply_op(...)");
226 ticks_start = clock();
229 if( sub_vecs || sub_targ_vecs ) {
230 for( kc = 0; kc < num_cols; ++kc ) {
232 op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
235 if (err)
goto ERR_LABEL;
238 #ifdef RTOP_TO_MPI_SHOW_TIMES
241 printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
245 if( num_reduct_type_values == 0 ) ++num_reduct_type_entries;
247 num_reduct_type_values, num_reduct_type_indexes, num_reduct_type_chars
248 ,&num_reduct_type_entries
249 ,target_type_block_lengths, target_type_displacements, target_type_datatypes
252 #ifdef RTOP_TO_MPI_SHOW_TIMES
254 printf(
"calling MPI_Type_struct(...)");
255 ticks_start = clock();
259 , target_type_block_lengths, target_type_displacements
260 , target_type_datatypes, &mpi_reduct_ext_type );
261 #ifdef RTOP_TO_MPI_SHOW_TIMES
264 printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
267 if(err)
goto ERR_LABEL;
268 #ifdef RTOP_TO_MPI_SHOW_TIMES
270 printf(
"calling MPI_Type_commit(...)");
271 ticks_start = clock();
275 #ifdef RTOP_TO_MPI_SHOW_TIMES
278 printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
281 if(err)
goto ERR_LABEL;
287 reduct_obj_ext_size =
291 i_reduct_objs_ext = malloc( reduct_obj_ext_size * num_cols );
292 for( kc = 0; kc < num_cols; ++kc ) {
294 op,i_reduct_objs[kc],num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
295 ,i_reduct_objs_ext+kc*reduct_obj_ext_size
303 if( reduct_op_func_ptr != NULL ) {
310 #ifdef RTOP_TO_MPI_SHOW_TIMES
312 printf(
"calling MPI_Op_create(...)");
313 ticks_start = clock();
321 #ifdef RTOP_TO_MPI_SHOW_TIMES
324 printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
327 if(err)
goto ERR_LABEL;
328 if( root_rank >= 0 ) {
332 i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
335 ,rank == root_rank ? i_reduct_objs_tmp : NULL
336 ,num_cols, mpi_reduct_ext_type
337 ,mpi_op, root_rank, comm
339 if(err)
goto ERR_LABEL;
340 if( rank == root_rank ) {
341 for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
347 i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
348 #ifdef RTOP_TO_MPI_SHOW_TIMES
350 printf(
"calling MPI_Allreduce(...)");
351 ticks_start = clock();
355 i_reduct_objs_ext, i_reduct_objs_tmp, num_cols
356 ,mpi_reduct_ext_type, mpi_op, comm
358 #ifdef RTOP_TO_MPI_SHOW_TIMES
361 printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
364 if(err)
goto ERR_LABEL;
365 for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
368 for( kc = 0; kc < num_cols; ++kc ) {
383 assert( reduct_op_func_ptr );
391 if( sub_vecs || sub_targ_vecs ) {
392 for( kc = 0; kc < num_cols; ++kc ) {
394 op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
397 if (err)
goto ERR_LABEL;
406 if( i_reduct_objs_tmp != NULL )
407 free( i_reduct_objs_tmp );
410 if( i_reduct_objs_ext != NULL )
411 free( i_reduct_objs_ext );
414 if( i_reduct_objs != NULL ) {
415 for( kc = 0; kc < num_cols; ++kc )
417 free( i_reduct_objs );
423 #ifdef RTOP_TO_MPI_SHOW_TIMES
int RTOp_get_reduct_type_num_entries(const struct RTOp_RTOp *op, int *num_values, int *num_indexes, int *num_chars)
int MPI_Reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
int MPI_Type_free(MPI_Datatype *op)
int RTOp_load_reduct_obj_state(const struct RTOp_RTOp *op, int num_values, const RTOp_value_type value_data[], int num_indexes, const RTOp_index_type index_data[], int num_chars, const RTOp_char_type char_data[], RTOp_ReductTarget reduct_obj)
int MPI_Type_commit(MPI_Datatype *datatype)
int MPI_Comm_rank(MPI_Comm comm, int *rank)
int RTOp_reduct_obj_create(const struct RTOp_RTOp *op, RTOp_ReductTarget *reduct_obj)
int RTOp_reduce_reduct_objs(const struct RTOp_RTOp *op, RTOp_ReductTarget in_reduct_obj, RTOp_ReductTarget inout_reduct_obj)
#define RTOpMPI_CHAR_TYPE
int RTOp_extract_reduct_obj_state(const struct RTOp_RTOp *op, const RTOp_ReductTarget reduct_obj, int num_values, RTOp_value_type value_data[], int num_indexes, RTOp_index_type index_data[], int num_chars, RTOp_char_type char_data[])
int MPI_Op_create(MPI_User_function *func, int communitive, MPI_Op *op)
int RTOp_reduct_obj_free(const struct RTOp_RTOp *op, RTOp_ReductTarget *reduct_obj)
int RTOp_apply_op(const struct RTOp_RTOp *op, const int num_vecs, const struct RTOp_SubVector sub_vecs[], const int num_targ_vecs, const struct RTOp_MutableSubVector targ_sub_vecs[], RTOp_ReductTarget reduct_obj)
#define MPI_DATATYPE_NULL
int MPI_Type_struct(int count, int *array_of_blocklengths, MPI_Aint *array_of_displacements, MPI_Datatype *array_of_types, MPI_Datatype *data_type)
#define RTOpMPI_VALUE_TYPE
int RTOp_MPI_apply_op_print_timings
MPI_Datatype RTOp_Datatype
#define RTOpMPI_INDEX_TYPE
int MPI_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
int MPI_Op_free(MPI_Op *op)
int RTOp_load_reduct_obj_ext_state(const struct RTOp_RTOp *op, const void *reduct_obj_ext, RTOp_ReductTarget reduct_obj)
void RTOp_MPI_type_signature(const int num_values, const int num_indexes, const int num_chars, int *num_entries, int block_lengths[], MPI_Aint displacements[], MPI_Datatype datatypes[])
int RTOp_MPI_apply_op(MPI_Comm comm, const struct RTOp_RTOp *op, int root_rank, const int num_cols, const int num_vecs, const struct RTOp_SubVector sub_vecs[], const int num_targ_vecs, const struct RTOp_MutableSubVector sub_targ_vecs[], RTOp_ReductTarget reduct_objs[])
int RTOp_get_reduct_op(const struct RTOp_RTOp *op, RTOp_reduct_op_func_ptr_t *reduct_op_func_ptr)
int RTOp_extract_reduct_obj_ext_state(const struct RTOp_RTOp *op, RTOp_ReductTarget reduct_obj, int num_values, int num_indexes, int num_chars, void *reduct_obj_ext)
Teuchos_Ordinal RTOp_index_type
#define RTOp_REDUCT_OBJ_NULL