48 #ifdef RTOP_TO_MPI_SHOW_TIMES 
   55   ,
const int num_indexes
 
   65   block_lengths[k] = 3 + num_values; 
 
   72     block_lengths[k] = num_indexes;
 
   73     displacements[k] = off;
 
   80     block_lengths[k] = num_chars;
 
   81     displacements[k] = off;
 
  105   *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_values_off)  = num_values;
 
  106   *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_indexes_off) = num_indexes;
 
  107   *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_chars_off)   = num_chars;
 
  119   ,
const void*              reduct_obj_ext
 
  126     num_values      =                   *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_values_off),
 
  128     num_indexes     =                   *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_indexes_off),
 
  130     num_chars       =                   *(
RTOp_value_type*)((
char*)reduct_obj_ext + num_chars_off),
 
  153   int num_reduct_type_values = 0, num_reduct_type_indexes = 0,
 
  154     num_reduct_type_chars = 0,  num_reduct_type_entries = 0;
 
  156   int                         target_type_block_lengths[3];
 
  157   MPI_Aint                    target_type_displacements[3];
 
  160   int                         reduct_obj_ext_size = 0;
 
  161   char                        *i_reduct_objs_ext = NULL;
 
  162   char                        *i_reduct_objs_tmp = NULL;
 
  163   RTOp_reduct_op_func_ptr_t   reduct_op_func_ptr  = NULL;
 
  168 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  169     const double secs_per_tick = ((double)1.0) / CLOCKS_PER_SEC;
 
  170   clock_t ticks_start_start, ticks_start=0, ticks_end = 0;
 
  175     if( sub_vecs || sub_targ_vecs ) {
 
  176       for( kc = 0; kc < num_cols; ++kc ) {
 
  178           op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
 
  181         if (err) 
goto ERR_LABEL;
 
  197     op, &num_reduct_type_values, &num_reduct_type_indexes, &num_reduct_type_chars );
 
  198   if(err) 
goto ERR_LABEL;
 
  199   num_reduct_type_entries
 
  200     = (num_reduct_type_values ? 1 : 0)
 
  201     + (num_reduct_type_indexes ? 1 : 0)
 
  202     + (num_reduct_type_chars ? 1 : 0);
 
  204   if( num_reduct_type_entries ) {
 
  205 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  207       printf(
"RTOp_MPI_apply_op(...) : timing various MPI calls and other activities\n");
 
  208       ticks_start_start = clock();
 
  219     for( kc = 0; kc < num_cols; ++kc ) {
 
  223 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  225       printf(
"calling RTOp_apply_op(...)");
 
  226       ticks_start = clock();
 
  229     if( sub_vecs || sub_targ_vecs ) {
 
  230       for( kc = 0; kc < num_cols; ++kc ) {
 
  232           op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
 
  235         if (err) 
goto ERR_LABEL;
 
  238 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  241       printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
 
  245     if( num_reduct_type_values == 0 ) ++num_reduct_type_entries; 
 
  247       num_reduct_type_values, num_reduct_type_indexes, num_reduct_type_chars
 
  248       ,&num_reduct_type_entries
 
  249       ,target_type_block_lengths, target_type_displacements, target_type_datatypes
 
  252 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  254       printf(
"calling MPI_Type_struct(...)");
 
  255       ticks_start = clock();
 
  259                  , target_type_block_lengths, target_type_displacements
 
  260                  , target_type_datatypes, &mpi_reduct_ext_type );
 
  261 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  264       printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
 
  267     if(err) 
goto ERR_LABEL;
 
  268 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  270       printf(
"calling MPI_Type_commit(...)");
 
  271       ticks_start = clock();
 
  275 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  278       printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
 
  281     if(err) 
goto ERR_LABEL;
 
  287     reduct_obj_ext_size =
 
  291     i_reduct_objs_ext = malloc( reduct_obj_ext_size * num_cols );
 
  292     for( kc = 0; kc < num_cols; ++kc ) {
 
  294         op,i_reduct_objs[kc],num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
 
  295         ,i_reduct_objs_ext+kc*reduct_obj_ext_size
 
  303     if( reduct_op_func_ptr != NULL ) {
 
  310 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  312         printf(
"calling MPI_Op_create(...)");
 
  313         ticks_start = clock();
 
  321 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  324         printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
 
  327       if(err) 
goto ERR_LABEL;
 
  328       if( root_rank >= 0 ) {
 
  332         i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
 
  335           ,rank == root_rank ? i_reduct_objs_tmp : NULL  
 
  336           ,num_cols, mpi_reduct_ext_type
 
  337           ,mpi_op, root_rank, comm
 
  339         if(err) 
goto ERR_LABEL;
 
  340         if( rank == root_rank ) { 
 
  341           for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
 
  347         i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
 
  348 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  350           printf(
"calling MPI_Allreduce(...)");
 
  351           ticks_start = clock();
 
  355           i_reduct_objs_ext, i_reduct_objs_tmp, num_cols
 
  356           ,mpi_reduct_ext_type, mpi_op, comm
 
  358 #ifdef RTOP_TO_MPI_SHOW_TIMES 
  361           printf(
" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
 
  364         if(err) 
goto ERR_LABEL;
 
  365         for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
 
  368       for( kc = 0; kc < num_cols; ++kc ) {
 
  383       assert( reduct_op_func_ptr ); 
 
  391     if( sub_vecs || sub_targ_vecs ) {
 
  392       for( kc = 0; kc < num_cols; ++kc ) {
 
  394           op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
 
  397         if (err) 
goto ERR_LABEL;
 
  406   if( i_reduct_objs_tmp != NULL )
 
  407     free( i_reduct_objs_tmp );
 
  410   if( i_reduct_objs_ext != NULL )
 
  411     free( i_reduct_objs_ext );
 
  414   if( i_reduct_objs != NULL ) {
 
  415     for( kc = 0; kc < num_cols; ++kc )
 
  417     free( i_reduct_objs );
 
  423 #ifdef RTOP_TO_MPI_SHOW_TIMES 
int RTOp_get_reduct_type_num_entries(const struct RTOp_RTOp *op, int *num_values, int *num_indexes, int *num_chars)
int MPI_Reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
int MPI_Type_free(MPI_Datatype *op)
int RTOp_load_reduct_obj_state(const struct RTOp_RTOp *op, int num_values, const RTOp_value_type value_data[], int num_indexes, const RTOp_index_type index_data[], int num_chars, const RTOp_char_type char_data[], RTOp_ReductTarget reduct_obj)
int MPI_Type_commit(MPI_Datatype *datatype)
int MPI_Comm_rank(MPI_Comm comm, int *rank)
int RTOp_reduct_obj_create(const struct RTOp_RTOp *op, RTOp_ReductTarget *reduct_obj)
int RTOp_reduce_reduct_objs(const struct RTOp_RTOp *op, RTOp_ReductTarget in_reduct_obj, RTOp_ReductTarget inout_reduct_obj)
#define RTOpMPI_CHAR_TYPE
int RTOp_extract_reduct_obj_state(const struct RTOp_RTOp *op, const RTOp_ReductTarget reduct_obj, int num_values, RTOp_value_type value_data[], int num_indexes, RTOp_index_type index_data[], int num_chars, RTOp_char_type char_data[])
int MPI_Op_create(MPI_User_function *func, int communitive, MPI_Op *op)
int RTOp_reduct_obj_free(const struct RTOp_RTOp *op, RTOp_ReductTarget *reduct_obj)
int RTOp_apply_op(const struct RTOp_RTOp *op, const int num_vecs, const struct RTOp_SubVector sub_vecs[], const int num_targ_vecs, const struct RTOp_MutableSubVector targ_sub_vecs[], RTOp_ReductTarget reduct_obj)
#define MPI_DATATYPE_NULL
int MPI_Type_struct(int count, int *array_of_blocklengths, MPI_Aint *array_of_displacements, MPI_Datatype *array_of_types, MPI_Datatype *data_type)
#define RTOpMPI_VALUE_TYPE
int RTOp_MPI_apply_op_print_timings
MPI_Datatype RTOp_Datatype
#define RTOpMPI_INDEX_TYPE
int MPI_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
int MPI_Op_free(MPI_Op *op)
int RTOp_load_reduct_obj_ext_state(const struct RTOp_RTOp *op, const void *reduct_obj_ext, RTOp_ReductTarget reduct_obj)
void RTOp_MPI_type_signature(const int num_values, const int num_indexes, const int num_chars, int *num_entries, int block_lengths[], MPI_Aint displacements[], MPI_Datatype datatypes[])
int RTOp_MPI_apply_op(MPI_Comm comm, const struct RTOp_RTOp *op, int root_rank, const int num_cols, const int num_vecs, const struct RTOp_SubVector sub_vecs[], const int num_targ_vecs, const struct RTOp_MutableSubVector sub_targ_vecs[], RTOp_ReductTarget reduct_objs[])
int RTOp_get_reduct_op(const struct RTOp_RTOp *op, RTOp_reduct_op_func_ptr_t *reduct_op_func_ptr)
int RTOp_extract_reduct_obj_ext_state(const struct RTOp_RTOp *op, RTOp_ReductTarget reduct_obj, int num_values, int num_indexes, int num_chars, void *reduct_obj_ext)
Teuchos_Ordinal RTOp_index_type
#define RTOp_REDUCT_OBJ_NULL