44 #include "RTOp_apply_op_mpi.h"
45 #include "RTOp_parallel_helpers.h"
46 #include "RTOpToMPI.h"
/*
 * RTOp_apply_op_mpi
 *
 * NOTE(review): this listing is fragmentary.  The number fused onto the
 * front of many lines skips values, so some parameters, the local
 * declarations (k, kc, local_vecs, local_targ_vecs, ...), several call
 * heads, closing braces and the return statement are not visible here.
 * The comments below describe only what the visible lines show; anything
 * else is marked as an assumption to confirm against the full file.
 *
 * Visible flow:
 *   1. assert basic preconditions on the arguments;
 *   2. allocate an array of RTOp_SubVector structs (local_vecs);
 *   3. call RTOp_parallel_calc_overlap() to fill the three overlap_* locals;
 *   4. if overlap_first_local_ele is non-zero, set up one entry per
 *      (column, vector) pair in local_vecs and local_targ_vecs;
 *   5. hand the arrays (or NULL) to a further call -- presumably
 *      RTOp_MPI_apply_op(); the call head is not visible, TODO confirm;
 *   6. free the two arrays.
 */
50 int RTOp_apply_op_mpi(
52 ,RTOp_index_type global_dim_in, RTOp_index_type local_sub_dim_in, RTOp_index_type local_offset_in
54 ,
const int num_vecs,
const RTOp_value_type* l_vec_ptrs[],
const ptrdiff_t l_vec_strides[],
const ptrdiff_t l_vec_leading_dim[]
55 ,
const int num_targ_vecs, RTOp_value_type* l_targ_vec_ptrs[],
const ptrdiff_t l_targ_vec_strides[],
const ptrdiff_t l_targ_vec_leading_dim[]
56 ,
const RTOp_index_type first_ele_in,
const RTOp_index_type sub_dim_in,
const RTOp_index_type global_offset_in
58 ,RTOp_ReductTarget reduct_objs[]
/* Outputs of RTOp_parallel_calc_overlap(), all starting at 0.  The middle
 * name is spelled "overalap" consistently wherever it is used below. */
64 RTOp_index_type overlap_first_local_ele = 0;
65 RTOp_index_type overalap_local_sub_dim = 0;
66 RTOp_index_type overlap_global_offset = 0;
/* Preconditions: at least one of the two vector counts is non-zero. */
71 assert( num_vecs || num_targ_vecs );
/* NOTE(review): the lines between these asserts are not visible; they may
 * be guarded by conditions on num_vecs / num_targ_vecs -- TODO confirm. */
73 assert( l_vec_ptrs != NULL );
75 assert( l_targ_vec_ptrs != NULL );
76 assert( 0 <= sub_dim_in && sub_dim_in <= global_dim_in );
/* One RTOp_SubVector per (column, vector) pair: num_vecs * num_cols entries.
 * NOTE(review): the malloc result is not checked in the visible lines. */
80 local_vecs = malloc(
sizeof(
struct RTOp_SubVector) * num_vecs * num_cols );
/* Two column-by-vector loop nests whose bodies are not visible in this
 * listing (presumably they null-initialize each entry -- TODO confirm). */
81 for( kc = 0; kc < num_cols; ++kc ) {
82 for( k = 0; k < num_vecs; ++k )
88 for( kc = 0; kc < num_cols; ++kc ) {
89 for( k = 0; k < num_targ_vecs; ++k )
/* Inputs are the global/local layout and the requested range; the three
 * overlap_* locals are passed by address and written by the callee. */
95 RTOp_parallel_calc_overlap(
96 global_dim_in, local_sub_dim_in, local_offset_in, first_ele_in, sub_dim_in, global_offset_in
97 ,&overlap_first_local_ele, &overalap_local_sub_dim, &overlap_global_offset
/* The sub-vector setup below runs only when overlap_first_local_ele is
 * non-zero.  The value is used as a 1-based position: the pointer
 * arithmetic below subtracts 1 before scaling by the stride. */
99 if( overlap_first_local_ele != 0 ) {
102 for( kc = 0; kc < num_cols; ++kc ) {
103 for(k = 0; k < num_vecs; ++k) {
/* Arguments of a call whose head is not visible (presumably
 * RTOp_sub_vector(global_offset, sub_dim, values, values_stride, sub_vec)
 * -- TODO confirm).  The values pointer is advanced to the first overlapping
 * element and, when num_cols > 1, by kc leading dimensions to reach
 * column kc. */
105 overlap_global_offset
106 ,overalap_local_sub_dim
107 ,l_vec_ptrs[k]+(overlap_first_local_ele-1)*l_vec_strides[k]
108 + ( num_cols > 1 ? kc*l_vec_leading_dim[k] : 0 )
/* NOTE(review): local_vecs was allocated with num_vecs * num_cols entries,
 * but the index here is kc*num_cols+k with k < num_vecs.  When
 * num_cols != num_vecs this can map two (kc, k) pairs to the same slot or
 * index past the allocation; kc*num_vecs+k would match the allocation.
 * Confirm against the consumer of this array before changing. */
110 ,&local_vecs[kc*num_cols+k]
113 for(k = 0; k < num_targ_vecs; ++k) {
/* Same argument pattern for the mutable (target) vectors; the call head is
 * not visible (presumably RTOp_mutable_sub_vector(...) -- TODO confirm). */
115 overlap_global_offset
116 ,overalap_local_sub_dim
117 ,l_targ_vec_ptrs[k]+(overlap_first_local_ele-1)*l_targ_vec_strides[k]
118 + ( num_cols > 1 ? kc*l_targ_vec_leading_dim[k] : 0 )
119 ,l_targ_vec_strides[k]
/* NOTE(review): same kc*num_cols+k index as above.  The allocation of
 * local_targ_vecs is not visible; if it holds num_targ_vecs * num_cols
 * entries the same stride concern applies -- TODO confirm. */
120 ,&local_targ_vecs[kc*num_cols+k]
/* Arguments of a later call (head not visible).  Each array is passed only
 * when its count is non-zero and an overlap was found; otherwise NULL. */
136 ,num_vecs, num_vecs && overlap_first_local_ele ? &local_vecs[0] : NULL
137 ,num_targ_vecs, num_targ_vecs && overlap_first_local_ele ? &local_targ_vecs[0] : NULL
/* Release the temporary arrays.  free(NULL) is a no-op in standard C, so
 * the guards are redundant but harmless. */
141 if(local_vecs) free(local_vecs);
142 if(local_targ_vecs) free(local_targ_vecs);
void RTOp_mutable_sub_vector(RTOp_index_type global_offset, RTOp_index_type sub_dim, RTOp_value_type values[], ptrdiff_t values_stride, struct RTOp_MutableSubVector *sub_vec)
void RTOp_sub_vector(RTOp_index_type global_offset, RTOp_index_type sub_dim, const RTOp_value_type values[], ptrdiff_t values_stride, struct RTOp_SubVector *sub_vec)
void RTOp_mutable_sub_vector_null(struct RTOp_MutableSubVector *sub_vec)
int RTOp_MPI_apply_op(MPI_Comm comm, const struct RTOp_RTOp *op, int root_rank, const int num_cols, const int num_vecs, const struct RTOp_SubVector sub_vecs[], const int num_targ_vecs, const struct RTOp_MutableSubVector sub_targ_vecs[], RTOp_ReductTarget reduct_objs[])
void RTOp_sub_vector_null(struct RTOp_SubVector *sub_vec)