13 do_times(
int work_count,
int num_eqns_begin,
int num_eqns_end,
15 double (*
func)(
unsigned int,
unsigned int,
double)) {
16 std::vector<double> times;
17 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
18 num_eqns += num_eqns_delta) {
19 int num_nodes = work_count / num_eqns;
20 double mesh_spacing = 1.0 / (num_nodes - 1);
21 times.push_back(
func(num_nodes, num_eqns, mesh_spacing));
26 template <
template <
typename>
class FadType>
30 std::vector<double> times;
31 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
32 num_eqns += num_eqns_delta) {
33 int num_nodes = work_count / num_eqns;
34 double mesh_spacing = 1.0 / (num_nodes - 1);
40 template <
template <
typename,
int>
class FadType>
44 const int slfad_max = 130;
45 std::vector<double> times;
46 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
47 num_eqns += num_eqns_delta) {
48 int num_nodes = work_count / num_eqns;
49 double mesh_spacing = 1.0 / (num_nodes - 1);
50 if (num_eqns*2 < slfad_max)
56 template <
template <
typename,
int>
class FadType>
60 std::vector<double> times;
61 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
62 num_eqns += num_eqns_delta) {
63 int num_nodes = work_count / num_eqns;
64 double mesh_spacing = 1.0 / (num_nodes - 1);
67 else if (num_eqns*2 == 30)
69 else if (num_eqns*2 == 50)
71 else if (num_eqns*2 == 70)
73 else if (num_eqns*2 == 90)
75 else if (num_eqns*2 == 110)
77 else if (num_eqns*2 == 130)
84 const std::vector<double>& base,
85 const std::string& name,
int p,
int w,
int w_name) {
86 std::cout.setf(std::ios::scientific);
87 std::cout.precision(p);
88 std::cout.setf(std::ios::right);
89 std::cout << std::setw(w_name) << name <<
" ";
90 std::cout.setf(std::ios::right);
91 for (
unsigned int i=0;
i<times.size();
i++)
92 std::cout << std::setw(w) << times[
i]/base[
i] <<
" ";
93 std::cout << std::endl;
96 int main(
int argc,
char* argv[]) {
106 clp.
setDocString(
"This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
107 int work_count = 200000;
108 int num_eqns_begin = 5;
109 int num_eqns_end = 65;
110 int num_eqns_delta = 10;
112 clp.
setOption(
"wc", &work_count,
"Work count = num_nodes*num_eqns");
113 clp.
setOption(
"p_begin", &num_eqns_begin,
"Intitial number of equations");
114 clp.
setOption(
"p_end", &num_eqns_end,
"Final number of equations");
115 clp.
setOption(
"p_delta", &num_eqns_delta,
"Step in number of equations");
116 clp.
setOption(
"rt", &rt,
"Include ADOL-C retaping test");
120 parseReturn= clp.
parse(argc, argv);
125 std::cout.setf(std::ios::right);
126 std::cout << std::setw(w_name) <<
"Name" <<
" ";
127 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
128 num_eqns += num_eqns_delta)
129 std::cout << std::setw(w) << num_eqns <<
" ";
130 std::cout << std::endl;
131 for (
int j=0; j<w_name; j++)
134 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
135 num_eqns += num_eqns_delta) {
136 for (
int j=0; j<w; j++)
140 std::cout << std::endl;
143 std::vector<double> times_analytic =
144 do_times(work_count, num_eqns_begin, num_eqns_end, num_eqns_delta,
146 print_times(times_analytic, times_analytic,
"Analytic", p, w, w_name);
152 std::vector<double> times_adic =
153 do_times(work_count, num_eqns_begin, num_eqns_end, num_eqns_delta,
155 print_times(times_adic, times_analytic,
"ADIC", p, w, w_name);
159 std::vector<double> times_sfad =
160 do_times_sfad<Sacado::Fad::SFad>(
161 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
162 print_times(times_sfad, times_analytic,
"SFAD", p, w, w_name);
164 std::vector<double> times_slfad =
165 do_times_sfad<Sacado::Fad::SLFad>(
166 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
167 print_times(times_slfad, times_analytic,
"SLFAD", p, w, w_name);
169 std::vector<double> times_dfad =
170 do_times_fad<Sacado::Fad::DFad>(
171 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
172 print_times(times_dfad, times_analytic,
"DFAD", p, w, w_name);
176 std::vector<double> times_elr_sfad =
177 do_times_sfad<Sacado::ELRFad::SFad>(
178 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
179 print_times(times_elr_sfad, times_analytic,
"ELRSFAD", p, w, w_name);
181 std::vector<double> times_elr_slfad =
182 do_times_sfad<Sacado::ELRFad::SLFad>(
183 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
184 print_times(times_elr_slfad, times_analytic,
"ELRSLFAD", p, w, w_name);
186 std::vector<double> times_elr_dfad =
187 do_times_fad<Sacado::ELRFad::DFad>(
188 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
189 print_times(times_elr_dfad, times_analytic,
"ELRDFAD", p, w, w_name);
193 std::vector<double> times_cache_sfad =
194 do_times_sfad<Sacado::CacheFad::SFad>(
195 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
196 print_times(times_cache_sfad, times_analytic,
"CacheSFAD", p, w, w_name);
198 std::vector<double> times_cache_slfad =
199 do_times_sfad<Sacado::CacheFad::SLFad>(
200 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
201 print_times(times_cache_slfad, times_analytic,
"CacheSLFAD", p, w, w_name);
203 std::vector<double> times_cache_dfad =
204 do_times_fad<Sacado::CacheFad::DFad>(
205 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
206 print_times(times_cache_dfad, times_analytic,
"CacheDFAD", p, w, w_name);
209 std::vector<double> times_cache_elr_sfad =
210 do_times_sfad<Sacado::ELRCacheFad::SFad>(
211 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
212 print_times(times_cache_elr_sfad, times_analytic,
"ELRCacheSFAD", p, w, w_name);
214 std::vector<double> times_cache_elr_slfad =
215 do_times_sfad<Sacado::ELRCacheFad::SLFad>(
216 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
217 print_times(times_cache_elr_slfad, times_analytic,
"ELRCacheSLFAD", p, w, w_name);
219 std::vector<double> times_cache_elr_dfad =
220 do_times_fad<Sacado::ELRCacheFad::DFad>(
221 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
222 print_times(times_cache_elr_dfad, times_analytic,
"ELRCacheDFAD", p, w, w_name);
225 catch (std::exception& e) {
226 std::cout << e.what() << std::endl;
229 catch (
const char *s) {
230 std::cout << s << std::endl;
234 std::cout <<
"Caught unknown exception!" << std::endl;
void do_times(const T x[], int nloop, Teuchos::Array< double > ×)
double analytic_jac_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)
std::vector< double > do_times_sfad(int work_count, int num_eqns_begin, int num_eqns_end, int num_eqns_delta)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
void print_times(const std::string &screen_name, const std::string &file_name)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
std::vector< double > do_times_fad(int work_count, int num_eqns_begin, int num_eqns_end, int num_eqns_delta)
std::vector< double > do_times_slfad(int work_count, int num_eqns_begin, int num_eqns_end, int num_eqns_delta)
void setDocString(const char doc_string[])
const T func(int n, T *x)
double fad_jac_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)