33 do_times(
int work_count,
int num_eqns_begin,
int num_eqns_end,
35 double (*
func)(
unsigned int,
unsigned int,
double)) {
36 std::vector<double> times;
37 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
38 num_eqns += num_eqns_delta) {
39 int num_nodes = work_count / num_eqns;
40 double mesh_spacing = 1.0 / (num_nodes - 1);
41 times.push_back(
func(num_nodes, num_eqns, mesh_spacing));
46 template <
template <
typename>
class FadType>
50 std::vector<double> times;
51 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
52 num_eqns += num_eqns_delta) {
53 int num_nodes = work_count / num_eqns;
54 double mesh_spacing = 1.0 / (num_nodes - 1);
60 template <
template <
typename,
int>
class FadType>
64 const int slfad_max = 130;
65 std::vector<double> times;
66 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
67 num_eqns += num_eqns_delta) {
68 int num_nodes = work_count / num_eqns;
69 double mesh_spacing = 1.0 / (num_nodes - 1);
70 if (num_eqns*2 < slfad_max)
76 template <
template <
typename,
int>
class FadType>
80 std::vector<double> times;
81 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
82 num_eqns += num_eqns_delta) {
83 int num_nodes = work_count / num_eqns;
84 double mesh_spacing = 1.0 / (num_nodes - 1);
87 else if (num_eqns*2 == 30)
89 else if (num_eqns*2 == 50)
91 else if (num_eqns*2 == 70)
93 else if (num_eqns*2 == 90)
95 else if (num_eqns*2 == 110)
97 else if (num_eqns*2 == 130)
104 const std::vector<double>& base,
105 const std::string& name,
int p,
int w,
int w_name) {
106 std::cout.setf(std::ios::scientific);
107 std::cout.precision(p);
108 std::cout.setf(std::ios::right);
109 std::cout << std::setw(w_name) << name <<
" ";
110 std::cout.setf(std::ios::right);
111 for (
unsigned int i=0; i<times.size(); i++)
112 std::cout << std::setw(w) << times[i]/base[i] <<
" ";
113 std::cout << std::endl;
116 int main(
int argc,
char* argv[]) {
126 clp.
setDocString(
"This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
127 int work_count = 200000;
128 int num_eqns_begin = 5;
129 int num_eqns_end = 65;
130 int num_eqns_delta = 10;
132 clp.
setOption(
"wc", &work_count,
"Work count = num_nodes*num_eqns");
133 clp.
setOption(
"p_begin", &num_eqns_begin,
"Intitial number of equations");
134 clp.
setOption(
"p_end", &num_eqns_end,
"Final number of equations");
135 clp.
setOption(
"p_delta", &num_eqns_delta,
"Step in number of equations");
136 clp.
setOption(
"rt", &rt,
"Include ADOL-C retaping test");
140 parseReturn= clp.
parse(argc, argv);
145 std::cout.setf(std::ios::right);
146 std::cout << std::setw(w_name) <<
"Name" <<
" ";
147 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
148 num_eqns += num_eqns_delta)
149 std::cout << std::setw(w) << num_eqns <<
" ";
150 std::cout << std::endl;
151 for (
int j=0; j<w_name; j++)
154 for (
int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end;
155 num_eqns += num_eqns_delta) {
156 for (
int j=0; j<w; j++)
160 std::cout << std::endl;
163 std::vector<double> times_analytic =
164 do_times(work_count, num_eqns_begin, num_eqns_end, num_eqns_delta,
166 print_times(times_analytic, times_analytic,
"Analytic", p, w, w_name);
172 std::vector<double> times_adic =
173 do_times(work_count, num_eqns_begin, num_eqns_end, num_eqns_delta,
175 print_times(times_adic, times_analytic,
"ADIC", p, w, w_name);
179 std::vector<double> times_sfad =
180 do_times_sfad<Sacado::Fad::SFad>(
181 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
182 print_times(times_sfad, times_analytic,
"SFAD", p, w, w_name);
184 std::vector<double> times_slfad =
185 do_times_sfad<Sacado::Fad::SLFad>(
186 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
187 print_times(times_slfad, times_analytic,
"SLFAD", p, w, w_name);
189 std::vector<double> times_dfad =
190 do_times_fad<Sacado::Fad::DFad>(
191 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
192 print_times(times_dfad, times_analytic,
"DFAD", p, w, w_name);
196 std::vector<double> times_elr_sfad =
197 do_times_sfad<Sacado::ELRFad::SFad>(
198 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
199 print_times(times_elr_sfad, times_analytic,
"ELRSFAD", p, w, w_name);
201 std::vector<double> times_elr_slfad =
202 do_times_sfad<Sacado::ELRFad::SLFad>(
203 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
204 print_times(times_elr_slfad, times_analytic,
"ELRSLFAD", p, w, w_name);
206 std::vector<double> times_elr_dfad =
207 do_times_fad<Sacado::ELRFad::DFad>(
208 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
209 print_times(times_elr_dfad, times_analytic,
"ELRDFAD", p, w, w_name);
213 std::vector<double> times_cache_sfad =
214 do_times_sfad<Sacado::CacheFad::SFad>(
215 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
216 print_times(times_cache_sfad, times_analytic,
"CacheSFAD", p, w, w_name);
218 std::vector<double> times_cache_slfad =
219 do_times_sfad<Sacado::CacheFad::SLFad>(
220 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
221 print_times(times_cache_slfad, times_analytic,
"CacheSLFAD", p, w, w_name);
223 std::vector<double> times_cache_dfad =
224 do_times_fad<Sacado::CacheFad::DFad>(
225 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
226 print_times(times_cache_dfad, times_analytic,
"CacheDFAD", p, w, w_name);
229 std::vector<double> times_cache_elr_sfad =
230 do_times_sfad<Sacado::ELRCacheFad::SFad>(
231 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
232 print_times(times_cache_elr_sfad, times_analytic,
"ELRCacheSFAD", p, w, w_name);
234 std::vector<double> times_cache_elr_slfad =
235 do_times_sfad<Sacado::ELRCacheFad::SLFad>(
236 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
237 print_times(times_cache_elr_slfad, times_analytic,
"ELRCacheSLFAD", p, w, w_name);
239 std::vector<double> times_cache_elr_dfad =
240 do_times_fad<Sacado::ELRCacheFad::DFad>(
241 work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
242 print_times(times_cache_elr_dfad, times_analytic,
"ELRCacheDFAD", p, w, w_name);
245 catch (std::exception& e) {
246 std::cout << e.what() << std::endl;
249 catch (
const char *s) {
250 std::cout << s << std::endl;
254 std::cout <<
"Caught unknown exception!" << std::endl;
void do_times(const T x[], int nloop, Teuchos::Array< double > &times)
double analytic_jac_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)
std::vector< double > do_times_sfad(int work_count, int num_eqns_begin, int num_eqns_end, int num_eqns_delta)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
void print_times(const std::string &screen_name, const std::string &file_name)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
std::vector< double > do_times_fad(int work_count, int num_eqns_begin, int num_eqns_end, int num_eqns_delta)
std::vector< double > do_times_slfad(int work_count, int num_eqns_begin, int num_eqns_end, int num_eqns_delta)
void setDocString(const char doc_string[])
const T func(int n, T *x)
double fad_jac_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)