Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
rad_fe_adj_fill.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Sacado Package
4 //
5 // Copyright 2006 NTESS and the Sacado contributors.
6 // SPDX-License-Identifier: LGPL-2.1-or-later
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Sacado_No_Kokkos.hpp"
11 #include "Sacado_tradvec.hpp"
12 
13 #include "Teuchos_Time.hpp"
15 
16 // ADOL-C includes
17 #ifdef HAVE_ADOLC
18 #ifdef PACKAGE
19 #undef PACKAGE
20 #endif
21 #ifdef PACKAGE_NAME
22 #undef PACKAGE_NAME
23 #endif
24 #ifdef PACKAGE_BUGREPORT
25 #undef PACKAGE_BUGREPORT
26 #endif
27 #ifdef PACKAGE_STRING
28 #undef PACKAGE_STRING
29 #endif
30 #ifdef PACKAGE_TARNAME
31 #undef PACKAGE_TARNAME
32 #endif
33 #ifdef PACKAGE_VERSION
34 #undef PACKAGE_VERSION
35 #endif
36 #ifdef VERSION
37 #undef VERSION
38 #endif
39 #include "adolc/adouble.h"
40 #include "adolc/drivers/drivers.h"
41 #include "adolc/interfaces.h"
42 #include "adolc/taping.h"
43 #endif
44 
45 // A performance test that computes a finite-element-like adjoint using
46 // Rad and ADOL-C
47 
50 
51 struct ElemData {
52  static const unsigned int nqp = 2;
53  static const unsigned int nnode = 2;
54  double w[nqp], jac[nqp], phi[nqp][nnode], dphi[nqp][nnode];
55  unsigned int gid[nnode];
56 
57  ElemData(double mesh_spacing) {
58  // Quadrature points
59  double xi[nqp];
60  xi[0] = -1.0 / std::sqrt(3.0);
61  xi[1] = 1.0 / std::sqrt(3.0);
62 
63  for (unsigned int i=0; i<nqp; i++) {
64  // Weights
65  w[i] = 1.0;
66 
67  // Element transformation Jacobian
68  jac[i] = 0.5*mesh_spacing;
69 
70  // Shape functions & derivatives
71  phi[i][0] = 0.5*(1.0 - xi[i]);
72  phi[i][1] = 0.5*(1.0 + xi[i]);
73  dphi[i][0] = -0.5;
74  dphi[i][1] = 0.5;
75  }
76  }
77 };
78 
79 template <class FadType>
80 void fad_init_fill(const ElemData& e,
81  unsigned int neqn,
82  const std::vector<double>& x,
83  const std::vector< std::vector<double> >& w,
84  std::vector<FadType>& x_fad,
85  std::vector< std::vector<double> >& w_local) {
86  for (unsigned int node=0; node<e.nnode; node++)
87  for (unsigned int eqn=0; eqn<neqn; eqn++)
88  x_fad[node*neqn+eqn] = FadType(e.nnode*neqn, node*neqn+eqn,
89  x[e.gid[node]*neqn+eqn]);
90 
91  for (unsigned int col=0; col<w.size(); col++)
92  for (unsigned int node=0; node<e.nnode; node++)
93  for (unsigned int eqn=0; eqn<neqn; eqn++)
94  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
95 }
96 
97 void rad_init_fill(const ElemData& e,
98  unsigned int neqn,
99  const std::vector<double>& x,
100  const std::vector< std::vector<double> >& w,
101  std::vector<RadType>& x_rad,
102  std::vector< std::vector<double> >& w_local) {
103  for (unsigned int node=0; node<e.nnode; node++)
104  for (unsigned int eqn=0; eqn<neqn; eqn++)
105  x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
106 
107  for (unsigned int col=0; col<w.size(); col++)
108  for (unsigned int node=0; node<e.nnode; node++)
109  for (unsigned int eqn=0; eqn<neqn; eqn++)
110  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
111 }
112 
113 void vrad_init_fill(const ElemData& e,
114  unsigned int neqn,
115  const std::vector<double>& x,
116  const std::vector< std::vector<double> >& w,
117  std::vector<VRadType>& x_rad,
118  double** w_local) {
119  for (unsigned int node=0; node<e.nnode; node++)
120  for (unsigned int eqn=0; eqn<neqn; eqn++)
121  x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
122 
123  for (unsigned int col=0; col<w.size(); col++)
124  for (unsigned int node=0; node<e.nnode; node++)
125  for (unsigned int eqn=0; eqn<neqn; eqn++)
126  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
127 }
128 
129 #ifdef HAVE_ADOLC
130 void adolc_init_fill(bool retape,
131  int tag,
132  const ElemData& e,
133  unsigned int neqn,
134  const std::vector<double>& x,
135  const std::vector< std::vector<double> >& w,
136  std::vector<double>& x_local,
137  std::vector<adouble>& x_ad,
138  double** w_local) {
139  if (retape)
140  trace_on(tag, 1);
141  for (unsigned int node=0; node<e.nnode; node++)
142  for (unsigned int eqn=0; eqn<neqn; eqn++) {
143  x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
144  if (retape)
145  x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
146  }
147 
148  for (unsigned int col=0; col<w.size(); col++)
149  for (unsigned int node=0; node<e.nnode; node++)
150  for (unsigned int eqn=0; eqn<neqn; eqn++)
151  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
152 }
153 #endif
154 
156  unsigned int neqn,
157  const std::vector<double>& x,
158  const std::vector< std::vector<double> >& w,
159  std::vector<double>& x_local,
160  std::vector< std::vector<double> >& w_local) {
161  for (unsigned int node=0; node<e.nnode; node++)
162  for (unsigned int eqn=0; eqn<neqn; eqn++)
163  x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
164 
165  for (unsigned int node=0; node<e.nnode; node++)
166  for (unsigned int eqn=0; eqn<neqn; eqn++)
167  for (unsigned int col=0; col<w.size(); col++)
168  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
169 }
170 
171 template <class T>
173  unsigned int neqn,
174  const std::vector<T>& x,
175  std::vector<T>& u,
176  std::vector<T>& du,
177  std::vector<T>& f) {
178  // Construct element solution, derivative
179  for (unsigned int qp=0; qp<e.nqp; qp++) {
180  for (unsigned int eqn=0; eqn<neqn; eqn++) {
181  u[qp*neqn+eqn] = 0.0;
182  du[qp*neqn+eqn] = 0.0;
183  for (unsigned int node=0; node<e.nnode; node++) {
184  u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
185  du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
186  }
187  }
188  }
189 
190  // Compute sum of equations for coupling
191  std::vector<T> s(e.nqp*neqn);
192  for (unsigned int qp=0; qp<e.nqp; qp++) {
193  for (unsigned int eqn=0; eqn<neqn; eqn++) {
194  s[qp*neqn+eqn] = 0.0;
195  for (unsigned int j=0; j<neqn; j++) {
196  if (j != eqn)
197  s[qp*neqn+eqn] += u[qp*neqn+j];
198  }
199  }
200  }
201 
202  // Evaluate element residual
203  for (unsigned int node=0; node<e.nnode; node++) {
204  for (unsigned int eqn=0; eqn<neqn; eqn++) {
205  unsigned int row = node*neqn+eqn;
206  f[row] = 0.0;
207  for (unsigned int qp=0; qp<e.nqp; qp++) {
208  f[row] +=
209  e.w[qp]*e.jac[qp]*(-(1.0/(e.jac[qp]*e.jac[qp]))*
210  du[qp*neqn+eqn]*e.dphi[qp][node] +
211  e.phi[qp][node]*(exp(u[qp*neqn+eqn]) +
212  u[qp*neqn+eqn]*s[qp*neqn+eqn]));
213  }
214  }
215  }
216 }
217 
219  unsigned int neqn,
220  const std::vector<double>& x,
221  std::vector< std::vector<double> >& w,
222  std::vector<double>& u,
223  std::vector<double>& du,
224  std::vector<double>& f,
225  std::vector< std::vector<double> >& adj) {
226  // Construct element solution, derivative
227  for (unsigned int qp=0; qp<e.nqp; qp++) {
228  for (unsigned int eqn=0; eqn<neqn; eqn++) {
229  u[qp*neqn+eqn] = 0.0;
230  du[qp*neqn+eqn] = 0.0;
231  for (unsigned int node=0; node<e.nnode; node++) {
232  u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
233  du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
234  }
235  }
236  }
237 
238  // Compute sum of equations for coupling
239  std::vector<double> s(e.nqp*neqn);
240  for (unsigned int qp=0; qp<e.nqp; qp++) {
241  for (unsigned int eqn=0; eqn<neqn; eqn++) {
242  s[qp*neqn+eqn] = 0.0;
243  for (unsigned int j=0; j<neqn; j++) {
244  if (j != eqn)
245  s[qp*neqn+eqn] += u[qp*neqn+j];
246  }
247  }
248  }
249 
250  for (unsigned int col=0; col<w.size(); col++)
251  for (unsigned int node=0; node<e.nnode; node++)
252  for (unsigned int eqn=0; eqn<neqn; eqn++)
253  adj[col][node*neqn+eqn] = 0.0;
254 
255  // Evaluate element residual
256  for (unsigned int node=0; node<e.nnode; node++) {
257  for (unsigned int eqn=0; eqn<neqn; eqn++) {
258  unsigned int row = node*neqn+eqn;
259  f[row] = 0.0;
260  for (unsigned int qp=0; qp<e.nqp; qp++) {
261  double c1 = e.w[qp]*e.jac[qp];
262  double c2 = -(1.0/(e.jac[qp]*e.jac[qp]))*e.dphi[qp][node];
263  double c3 = e.phi[qp][node]*(exp(u[qp*neqn+eqn]));
264  double c4 = e.phi[qp][node]*u[qp*neqn+eqn];
265  double c5 = e.phi[qp][node]*s[qp*neqn+eqn];
266  double c35 = c3+c5;
267  double c14 = c1*c4;
268  f[row] += c1*(c2*du[qp*neqn+eqn] + c3 + c4*s[qp*neqn+eqn]);
269  for (unsigned int col=0; col<w.size(); col++) {
270  for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
271  adj[col][node_col*neqn+eqn] +=
272  c1*(c2*e.dphi[qp][node_col] + c35*e.phi[qp][node_col])*w[col][row];
273  for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
274  if (eqn_col != eqn)
275  adj[col][node_col*neqn+eqn_col] += c14*e.phi[qp][node_col]*w[col][row];
276  }
277  }
278  }
279  }
280  }
281  }
282 }
283 
284 template <class FadType>
286  unsigned int neqn,
287  const std::vector< std::vector<double> >& w_local,
288  const std::vector<FadType>& f_fad,
289  std::vector<double>& f,
290  std::vector< std::vector<double> >& adj) {
291  // Get residual
292  for (unsigned int node=0; node<e.nnode; node++)
293  for (unsigned int eqn=0; eqn<neqn; eqn++)
294  f[e.gid[node]*neqn+eqn] += f_fad[node*neqn+eqn].val();
295 
296  // Get adjoint for each adjoint direction
297  for (unsigned int col=0; col<w_local.size(); col++) {
298  FadType z = 0.0;
299  for (unsigned int node=0; node<e.nnode; node++)
300  for (unsigned int eqn=0; eqn<neqn; eqn++)
301  z += f_fad[node*neqn+eqn]*w_local[col][node*neqn+eqn];
302 
303  for (unsigned int node=0; node<e.nnode; node++)
304  for (unsigned int eqn=0; eqn<neqn; eqn++)
305  adj[col][e.gid[node]*neqn+eqn] += z.fastAccessDx(node*neqn+eqn);
306  }
307 }
308 
310  unsigned int neqn,
311  std::vector< std::vector<double> >& w_local,
312  std::vector<RadType>& x_rad,
313  std::vector<RadType>& f_rad,
314  std::vector<RadType*>& ff_rad,
315  std::vector<double>& f,
316  std::vector< std::vector<double> >& adj) {
317  // Get residual
318  for (unsigned int node=0; node<e.nnode; node++)
319  for (unsigned int eqn=0; eqn<neqn; eqn++)
320  f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
321 
322  // Get adjoint for each adjoint direction
323  for (unsigned int col=0; col<w_local.size(); col++) {
325  &ff_rad[0],
326  &w_local[col][0]);
327  for (unsigned int node=0; node<e.nnode; node++)
328  for (unsigned int eqn=0; eqn<neqn; eqn++)
329  adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj();
330  }
331 }
332 
334  unsigned int neqn,
335  unsigned int ncol,
336  std::vector<std::size_t>& vn,
337  double** w_local,
338  std::vector<VRadType>& x_rad,
339  std::vector<VRadType>& f_rad,
340  VRadType*** vf_rad,
341  std::vector<double>& f,
342  std::vector< std::vector<double> >& adj) {
343  // Get residual
344  for (unsigned int node=0; node<e.nnode; node++)
345  for (unsigned int eqn=0; eqn<neqn; eqn++)
346  f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
347 
348  // Get adjoint for each adjoint direction
350  &vn[0],
351  vf_rad,
352  w_local);
353  for (unsigned int col=0; col<ncol; col++)
354  for (unsigned int node=0; node<e.nnode; node++)
355  for (unsigned int eqn=0; eqn<neqn; eqn++)
356  adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj(col);
357 }
358 
359 #ifdef HAVE_ADOLC
360 void adolc_process_fill(bool retape,
361  int tag,
362  const ElemData& e,
363  unsigned int neqn,
364  unsigned int ncol,
365  std::vector<double>& x_local,
366  double **w_local,
367  std::vector<adouble>& f_ad,
368  std::vector<double>& f_local,
369  std::vector<double>& f,
370  double **adj_local,
371  std::vector< std::vector<double> >& adj) {
372  if (retape) {
373  for (unsigned int node=0; node<e.nnode; node++)
374  for (unsigned int eqn=0; eqn<neqn; eqn++)
375  f_ad[node*neqn+eqn] >>= f_local[node*neqn+eqn];
376  trace_off();
377  }
378  else
379  zos_forward(tag, neqn*e.nnode, neqn*e.nnode, 1, &x_local[0], &f_local[0]);
380 
381  fov_reverse(tag, neqn*e.nnode, neqn*e.nnode, ncol, w_local, adj_local);
382 
383  for (unsigned int node=0; node<e.nnode; node++)
384  for (unsigned int eqn=0; eqn<neqn; eqn++)
385  f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
386 
387  for (unsigned int col=0; col<ncol; col++)
388  for (unsigned int node=0; node<e.nnode; node++)
389  for (unsigned int eqn=0; eqn<neqn; eqn++)
390  adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
391 }
392 #endif
393 
395  unsigned int neqn,
396  const std::vector<double>& f_local,
397  const std::vector< std::vector<double> >& adj_local,
398  std::vector<double>& f,
399  std::vector< std::vector<double> >& adj) {
400  for (unsigned int node=0; node<e.nnode; node++)
401  for (unsigned int eqn=0; eqn<neqn; eqn++)
402  f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
403 
404  for (unsigned int col=0; col<adj.size(); col++)
405  for (unsigned int node=0; node<e.nnode; node++)
406  for (unsigned int eqn=0; eqn<neqn; eqn++)
407  adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
408 }
409 
411  unsigned int neqn,
412  const std::vector<double>& f_local,
413  std::vector<double>& f) {
414  for (unsigned int node=0; node<e.nnode; node++)
415  for (unsigned int eqn=0; eqn<neqn; eqn++)
416  f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
417 }
418 
419 template <typename FadType>
420 double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
421  unsigned int num_cols, double mesh_spacing) {
422  ElemData e(mesh_spacing);
423 
424  // Solution vector, residual, adjoint
425  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
426  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
427  std::vector< std::vector<double> > adj(num_cols);
428  for (unsigned int node=0; node<num_nodes; node++)
429  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
430  x[node*num_eqns+eqn] =
431  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
432  f[node*num_eqns+eqn] = 0.0;
433  }
434 
435  for (unsigned int col=0; col<num_cols; col++) {
436  w[col] = std::vector<double>(num_nodes*num_eqns);
437  w_local[col] = std::vector<double>(e.nnode*num_eqns);
438  adj[col] = std::vector<double>(num_nodes*num_eqns);
439  for (unsigned int node=0; node<num_nodes; node++)
440  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
441  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
442  adj[col][node*num_eqns+eqn] = 0.0;
443  }
444  }
445 
446  Teuchos::Time timer("FE Fad Adjoint Fill", false);
447  timer.start(true);
448  std::vector<FadType> x_fad(e.nnode*num_eqns), f_fad(e.nnode*num_eqns);
449  std::vector<FadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
450  for (unsigned int i=0; i<num_nodes-1; i++) {
451  e.gid[0] = i;
452  e.gid[1] = i+1;
453 
454  fad_init_fill(e, num_eqns, x, w, x_fad, w_local);
455  template_element_fill(e, num_eqns, x_fad, u, du, f_fad);
456  fad_process_fill(e, num_eqns, w_local, f_fad, f, adj);
457  }
458  timer.stop();
459 
460  // std::cout << "Fad Residual = " << std::endl;
461  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
462  // std::cout << "\t" << f[i] << std::endl;
463 
464  // std::cout.precision(8);
465  // std::cout << "Fad Adjoint = " << std::endl;
466  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
467  // std::cout << "\t";
468  // for (unsigned int j=0; j<num_cols; j++)
469  // std::cout << adj[j][i] << "\t";
470  // std::cout << std::endl;
471  // }
472 
473  return timer.totalElapsedTime();
474 }
475 
476 double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
477  unsigned int num_cols, double mesh_spacing) {
478  ElemData e(mesh_spacing);
479 
480  // Solution vector, residual, adjoint
481  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
482  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
483  std::vector< std::vector<double> > adj(num_cols);
484  for (unsigned int node=0; node<num_nodes; node++)
485  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
486  x[node*num_eqns+eqn] =
487  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
488  f[node*num_eqns+eqn] = 0.0;
489  }
490 
491  for (unsigned int col=0; col<num_cols; col++) {
492  w[col] = std::vector<double>(num_nodes*num_eqns);
493  w_local[col] = std::vector<double>(e.nnode*num_eqns);
494  adj[col] = std::vector<double>(num_nodes*num_eqns);
495  for (unsigned int node=0; node<num_nodes; node++)
496  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
497  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
498  adj[col][node*num_eqns+eqn] = 0.0;
499  }
500  }
501 
502  Teuchos::Time timer("FE Rad Adjoint Fill", false);
503  timer.start(true);
504  std::vector<RadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns);
505  std::vector<RadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
506  std::vector<RadType*> ff_rad(f_rad.size());
507  for (unsigned int i=0; i<f_rad.size(); i++)
508  ff_rad[i] = &f_rad[i];
509  for (unsigned int i=0; i<num_nodes-1; i++) {
510  e.gid[0] = i;
511  e.gid[1] = i+1;
512 
513  rad_init_fill(e, num_eqns, x, w, x_rad, w_local);
514  template_element_fill(e, num_eqns, x_rad, u, du, f_rad);
515  rad_process_fill(e, num_eqns, w_local, x_rad, f_rad, ff_rad, f, adj);
516  }
517  timer.stop();
518 
519  // std::cout << "Rad Residual = " << std::endl;
520  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
521  // std::cout << "\t" << f[i] << std::endl;
522 
523  // std::cout.precision(8);
524  // std::cout << "Rad Adjoint = " << std::endl;
525  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
526  // std::cout << "\t";
527  // for (unsigned int j=0; j<num_cols; j++)
528  // std::cout << adj[j][i] << "\t";
529  // std::cout << std::endl;
530  // }
531 
532  return timer.totalElapsedTime();
533 }
534 
535 double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
536  unsigned int num_cols, double mesh_spacing) {
537  ElemData e(mesh_spacing);
538 
539  // Solution vector, residual, adjoint
540  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
541  std::vector< std::vector<double> > w(num_cols);
542  double **w_local = new double*[num_cols];
543  std::vector< std::vector<double> > adj(num_cols);
544  for (unsigned int node=0; node<num_nodes; node++)
545  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
546  x[node*num_eqns+eqn] =
547  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
548  f[node*num_eqns+eqn] = 0.0;
549  }
550 
551  for (unsigned int col=0; col<num_cols; col++) {
552  w[col] = std::vector<double>(num_nodes*num_eqns);
553  w_local[col] = new double[e.nnode*num_eqns];
554  adj[col] = std::vector<double>(num_nodes*num_eqns);
555  for (unsigned int node=0; node<num_nodes; node++)
556  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
557  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
558  adj[col][node*num_eqns+eqn] = 0.0;
559  }
560  }
561 
562  Teuchos::Time timer("FE Vector Rad Adjoint Fill", false);
563  timer.start(true);
564  std::vector<VRadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns);
565  std::vector<VRadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
566  VRadType ***vf_rad = new VRadType**[num_cols];
567  std::vector<std::size_t> vn(num_cols);
568  for (unsigned int i=0; i<num_cols; i++) {
569  vf_rad[i] = new VRadType*[num_eqns*e.nnode];
570  vn[i] = num_eqns*e.nnode;
571  for (unsigned int j=0; j<num_eqns*e.nnode; j++)
572  vf_rad[i][j] = &f_rad[j];
573  }
574  for (unsigned int i=0; i<num_nodes-1; i++) {
575  e.gid[0] = i;
576  e.gid[1] = i+1;
577 
578  vrad_init_fill(e, num_eqns, x, w, x_rad, w_local);
579  template_element_fill(e, num_eqns, x_rad, u, du, f_rad);
580  vrad_process_fill(e, num_eqns, num_cols, vn, w_local, x_rad, f_rad, vf_rad,
581  f, adj);
582  }
583  for (unsigned int col=0; col<num_cols; col++) {
584  delete [] w_local[col];
585  delete [] vf_rad[col];
586  }
587  delete [] w_local;
588  delete [] vf_rad;
589  timer.stop();
590 
591  // std::cout << "Vector Rad Residual = " << std::endl;
592  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
593  // std::cout << "\t" << f[i] << std::endl;
594 
595  // std::cout.precision(8);
596  // std::cout << "Vector Rad Adjoint = " << std::endl;
597  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
598  // std::cout << "\t";
599  // for (unsigned int j=0; j<num_cols; j++)
600  // std::cout << adj[j][i] << "\t";
601  // std::cout << std::endl;
602  // }
603 
604  return timer.totalElapsedTime();
605 }
606 
607 #ifdef HAVE_ADOLC
608 double adolc_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
609  unsigned int num_cols, double mesh_spacing) {
610  ElemData e(mesh_spacing);
611 
612  // Solution vector, residual, adjoint
613  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
614  std::vector< std::vector<double> > w(num_cols);
615  std::vector< std::vector<double> > adj(num_cols);
616  for (unsigned int node=0; node<num_nodes; node++)
617  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
618  x[node*num_eqns+eqn] =
619  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
620  f[node*num_eqns+eqn] = 0.0;
621  }
622 
623  for (unsigned int col=0; col<num_cols; col++) {
624  w[col] = std::vector<double>(num_nodes*num_eqns);
625  adj[col] = std::vector<double>(num_nodes*num_eqns);
626  for (unsigned int node=0; node<num_nodes; node++)
627  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
628  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
629  adj[col][node*num_eqns+eqn] = 0.0;
630  }
631  }
632 
633  Teuchos::Time timer("FE ADOL-C Adjoint Fill", false);
634  timer.start(true);
635  std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
636  std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
637  std::vector<double> x_local(e.nnode*num_eqns);
638  std::vector<double> f_local(e.nnode*num_eqns);
639  double **adj_local = new double*[num_cols];
640  double **w_local = new double*[num_cols];
641  for (unsigned int i=0; i<num_cols; i++) {
642  adj_local[i] = new double[e.nnode*num_eqns];
643  w_local[i] = new double[e.nnode*num_eqns];
644  }
645 
646  // Tape first element
647  e.gid[0] = 0;
648  e.gid[1] = 1;
649  adolc_init_fill(true, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
650  template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
651  adolc_process_fill(true, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
652  f_local, f, adj_local, adj);
653 
654  // Now do remaining fills reusing tape
655  for (unsigned int i=1; i<num_nodes-1; i++) {
656  e.gid[0] = i;
657  e.gid[1] = i+1;
658 
659  adolc_init_fill(false, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
660  adolc_process_fill(false, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
661  f_local, f, adj_local, adj);
662  }
663  for (unsigned int i=0; i<num_cols; i++) {
664  delete [] adj_local[i];
665  delete [] w_local[i];
666  }
667  delete [] adj_local;
668  delete [] w_local;
669  timer.stop();
670 
671  // std::cout << "ADOL-C Residual = " << std::endl;
672  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
673  // std::cout << "\t" << f[i] << std::endl;
674 
675  // std::cout.precision(8);
676  // std::cout << "ADOL-C Adjoint = " << std::endl;
677  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
678  // std::cout << "\t";
679  // for (unsigned int j=0; j<num_cols; j++)
680  // std::cout << adj[j][i] << "\t";
681  // std::cout << std::endl;
682  // }
683 
684  return timer.totalElapsedTime();
685 }
686 
687 double adolc_retape_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
688  unsigned int num_cols, double mesh_spacing) {
689  ElemData e(mesh_spacing);
690 
691  // Solution vector, residual, adjoint
692  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
693  std::vector< std::vector<double> > w(num_cols);
694  std::vector< std::vector<double> > adj(num_cols);
695  for (unsigned int node=0; node<num_nodes; node++)
696  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
697  x[node*num_eqns+eqn] =
698  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
699  f[node*num_eqns+eqn] = 0.0;
700  }
701 
702  for (unsigned int col=0; col<num_cols; col++) {
703  w[col] = std::vector<double>(num_nodes*num_eqns);
704  adj[col] = std::vector<double>(num_nodes*num_eqns);
705  for (unsigned int node=0; node<num_nodes; node++)
706  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
707  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
708  adj[col][node*num_eqns+eqn] = 0.0;
709  }
710  }
711 
712  Teuchos::Time timer("FE ADOL-C Retape Adjoint Fill", false);
713  timer.start(true);
714  std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
715  std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
716  std::vector<double> x_local(e.nnode*num_eqns);
717  std::vector<double> f_local(e.nnode*num_eqns);
718  double **adj_local = new double*[num_cols];
719  double **w_local = new double*[num_cols];
720  for (unsigned int i=0; i<num_cols; i++) {
721  adj_local[i] = new double[e.nnode*num_eqns];
722  w_local[i] = new double[e.nnode*num_eqns];
723  }
724 
725  for (unsigned int i=0; i<num_nodes-1; i++) {
726  e.gid[0] = i;
727  e.gid[1] = i+1;
728 
729  adolc_init_fill(true, 1, e, num_eqns, x, w, x_local, x_ad, w_local);
730  template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
731  adolc_process_fill(true, 1, e, num_eqns, num_cols, x_local, w_local, f_ad,
732  f_local, f, adj_local, adj);
733  }
734  for (unsigned int i=0; i<num_cols; i++) {
735  delete [] adj_local[i];
736  delete [] w_local[i];
737  }
738  delete [] adj_local;
739  delete [] w_local;
740  timer.stop();
741 
742  // std::cout << "ADOL-C Residual (retaped) = " << std::endl;
743  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
744  // std::cout << "\t" << f[i] << std::endl;
745 
746  // std::cout.precision(8);
747  // std::cout << "ADOL-C Adjoint (retaped) = " << std::endl;
748  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
749  // std::cout << "\t";
750  // for (unsigned int j=0; j<num_cols; j++)
751  // std::cout << adj[j][i] << "\t";
752  // std::cout << std::endl;
753  // }
754 
755  return timer.totalElapsedTime();
756 }
757 #endif
758 
759 double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
760  unsigned int num_cols, double mesh_spacing) {
761  ElemData e(mesh_spacing);
762 
763  // Solution vector, residual, adjoint
764  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
765  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
766  std::vector< std::vector<double> > adj(num_cols);
767  for (unsigned int node=0; node<num_nodes; node++)
768  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
769  x[node*num_eqns+eqn] =
770  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
771  f[node*num_eqns+eqn] = 0.0;
772  }
773 
774  for (unsigned int col=0; col<num_cols; col++) {
775  w[col] = std::vector<double>(num_nodes*num_eqns);
776  w_local[col] = std::vector<double>(e.nnode*num_eqns);
777  adj[col] = std::vector<double>(num_nodes*num_eqns);
778  for (unsigned int node=0; node<num_nodes; node++)
779  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
780  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
781  adj[col][node*num_eqns+eqn] = 0.0;
782  }
783  }
784 
785  Teuchos::Time timer("FE Analytic Adjoint Fill", false);
786  timer.start(true);
787  std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
788  std::vector< std::vector<double> > adj_local(num_cols);
789  std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
790  for (unsigned int i=0; i<num_cols; i++)
791  adj_local[i] = std::vector<double>(e.nnode*num_eqns);
792  for (unsigned int i=0; i<num_nodes-1; i++) {
793  e.gid[0] = i;
794  e.gid[1] = i+1;
795 
796  analytic_init_fill(e, num_eqns, x, w, x_local, w_local);
797  analytic_element_fill(e, num_eqns, x_local, w_local, u, du, f_local,
798  adj_local);
799  analytic_process_fill(e, num_eqns, f_local, adj_local, f, adj);
800  }
801  timer.stop();
802 
803  // std::cout.precision(8);
804  // std::cout << "Analytic Residual = " << std::endl;
805  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
806  // std::cout << "\t" << f[i] << std::endl;
807 
808  // std::cout.precision(8);
809  // std::cout << "Analytic Adjoint = " << std::endl;
810  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
811  // std::cout << "\t";
812  // for (unsigned int j=0; j<num_cols; j++)
813  // std::cout << adj[j][i] << "\t";
814  // std::cout << std::endl;
815  // }
816 
817  return timer.totalElapsedTime();
818 }
819 
820 double residual_fill(unsigned int num_nodes, unsigned int num_eqns,
821  unsigned int num_cols, double mesh_spacing) {
822  ElemData e(mesh_spacing);
823 
824  // Solution vector, residual, jacobian
825  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
826  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
827  for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
828  for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
829  unsigned int row = node_row*num_eqns + eqn_row;
830  x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
831  f[row] = 0.0;
832  }
833  }
834 
835  for (unsigned int col=0; col<num_cols; col++) {
836  w[col] = std::vector<double>(num_nodes*num_eqns);
837  w_local[col] = std::vector<double>(e.nnode*num_eqns);
838  for (unsigned int node=0; node<num_nodes; node++)
839  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
840  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
841  }
842  }
843 
844  Teuchos::Time timer("FE Residual Fill", false);
845  timer.start(true);
846  std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
847  std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
848  for (unsigned int i=0; i<num_nodes-1; i++) {
849  e.gid[0] = i;
850  e.gid[1] = i+1;
851 
852  analytic_init_fill(e, num_eqns, x, w, x_local, w_local);
853  template_element_fill(e, num_eqns, x_local, u, du, f_local);
854  residual_process_fill(e, num_eqns, f_local, f);
855  }
856  timer.stop();
857 
858  // std::cout.precision(8);
859  // std::cout << "Analytic Residual = " << std::endl;
860  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
861  // std::cout << "\t" << f[i] << std::endl;
862 
863  return timer.totalElapsedTime();
864 }
865 
866 int main(int argc, char* argv[]) {
867  int ierr = 0;
868 
869  try {
870  double t, ta, tr;
871  int p = 2;
872  int w = p+7;
873 
874  // Set up command line options
876  clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
877  int num_nodes = 100000;
878  int num_eqns = 2;
879  int num_cols = 4;
880  int rt = 0;
881  clp.setOption("n", &num_nodes, "Number of nodes");
882  clp.setOption("p", &num_eqns, "Number of equations");
883  clp.setOption("q", &num_cols, "Number of adjoint directions");
884  clp.setOption("rt", &rt, "Include ADOL-C retaping test");
885 
886  // Parse options
888  parseReturn= clp.parse(argc, argv);
890  return 1;
891 
892  double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1);
893 
894  std::cout.setf(std::ios::scientific);
895  std::cout.precision(p);
896  std::cout << "num_nodes = " << num_nodes
897  << ", num_eqns = " << num_eqns
898  << ", num_cols = " << num_cols << ": " << std::endl
899  << " " << " Time" << "\t"<< "Time/Analytic" << "\t"
900  << "Time/Residual" << std::endl;
901 
902  ta = 1.0;
903  tr = 1.0;
904 
905  tr = residual_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
906 
907  ta = analytic_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
908  std::cout << "Analytic: " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/tr << std::endl;
909 
910 #ifdef HAVE_ADOLC
911  t = adolc_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
912  std::cout << "ADOL-C: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
913 
914  if (rt != 0) {
915  t = adolc_retape_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
916  std::cout << "ADOL-C(rt):" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
917  }
918 #endif
919 
920  t = fad_adj_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
921  std::cout << "DFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
922 
923  t = fad_adj_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
924  std::cout << "ELRDFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
925 
926  t = rad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
927  std::cout << "Rad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
928 
929  t = vrad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
930  std::cout << "Vector Rad:" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
931 
932  }
933  catch (std::exception& e) {
934  std::cout << e.what() << std::endl;
935  ierr = 1;
936  }
937  catch (const char *s) {
938  std::cout << s << std::endl;
939  ierr = 1;
940  }
941  catch (...) {
942  std::cout << "Caught unknown exception!" << std::endl;
943  ierr = 1;
944  }
945 
946  return ierr;
947 }
const char * p
void rad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< RadType > &x_rad, std::vector< std::vector< double > > &w_local)
double residual_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)
void template_element_fill(const ElemData &e, unsigned int neqn, const std::vector< T > &x, std::vector< T > &u, std::vector< T > &du, std::vector< T > &f)
InactiveDouble * jac
InactiveDouble * w
void analytic_element_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &u, std::vector< double > &du, std::vector< double > &f, std::vector< std::vector< double > > &jac)
Sacado::Fad::DFad< double > FadType
void analytic_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, const std::vector< std::vector< double > > &jac_local, std::vector< double > &f, std::vector< std::vector< double > > &jac)
ElemData(double mesh_spacing)
double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void fad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< FadType > &x_fad)
static void Weighted_Gradcomp(size_t, ADVar **, Double *)
void start(bool reset=false)
Sacado::RadVec::ADvar< double > VRadType
void rad_process_fill(const ElemData &e, unsigned int neqn, std::vector< std::vector< double > > &w_local, std::vector< RadType > &x_rad, std::vector< RadType > &f_rad, std::vector< RadType * > &ff_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
void analytic_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &x_local)
double stop()
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
static void Weighted_GradcompVec(size_t, size_t *, ADVar ***, Double **)
int main()
Definition: ad_example.cpp:171
sqrt(expr.val())
void vrad_process_fill(const ElemData &e, unsigned int neqn, unsigned int ncol, std::vector< std::size_t > &vn, double **w_local, std::vector< VRadType > &x_rad, std::vector< VRadType > &f_rad, VRadType ***vf_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void residual_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, std::vector< double > &f)
Uncopyable z
InactiveDouble ** phi
double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
InactiveDouble ** dphi
void setDocString(const char doc_string[])
void fad_process_fill(const ElemData &e, unsigned int neqn, const std::vector< FadType > &f_fad, std::vector< double > &f, std::vector< std::vector< double > > &jac)
double totalElapsedTime(bool readCurrentTime=false) const
Sacado::Rad::ADvar< double > RadType
double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
exp(expr.val())
double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void vrad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< VRadType > &x_rad, double **w_local)