Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
rad_fe_adj_fill.cpp
Go to the documentation of this file.
1 // $Id$
2 // $Source$
3 // @HEADER
4 // ***********************************************************************
5 //
6 // Sacado Package
7 // Copyright (2006) Sandia Corporation
8 //
9 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // This library is free software; you can redistribute it and/or modify
13 // it under the terms of the GNU Lesser General Public License as
14 // published by the Free Software Foundation; either version 2.1 of the
15 // License, or (at your option) any later version.
16 //
17 // This library is distributed in the hope that it will be useful, but
18 // WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License along with this library; if not, write to the Free Software
24 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 // USA
26 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
27 // (etphipp@sandia.gov).
28 //
29 // ***********************************************************************
30 // @HEADER
31 
32 #include "Sacado_No_Kokkos.hpp"
33 #include "Sacado_tradvec.hpp"
34 
35 #include "Teuchos_Time.hpp"
37 
38 // ADOL-C includes
39 #ifdef HAVE_ADOLC
40 #ifdef PACKAGE
41 #undef PACKAGE
42 #endif
43 #ifdef PACKAGE_NAME
44 #undef PACKAGE_NAME
45 #endif
46 #ifdef PACKAGE_BUGREPORT
47 #undef PACKAGE_BUGREPORT
48 #endif
49 #ifdef PACKAGE_STRING
50 #undef PACKAGE_STRING
51 #endif
52 #ifdef PACKAGE_TARNAME
53 #undef PACKAGE_TARNAME
54 #endif
55 #ifdef PACKAGE_VERSION
56 #undef PACKAGE_VERSION
57 #endif
58 #ifdef VERSION
59 #undef VERSION
60 #endif
61 #include "adolc/adouble.h"
62 #include "adolc/drivers/drivers.h"
63 #include "adolc/interfaces.h"
64 #endif
65 
66 // A performance test that computes a finite-element-like adjoint using
67 // Rad and ADOL-C
68 
71 
72 struct ElemData {
73  static const unsigned int nqp = 2;
74  static const unsigned int nnode = 2;
75  double w[nqp], jac[nqp], phi[nqp][nnode], dphi[nqp][nnode];
76  unsigned int gid[nnode];
77 
78  ElemData(double mesh_spacing) {
79  // Quadrature points
80  double xi[nqp];
81  xi[0] = -1.0 / std::sqrt(3.0);
82  xi[1] = 1.0 / std::sqrt(3.0);
83 
84  for (unsigned int i=0; i<nqp; i++) {
85  // Weights
86  w[i] = 1.0;
87 
88  // Element transformation Jacobian
89  jac[i] = 0.5*mesh_spacing;
90 
91  // Shape functions & derivatives
92  phi[i][0] = 0.5*(1.0 - xi[i]);
93  phi[i][1] = 0.5*(1.0 + xi[i]);
94  dphi[i][0] = -0.5;
95  dphi[i][1] = 0.5;
96  }
97  }
98 };
99 
100 template <class FadType>
101 void fad_init_fill(const ElemData& e,
102  unsigned int neqn,
103  const std::vector<double>& x,
104  const std::vector< std::vector<double> >& w,
105  std::vector<FadType>& x_fad,
106  std::vector< std::vector<double> >& w_local) {
107  for (unsigned int node=0; node<e.nnode; node++)
108  for (unsigned int eqn=0; eqn<neqn; eqn++)
109  x_fad[node*neqn+eqn] = FadType(e.nnode*neqn, node*neqn+eqn,
110  x[e.gid[node]*neqn+eqn]);
111 
112  for (unsigned int col=0; col<w.size(); col++)
113  for (unsigned int node=0; node<e.nnode; node++)
114  for (unsigned int eqn=0; eqn<neqn; eqn++)
115  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
116 }
117 
118 void rad_init_fill(const ElemData& e,
119  unsigned int neqn,
120  const std::vector<double>& x,
121  const std::vector< std::vector<double> >& w,
122  std::vector<RadType>& x_rad,
123  std::vector< std::vector<double> >& w_local) {
124  for (unsigned int node=0; node<e.nnode; node++)
125  for (unsigned int eqn=0; eqn<neqn; eqn++)
126  x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
127 
128  for (unsigned int col=0; col<w.size(); col++)
129  for (unsigned int node=0; node<e.nnode; node++)
130  for (unsigned int eqn=0; eqn<neqn; eqn++)
131  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
132 }
133 
134 void vrad_init_fill(const ElemData& e,
135  unsigned int neqn,
136  const std::vector<double>& x,
137  const std::vector< std::vector<double> >& w,
138  std::vector<VRadType>& x_rad,
139  double** w_local) {
140  for (unsigned int node=0; node<e.nnode; node++)
141  for (unsigned int eqn=0; eqn<neqn; eqn++)
142  x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
143 
144  for (unsigned int col=0; col<w.size(); col++)
145  for (unsigned int node=0; node<e.nnode; node++)
146  for (unsigned int eqn=0; eqn<neqn; eqn++)
147  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
148 }
149 
150 #ifdef HAVE_ADOLC
151 void adolc_init_fill(bool retape,
152  int tag,
153  const ElemData& e,
154  unsigned int neqn,
155  const std::vector<double>& x,
156  const std::vector< std::vector<double> >& w,
157  std::vector<double>& x_local,
158  std::vector<adouble>& x_ad,
159  double** w_local) {
160  if (retape)
161  trace_on(tag, 1);
162  for (unsigned int node=0; node<e.nnode; node++)
163  for (unsigned int eqn=0; eqn<neqn; eqn++) {
164  x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
165  if (retape)
166  x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
167  }
168 
169  for (unsigned int col=0; col<w.size(); col++)
170  for (unsigned int node=0; node<e.nnode; node++)
171  for (unsigned int eqn=0; eqn<neqn; eqn++)
172  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
173 }
174 #endif
175 
177  unsigned int neqn,
178  const std::vector<double>& x,
179  const std::vector< std::vector<double> >& w,
180  std::vector<double>& x_local,
181  std::vector< std::vector<double> >& w_local) {
182  for (unsigned int node=0; node<e.nnode; node++)
183  for (unsigned int eqn=0; eqn<neqn; eqn++)
184  x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
185 
186  for (unsigned int node=0; node<e.nnode; node++)
187  for (unsigned int eqn=0; eqn<neqn; eqn++)
188  for (unsigned int col=0; col<w.size(); col++)
189  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
190 }
191 
192 template <class T>
194  unsigned int neqn,
195  const std::vector<T>& x,
196  std::vector<T>& u,
197  std::vector<T>& du,
198  std::vector<T>& f) {
199  // Construct element solution, derivative
200  for (unsigned int qp=0; qp<e.nqp; qp++) {
201  for (unsigned int eqn=0; eqn<neqn; eqn++) {
202  u[qp*neqn+eqn] = 0.0;
203  du[qp*neqn+eqn] = 0.0;
204  for (unsigned int node=0; node<e.nnode; node++) {
205  u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
206  du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
207  }
208  }
209  }
210 
211  // Compute sum of equations for coupling
212  std::vector<T> s(e.nqp*neqn);
213  for (unsigned int qp=0; qp<e.nqp; qp++) {
214  for (unsigned int eqn=0; eqn<neqn; eqn++) {
215  s[qp*neqn+eqn] = 0.0;
216  for (unsigned int j=0; j<neqn; j++) {
217  if (j != eqn)
218  s[qp*neqn+eqn] += u[qp*neqn+j];
219  }
220  }
221  }
222 
223  // Evaluate element residual
224  for (unsigned int node=0; node<e.nnode; node++) {
225  for (unsigned int eqn=0; eqn<neqn; eqn++) {
226  unsigned int row = node*neqn+eqn;
227  f[row] = 0.0;
228  for (unsigned int qp=0; qp<e.nqp; qp++) {
229  f[row] +=
230  e.w[qp]*e.jac[qp]*(-(1.0/(e.jac[qp]*e.jac[qp]))*
231  du[qp*neqn+eqn]*e.dphi[qp][node] +
232  e.phi[qp][node]*(exp(u[qp*neqn+eqn]) +
233  u[qp*neqn+eqn]*s[qp*neqn+eqn]));
234  }
235  }
236  }
237 }
238 
240  unsigned int neqn,
241  const std::vector<double>& x,
242  std::vector< std::vector<double> >& w,
243  std::vector<double>& u,
244  std::vector<double>& du,
245  std::vector<double>& f,
246  std::vector< std::vector<double> >& adj) {
247  // Construct element solution, derivative
248  for (unsigned int qp=0; qp<e.nqp; qp++) {
249  for (unsigned int eqn=0; eqn<neqn; eqn++) {
250  u[qp*neqn+eqn] = 0.0;
251  du[qp*neqn+eqn] = 0.0;
252  for (unsigned int node=0; node<e.nnode; node++) {
253  u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
254  du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
255  }
256  }
257  }
258 
259  // Compute sum of equations for coupling
260  std::vector<double> s(e.nqp*neqn);
261  for (unsigned int qp=0; qp<e.nqp; qp++) {
262  for (unsigned int eqn=0; eqn<neqn; eqn++) {
263  s[qp*neqn+eqn] = 0.0;
264  for (unsigned int j=0; j<neqn; j++) {
265  if (j != eqn)
266  s[qp*neqn+eqn] += u[qp*neqn+j];
267  }
268  }
269  }
270 
271  for (unsigned int col=0; col<w.size(); col++)
272  for (unsigned int node=0; node<e.nnode; node++)
273  for (unsigned int eqn=0; eqn<neqn; eqn++)
274  adj[col][node*neqn+eqn] = 0.0;
275 
276  // Evaluate element residual
277  for (unsigned int node=0; node<e.nnode; node++) {
278  for (unsigned int eqn=0; eqn<neqn; eqn++) {
279  unsigned int row = node*neqn+eqn;
280  f[row] = 0.0;
281  for (unsigned int qp=0; qp<e.nqp; qp++) {
282  double c1 = e.w[qp]*e.jac[qp];
283  double c2 = -(1.0/(e.jac[qp]*e.jac[qp]))*e.dphi[qp][node];
284  double c3 = e.phi[qp][node]*(exp(u[qp*neqn+eqn]));
285  double c4 = e.phi[qp][node]*u[qp*neqn+eqn];
286  double c5 = e.phi[qp][node]*s[qp*neqn+eqn];
287  double c35 = c3+c5;
288  double c14 = c1*c4;
289  f[row] += c1*(c2*du[qp*neqn+eqn] + c3 + c4*s[qp*neqn+eqn]);
290  for (unsigned int col=0; col<w.size(); col++) {
291  for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
292  adj[col][node_col*neqn+eqn] +=
293  c1*(c2*e.dphi[qp][node_col] + c35*e.phi[qp][node_col])*w[col][row];
294  for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
295  if (eqn_col != eqn)
296  adj[col][node_col*neqn+eqn_col] += c14*e.phi[qp][node_col]*w[col][row];
297  }
298  }
299  }
300  }
301  }
302  }
303 }
304 
305 template <class FadType>
307  unsigned int neqn,
308  const std::vector< std::vector<double> >& w_local,
309  const std::vector<FadType>& f_fad,
310  std::vector<double>& f,
311  std::vector< std::vector<double> >& adj) {
312  // Get residual
313  for (unsigned int node=0; node<e.nnode; node++)
314  for (unsigned int eqn=0; eqn<neqn; eqn++)
315  f[e.gid[node]*neqn+eqn] += f_fad[node*neqn+eqn].val();
316 
317  // Get adjoint for each adjoint direction
318  for (unsigned int col=0; col<w_local.size(); col++) {
319  FadType z = 0.0;
320  for (unsigned int node=0; node<e.nnode; node++)
321  for (unsigned int eqn=0; eqn<neqn; eqn++)
322  z += f_fad[node*neqn+eqn]*w_local[col][node*neqn+eqn];
323 
324  for (unsigned int node=0; node<e.nnode; node++)
325  for (unsigned int eqn=0; eqn<neqn; eqn++)
326  adj[col][e.gid[node]*neqn+eqn] += z.fastAccessDx(node*neqn+eqn);
327  }
328 }
329 
331  unsigned int neqn,
332  std::vector< std::vector<double> >& w_local,
333  std::vector<RadType>& x_rad,
334  std::vector<RadType>& f_rad,
335  std::vector<RadType*>& ff_rad,
336  std::vector<double>& f,
337  std::vector< std::vector<double> >& adj) {
338  // Get residual
339  for (unsigned int node=0; node<e.nnode; node++)
340  for (unsigned int eqn=0; eqn<neqn; eqn++)
341  f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
342 
343  // Get adjoint for each adjoint direction
344  for (unsigned int col=0; col<w_local.size(); col++) {
346  &ff_rad[0],
347  &w_local[col][0]);
348  for (unsigned int node=0; node<e.nnode; node++)
349  for (unsigned int eqn=0; eqn<neqn; eqn++)
350  adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj();
351  }
352 }
353 
355  unsigned int neqn,
356  unsigned int ncol,
357  std::vector<std::size_t>& vn,
358  double** w_local,
359  std::vector<VRadType>& x_rad,
360  std::vector<VRadType>& f_rad,
361  VRadType*** vf_rad,
362  std::vector<double>& f,
363  std::vector< std::vector<double> >& adj) {
364  // Get residual
365  for (unsigned int node=0; node<e.nnode; node++)
366  for (unsigned int eqn=0; eqn<neqn; eqn++)
367  f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
368 
369  // Get adjoint for each adjoint direction
371  &vn[0],
372  vf_rad,
373  w_local);
374  for (unsigned int col=0; col<ncol; col++)
375  for (unsigned int node=0; node<e.nnode; node++)
376  for (unsigned int eqn=0; eqn<neqn; eqn++)
377  adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj(col);
378 }
379 
380 #ifdef HAVE_ADOLC
381 void adolc_process_fill(bool retape,
382  int tag,
383  const ElemData& e,
384  unsigned int neqn,
385  unsigned int ncol,
386  std::vector<double>& x_local,
387  double **w_local,
388  std::vector<adouble>& f_ad,
389  std::vector<double>& f_local,
390  std::vector<double>& f,
391  double **adj_local,
392  std::vector< std::vector<double> >& adj) {
393  if (retape) {
394  for (unsigned int node=0; node<e.nnode; node++)
395  for (unsigned int eqn=0; eqn<neqn; eqn++)
396  f_ad[node*neqn+eqn] >>= f_local[node*neqn+eqn];
397  trace_off();
398  }
399  else
400  zos_forward(tag, neqn*e.nnode, neqn*e.nnode, 1, &x_local[0], &f_local[0]);
401 
402  fov_reverse(tag, neqn*e.nnode, neqn*e.nnode, ncol, w_local, adj_local);
403 
404  for (unsigned int node=0; node<e.nnode; node++)
405  for (unsigned int eqn=0; eqn<neqn; eqn++)
406  f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
407 
408  for (unsigned int col=0; col<ncol; col++)
409  for (unsigned int node=0; node<e.nnode; node++)
410  for (unsigned int eqn=0; eqn<neqn; eqn++)
411  adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
412 }
413 #endif
414 
416  unsigned int neqn,
417  const std::vector<double>& f_local,
418  const std::vector< std::vector<double> >& adj_local,
419  std::vector<double>& f,
420  std::vector< std::vector<double> >& adj) {
421  for (unsigned int node=0; node<e.nnode; node++)
422  for (unsigned int eqn=0; eqn<neqn; eqn++)
423  f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
424 
425  for (unsigned int col=0; col<adj.size(); col++)
426  for (unsigned int node=0; node<e.nnode; node++)
427  for (unsigned int eqn=0; eqn<neqn; eqn++)
428  adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
429 }
430 
432  unsigned int neqn,
433  const std::vector<double>& f_local,
434  std::vector<double>& f) {
435  for (unsigned int node=0; node<e.nnode; node++)
436  for (unsigned int eqn=0; eqn<neqn; eqn++)
437  f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
438 }
439 
440 template <typename FadType>
441 double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
442  unsigned int num_cols, double mesh_spacing) {
443  ElemData e(mesh_spacing);
444 
445  // Solution vector, residual, adjoint
446  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
447  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
448  std::vector< std::vector<double> > adj(num_cols);
449  for (unsigned int node=0; node<num_nodes; node++)
450  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
451  x[node*num_eqns+eqn] =
452  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
453  f[node*num_eqns+eqn] = 0.0;
454  }
455 
456  for (unsigned int col=0; col<num_cols; col++) {
457  w[col] = std::vector<double>(num_nodes*num_eqns);
458  w_local[col] = std::vector<double>(e.nnode*num_eqns);
459  adj[col] = std::vector<double>(num_nodes*num_eqns);
460  for (unsigned int node=0; node<num_nodes; node++)
461  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
462  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
463  adj[col][node*num_eqns+eqn] = 0.0;
464  }
465  }
466 
467  Teuchos::Time timer("FE Fad Adjoint Fill", false);
468  timer.start(true);
469  std::vector<FadType> x_fad(e.nnode*num_eqns), f_fad(e.nnode*num_eqns);
470  std::vector<FadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
471  for (unsigned int i=0; i<num_nodes-1; i++) {
472  e.gid[0] = i;
473  e.gid[1] = i+1;
474 
475  fad_init_fill(e, num_eqns, x, w, x_fad, w_local);
476  template_element_fill(e, num_eqns, x_fad, u, du, f_fad);
477  fad_process_fill(e, num_eqns, w_local, f_fad, f, adj);
478  }
479  timer.stop();
480 
481  // std::cout << "Fad Residual = " << std::endl;
482  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
483  // std::cout << "\t" << f[i] << std::endl;
484 
485  // std::cout.precision(8);
486  // std::cout << "Fad Adjoint = " << std::endl;
487  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
488  // std::cout << "\t";
489  // for (unsigned int j=0; j<num_cols; j++)
490  // std::cout << adj[j][i] << "\t";
491  // std::cout << std::endl;
492  // }
493 
494  return timer.totalElapsedTime();
495 }
496 
497 double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
498  unsigned int num_cols, double mesh_spacing) {
499  ElemData e(mesh_spacing);
500 
501  // Solution vector, residual, adjoint
502  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
503  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
504  std::vector< std::vector<double> > adj(num_cols);
505  for (unsigned int node=0; node<num_nodes; node++)
506  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
507  x[node*num_eqns+eqn] =
508  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
509  f[node*num_eqns+eqn] = 0.0;
510  }
511 
512  for (unsigned int col=0; col<num_cols; col++) {
513  w[col] = std::vector<double>(num_nodes*num_eqns);
514  w_local[col] = std::vector<double>(e.nnode*num_eqns);
515  adj[col] = std::vector<double>(num_nodes*num_eqns);
516  for (unsigned int node=0; node<num_nodes; node++)
517  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
518  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
519  adj[col][node*num_eqns+eqn] = 0.0;
520  }
521  }
522 
523  Teuchos::Time timer("FE Rad Adjoint Fill", false);
524  timer.start(true);
525  std::vector<RadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns);
526  std::vector<RadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
527  std::vector<RadType*> ff_rad(f_rad.size());
528  for (unsigned int i=0; i<f_rad.size(); i++)
529  ff_rad[i] = &f_rad[i];
530  for (unsigned int i=0; i<num_nodes-1; i++) {
531  e.gid[0] = i;
532  e.gid[1] = i+1;
533 
534  rad_init_fill(e, num_eqns, x, w, x_rad, w_local);
535  template_element_fill(e, num_eqns, x_rad, u, du, f_rad);
536  rad_process_fill(e, num_eqns, w_local, x_rad, f_rad, ff_rad, f, adj);
537  }
538  timer.stop();
539 
540  // std::cout << "Rad Residual = " << std::endl;
541  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
542  // std::cout << "\t" << f[i] << std::endl;
543 
544  // std::cout.precision(8);
545  // std::cout << "Rad Adjoint = " << std::endl;
546  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
547  // std::cout << "\t";
548  // for (unsigned int j=0; j<num_cols; j++)
549  // std::cout << adj[j][i] << "\t";
550  // std::cout << std::endl;
551  // }
552 
553  return timer.totalElapsedTime();
554 }
555 
556 double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
557  unsigned int num_cols, double mesh_spacing) {
558  ElemData e(mesh_spacing);
559 
560  // Solution vector, residual, adjoint
561  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
562  std::vector< std::vector<double> > w(num_cols);
563  double **w_local = new double*[num_cols];
564  std::vector< std::vector<double> > adj(num_cols);
565  for (unsigned int node=0; node<num_nodes; node++)
566  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
567  x[node*num_eqns+eqn] =
568  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
569  f[node*num_eqns+eqn] = 0.0;
570  }
571 
572  for (unsigned int col=0; col<num_cols; col++) {
573  w[col] = std::vector<double>(num_nodes*num_eqns);
574  w_local[col] = new double[e.nnode*num_eqns];
575  adj[col] = std::vector<double>(num_nodes*num_eqns);
576  for (unsigned int node=0; node<num_nodes; node++)
577  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
578  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
579  adj[col][node*num_eqns+eqn] = 0.0;
580  }
581  }
582 
583  Teuchos::Time timer("FE Vector Rad Adjoint Fill", false);
584  timer.start(true);
585  std::vector<VRadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns);
586  std::vector<VRadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
587  VRadType ***vf_rad = new VRadType**[num_cols];
588  std::vector<std::size_t> vn(num_cols);
589  for (unsigned int i=0; i<num_cols; i++) {
590  vf_rad[i] = new VRadType*[num_eqns*e.nnode];
591  vn[i] = num_eqns*e.nnode;
592  for (unsigned int j=0; j<num_eqns*e.nnode; j++)
593  vf_rad[i][j] = &f_rad[j];
594  }
595  for (unsigned int i=0; i<num_nodes-1; i++) {
596  e.gid[0] = i;
597  e.gid[1] = i+1;
598 
599  vrad_init_fill(e, num_eqns, x, w, x_rad, w_local);
600  template_element_fill(e, num_eqns, x_rad, u, du, f_rad);
601  vrad_process_fill(e, num_eqns, num_cols, vn, w_local, x_rad, f_rad, vf_rad,
602  f, adj);
603  }
604  for (unsigned int col=0; col<num_cols; col++) {
605  delete [] w_local[col];
606  delete [] vf_rad[col];
607  }
608  delete [] w_local;
609  delete [] vf_rad;
610  timer.stop();
611 
612  // std::cout << "Vector Rad Residual = " << std::endl;
613  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
614  // std::cout << "\t" << f[i] << std::endl;
615 
616  // std::cout.precision(8);
617  // std::cout << "Vector Rad Adjoint = " << std::endl;
618  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
619  // std::cout << "\t";
620  // for (unsigned int j=0; j<num_cols; j++)
621  // std::cout << adj[j][i] << "\t";
622  // std::cout << std::endl;
623  // }
624 
625  return timer.totalElapsedTime();
626 }
627 
628 #ifdef HAVE_ADOLC
629 double adolc_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
630  unsigned int num_cols, double mesh_spacing) {
631  ElemData e(mesh_spacing);
632 
633  // Solution vector, residual, adjoint
634  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
635  std::vector< std::vector<double> > w(num_cols);
636  std::vector< std::vector<double> > adj(num_cols);
637  for (unsigned int node=0; node<num_nodes; node++)
638  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
639  x[node*num_eqns+eqn] =
640  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
641  f[node*num_eqns+eqn] = 0.0;
642  }
643 
644  for (unsigned int col=0; col<num_cols; col++) {
645  w[col] = std::vector<double>(num_nodes*num_eqns);
646  adj[col] = std::vector<double>(num_nodes*num_eqns);
647  for (unsigned int node=0; node<num_nodes; node++)
648  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
649  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
650  adj[col][node*num_eqns+eqn] = 0.0;
651  }
652  }
653 
654  Teuchos::Time timer("FE ADOL-C Adjoint Fill", false);
655  timer.start(true);
656  std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
657  std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
658  std::vector<double> x_local(e.nnode*num_eqns);
659  std::vector<double> f_local(e.nnode*num_eqns);
660  double **adj_local = new double*[num_cols];
661  double **w_local = new double*[num_cols];
662  for (unsigned int i=0; i<num_cols; i++) {
663  adj_local[i] = new double[e.nnode*num_eqns];
664  w_local[i] = new double[e.nnode*num_eqns];
665  }
666 
667  // Tape first element
668  e.gid[0] = 0;
669  e.gid[1] = 1;
670  adolc_init_fill(true, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
671  template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
672  adolc_process_fill(true, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
673  f_local, f, adj_local, adj);
674 
675  // Now do remaining fills reusing tape
676  for (unsigned int i=1; i<num_nodes-1; i++) {
677  e.gid[0] = i;
678  e.gid[1] = i+1;
679 
680  adolc_init_fill(false, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
681  adolc_process_fill(false, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
682  f_local, f, adj_local, adj);
683  }
684  for (unsigned int i=0; i<num_cols; i++) {
685  delete [] adj_local[i];
686  delete [] w_local[i];
687  }
688  delete [] adj_local;
689  delete [] w_local;
690  timer.stop();
691 
692  // std::cout << "ADOL-C Residual = " << std::endl;
693  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
694  // std::cout << "\t" << f[i] << std::endl;
695 
696  // std::cout.precision(8);
697  // std::cout << "ADOL-C Adjoint = " << std::endl;
698  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
699  // std::cout << "\t";
700  // for (unsigned int j=0; j<num_cols; j++)
701  // std::cout << adj[j][i] << "\t";
702  // std::cout << std::endl;
703  // }
704 
705  return timer.totalElapsedTime();
706 }
707 
708 double adolc_retape_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
709  unsigned int num_cols, double mesh_spacing) {
710  ElemData e(mesh_spacing);
711 
712  // Solution vector, residual, adjoint
713  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
714  std::vector< std::vector<double> > w(num_cols);
715  std::vector< std::vector<double> > adj(num_cols);
716  for (unsigned int node=0; node<num_nodes; node++)
717  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
718  x[node*num_eqns+eqn] =
719  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
720  f[node*num_eqns+eqn] = 0.0;
721  }
722 
723  for (unsigned int col=0; col<num_cols; col++) {
724  w[col] = std::vector<double>(num_nodes*num_eqns);
725  adj[col] = std::vector<double>(num_nodes*num_eqns);
726  for (unsigned int node=0; node<num_nodes; node++)
727  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
728  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
729  adj[col][node*num_eqns+eqn] = 0.0;
730  }
731  }
732 
733  Teuchos::Time timer("FE ADOL-C Retape Adjoint Fill", false);
734  timer.start(true);
735  std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
736  std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
737  std::vector<double> x_local(e.nnode*num_eqns);
738  std::vector<double> f_local(e.nnode*num_eqns);
739  double **adj_local = new double*[num_cols];
740  double **w_local = new double*[num_cols];
741  for (unsigned int i=0; i<num_cols; i++) {
742  adj_local[i] = new double[e.nnode*num_eqns];
743  w_local[i] = new double[e.nnode*num_eqns];
744  }
745 
746  for (unsigned int i=0; i<num_nodes-1; i++) {
747  e.gid[0] = i;
748  e.gid[1] = i+1;
749 
750  adolc_init_fill(true, 1, e, num_eqns, x, w, x_local, x_ad, w_local);
751  template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
752  adolc_process_fill(true, 1, e, num_eqns, num_cols, x_local, w_local, f_ad,
753  f_local, f, adj_local, adj);
754  }
755  for (unsigned int i=0; i<num_cols; i++) {
756  delete [] adj_local[i];
757  delete [] w_local[i];
758  }
759  delete [] adj_local;
760  delete [] w_local;
761  timer.stop();
762 
763  // std::cout << "ADOL-C Residual (retaped) = " << std::endl;
764  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
765  // std::cout << "\t" << f[i] << std::endl;
766 
767  // std::cout.precision(8);
768  // std::cout << "ADOL-C Adjoint (retaped) = " << std::endl;
769  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
770  // std::cout << "\t";
771  // for (unsigned int j=0; j<num_cols; j++)
772  // std::cout << adj[j][i] << "\t";
773  // std::cout << std::endl;
774  // }
775 
776  return timer.totalElapsedTime();
777 }
778 #endif
779 
780 double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
781  unsigned int num_cols, double mesh_spacing) {
782  ElemData e(mesh_spacing);
783 
784  // Solution vector, residual, adjoint
785  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
786  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
787  std::vector< std::vector<double> > adj(num_cols);
788  for (unsigned int node=0; node<num_nodes; node++)
789  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
790  x[node*num_eqns+eqn] =
791  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
792  f[node*num_eqns+eqn] = 0.0;
793  }
794 
795  for (unsigned int col=0; col<num_cols; col++) {
796  w[col] = std::vector<double>(num_nodes*num_eqns);
797  w_local[col] = std::vector<double>(e.nnode*num_eqns);
798  adj[col] = std::vector<double>(num_nodes*num_eqns);
799  for (unsigned int node=0; node<num_nodes; node++)
800  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
801  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
802  adj[col][node*num_eqns+eqn] = 0.0;
803  }
804  }
805 
806  Teuchos::Time timer("FE Analytic Adjoint Fill", false);
807  timer.start(true);
808  std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
809  std::vector< std::vector<double> > adj_local(num_cols);
810  std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
811  for (unsigned int i=0; i<num_cols; i++)
812  adj_local[i] = std::vector<double>(e.nnode*num_eqns);
813  for (unsigned int i=0; i<num_nodes-1; i++) {
814  e.gid[0] = i;
815  e.gid[1] = i+1;
816 
817  analytic_init_fill(e, num_eqns, x, w, x_local, w_local);
818  analytic_element_fill(e, num_eqns, x_local, w_local, u, du, f_local,
819  adj_local);
820  analytic_process_fill(e, num_eqns, f_local, adj_local, f, adj);
821  }
822  timer.stop();
823 
824  // std::cout.precision(8);
825  // std::cout << "Analytic Residual = " << std::endl;
826  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
827  // std::cout << "\t" << f[i] << std::endl;
828 
829  // std::cout.precision(8);
830  // std::cout << "Analytic Adjoint = " << std::endl;
831  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
832  // std::cout << "\t";
833  // for (unsigned int j=0; j<num_cols; j++)
834  // std::cout << adj[j][i] << "\t";
835  // std::cout << std::endl;
836  // }
837 
838  return timer.totalElapsedTime();
839 }
840 
841 double residual_fill(unsigned int num_nodes, unsigned int num_eqns,
842  unsigned int num_cols, double mesh_spacing) {
843  ElemData e(mesh_spacing);
844 
845  // Solution vector, residual, jacobian
846  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
847  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
848  for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
849  for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
850  unsigned int row = node_row*num_eqns + eqn_row;
851  x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
852  f[row] = 0.0;
853  }
854  }
855 
856  for (unsigned int col=0; col<num_cols; col++) {
857  w[col] = std::vector<double>(num_nodes*num_eqns);
858  w_local[col] = std::vector<double>(e.nnode*num_eqns);
859  for (unsigned int node=0; node<num_nodes; node++)
860  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
861  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
862  }
863  }
864 
865  Teuchos::Time timer("FE Residual Fill", false);
866  timer.start(true);
867  std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
868  std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
869  for (unsigned int i=0; i<num_nodes-1; i++) {
870  e.gid[0] = i;
871  e.gid[1] = i+1;
872 
873  analytic_init_fill(e, num_eqns, x, w, x_local, w_local);
874  template_element_fill(e, num_eqns, x_local, u, du, f_local);
875  residual_process_fill(e, num_eqns, f_local, f);
876  }
877  timer.stop();
878 
879  // std::cout.precision(8);
880  // std::cout << "Analytic Residual = " << std::endl;
881  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
882  // std::cout << "\t" << f[i] << std::endl;
883 
884  return timer.totalElapsedTime();
885 }
886 
887 int main(int argc, char* argv[]) {
888  int ierr = 0;
889 
890  try {
891  double t, ta, tr;
892  int p = 2;
893  int w = p+7;
894 
895  // Set up command line options
897  clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
898  int num_nodes = 100000;
899  int num_eqns = 2;
900  int num_cols = 4;
901  int rt = 0;
902  clp.setOption("n", &num_nodes, "Number of nodes");
903  clp.setOption("p", &num_eqns, "Number of equations");
904  clp.setOption("q", &num_cols, "Number of adjoint directions");
905  clp.setOption("rt", &rt, "Include ADOL-C retaping test");
906 
907  // Parse options
909  parseReturn= clp.parse(argc, argv);
911  return 1;
912 
913  double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1);
914 
915  std::cout.setf(std::ios::scientific);
916  std::cout.precision(p);
917  std::cout << "num_nodes = " << num_nodes
918  << ", num_eqns = " << num_eqns
919  << ", num_cols = " << num_cols << ": " << std::endl
920  << " " << " Time" << "\t"<< "Time/Analytic" << "\t"
921  << "Time/Residual" << std::endl;
922 
923  ta = 1.0;
924  tr = 1.0;
925 
926  tr = residual_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
927 
928  ta = analytic_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
929  std::cout << "Analytic: " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/tr << std::endl;
930 
931 #ifdef HAVE_ADOLC
932  t = adolc_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
933  std::cout << "ADOL-C: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
934 
935  if (rt != 0) {
936  t = adolc_retape_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
937  std::cout << "ADOL-C(rt):" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
938  }
939 #endif
940 
941  t = fad_adj_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
942  std::cout << "DFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
943 
944  t = fad_adj_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
945  std::cout << "ELRDFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
946 
947  t = rad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
948  std::cout << "Rad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
949 
950  t = vrad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
951  std::cout << "Vector Rad:" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
952 
953  }
954  catch (std::exception& e) {
955  std::cout << e.what() << std::endl;
956  ierr = 1;
957  }
958  catch (const char *s) {
959  std::cout << s << std::endl;
960  ierr = 1;
961  }
962  catch (...) {
963  std::cout << "Caught unknown exception!" << std::endl;
964  ierr = 1;
965  }
966 
967  return ierr;
968 }
void rad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< RadType > &x_rad, std::vector< std::vector< double > > &w_local)
double residual_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)
void template_element_fill(const ElemData &e, unsigned int neqn, const std::vector< T > &x, std::vector< T > &u, std::vector< T > &du, std::vector< T > &f)
InactiveDouble * jac
InactiveDouble * w
void analytic_element_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &u, std::vector< double > &du, std::vector< double > &f, std::vector< std::vector< double > > &jac)
Sacado::Fad::DFad< double > FadType
void analytic_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, const std::vector< std::vector< double > > &jac_local, std::vector< double > &f, std::vector< std::vector< double > > &jac)
ElemData(double mesh_spacing)
double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void fad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< FadType > &x_fad)
static void Weighted_Gradcomp(size_t, ADVar **, Double *)
void start(bool reset=false)
Sacado::RadVec::ADvar< double > VRadType
void rad_process_fill(const ElemData &e, unsigned int neqn, std::vector< std::vector< double > > &w_local, std::vector< RadType > &x_rad, std::vector< RadType > &f_rad, std::vector< RadType * > &ff_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
void analytic_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &x_local)
double stop()
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
static void Weighted_GradcompVec(size_t, size_t *, ADVar ***, Double **)
int main()
Definition: ad_example.cpp:191
sqrt(expr.val())
void vrad_process_fill(const ElemData &e, unsigned int neqn, unsigned int ncol, std::vector< std::size_t > &vn, double **w_local, std::vector< VRadType > &x_rad, std::vector< VRadType > &f_rad, VRadType ***vf_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void residual_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, std::vector< double > &f)
InactiveDouble ** phi
double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
InactiveDouble ** dphi
void setDocString(const char doc_string[])
void fad_process_fill(const ElemData &e, unsigned int neqn, const std::vector< FadType > &f_fad, std::vector< double > &f, std::vector< std::vector< double > > &jac)
double totalElapsedTime(bool readCurrentTime=false) const
Sacado::Rad::ADvar< double > RadType
double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
exp(expr.val())
double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void vrad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< VRadType > &x_rad, double **w_local)