Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
rad_fe_adj_fill.cpp
Go to the documentation of this file.
1 // $Id$
2 // $Source$
3 // @HEADER
4 // ***********************************************************************
5 //
6 // Sacado Package
7 // Copyright (2006) Sandia Corporation
8 //
9 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // This library is free software; you can redistribute it and/or modify
13 // it under the terms of the GNU Lesser General Public License as
14 // published by the Free Software Foundation; either version 2.1 of the
15 // License, or (at your option) any later version.
16 //
17 // This library is distributed in the hope that it will be useful, but
18 // WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License along with this library; if not, write to the Free Software
24 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 // USA
26 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
27 // (etphipp@sandia.gov).
28 //
29 // ***********************************************************************
30 // @HEADER
31 
32 #include "Sacado_No_Kokkos.hpp"
33 #include "Sacado_tradvec.hpp"
34 
35 #include "Teuchos_Time.hpp"
37 
38 // ADOL-C includes
39 #ifdef HAVE_ADOLC
40 #ifdef PACKAGE
41 #undef PACKAGE
42 #endif
43 #ifdef PACKAGE_NAME
44 #undef PACKAGE_NAME
45 #endif
46 #ifdef PACKAGE_BUGREPORT
47 #undef PACKAGE_BUGREPORT
48 #endif
49 #ifdef PACKAGE_STRING
50 #undef PACKAGE_STRING
51 #endif
52 #ifdef PACKAGE_TARNAME
53 #undef PACKAGE_TARNAME
54 #endif
55 #ifdef PACKAGE_VERSION
56 #undef PACKAGE_VERSION
57 #endif
58 #ifdef VERSION
59 #undef VERSION
60 #endif
61 #include "adolc/adouble.h"
62 #include "adolc/drivers/drivers.h"
63 #include "adolc/interfaces.h"
64 #include "adolc/taping.h"
65 #endif
66 
67 // A performance test that computes a finite-element-like adjoint using
68 // Rad and ADOL-C
69 
72 
73 struct ElemData {
74  static const unsigned int nqp = 2;
75  static const unsigned int nnode = 2;
76  double w[nqp], jac[nqp], phi[nqp][nnode], dphi[nqp][nnode];
77  unsigned int gid[nnode];
78 
79  ElemData(double mesh_spacing) {
80  // Quadrature points
81  double xi[nqp];
82  xi[0] = -1.0 / std::sqrt(3.0);
83  xi[1] = 1.0 / std::sqrt(3.0);
84 
85  for (unsigned int i=0; i<nqp; i++) {
86  // Weights
87  w[i] = 1.0;
88 
89  // Element transformation Jacobian
90  jac[i] = 0.5*mesh_spacing;
91 
92  // Shape functions & derivatives
93  phi[i][0] = 0.5*(1.0 - xi[i]);
94  phi[i][1] = 0.5*(1.0 + xi[i]);
95  dphi[i][0] = -0.5;
96  dphi[i][1] = 0.5;
97  }
98  }
99 };
100 
101 template <class FadType>
102 void fad_init_fill(const ElemData& e,
103  unsigned int neqn,
104  const std::vector<double>& x,
105  const std::vector< std::vector<double> >& w,
106  std::vector<FadType>& x_fad,
107  std::vector< std::vector<double> >& w_local) {
108  for (unsigned int node=0; node<e.nnode; node++)
109  for (unsigned int eqn=0; eqn<neqn; eqn++)
110  x_fad[node*neqn+eqn] = FadType(e.nnode*neqn, node*neqn+eqn,
111  x[e.gid[node]*neqn+eqn]);
112 
113  for (unsigned int col=0; col<w.size(); col++)
114  for (unsigned int node=0; node<e.nnode; node++)
115  for (unsigned int eqn=0; eqn<neqn; eqn++)
116  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
117 }
118 
119 void rad_init_fill(const ElemData& e,
120  unsigned int neqn,
121  const std::vector<double>& x,
122  const std::vector< std::vector<double> >& w,
123  std::vector<RadType>& x_rad,
124  std::vector< std::vector<double> >& w_local) {
125  for (unsigned int node=0; node<e.nnode; node++)
126  for (unsigned int eqn=0; eqn<neqn; eqn++)
127  x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
128 
129  for (unsigned int col=0; col<w.size(); col++)
130  for (unsigned int node=0; node<e.nnode; node++)
131  for (unsigned int eqn=0; eqn<neqn; eqn++)
132  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
133 }
134 
135 void vrad_init_fill(const ElemData& e,
136  unsigned int neqn,
137  const std::vector<double>& x,
138  const std::vector< std::vector<double> >& w,
139  std::vector<VRadType>& x_rad,
140  double** w_local) {
141  for (unsigned int node=0; node<e.nnode; node++)
142  for (unsigned int eqn=0; eqn<neqn; eqn++)
143  x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
144 
145  for (unsigned int col=0; col<w.size(); col++)
146  for (unsigned int node=0; node<e.nnode; node++)
147  for (unsigned int eqn=0; eqn<neqn; eqn++)
148  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
149 }
150 
151 #ifdef HAVE_ADOLC
152 void adolc_init_fill(bool retape,
153  int tag,
154  const ElemData& e,
155  unsigned int neqn,
156  const std::vector<double>& x,
157  const std::vector< std::vector<double> >& w,
158  std::vector<double>& x_local,
159  std::vector<adouble>& x_ad,
160  double** w_local) {
161  if (retape)
162  trace_on(tag, 1);
163  for (unsigned int node=0; node<e.nnode; node++)
164  for (unsigned int eqn=0; eqn<neqn; eqn++) {
165  x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
166  if (retape)
167  x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
168  }
169 
170  for (unsigned int col=0; col<w.size(); col++)
171  for (unsigned int node=0; node<e.nnode; node++)
172  for (unsigned int eqn=0; eqn<neqn; eqn++)
173  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
174 }
175 #endif
176 
178  unsigned int neqn,
179  const std::vector<double>& x,
180  const std::vector< std::vector<double> >& w,
181  std::vector<double>& x_local,
182  std::vector< std::vector<double> >& w_local) {
183  for (unsigned int node=0; node<e.nnode; node++)
184  for (unsigned int eqn=0; eqn<neqn; eqn++)
185  x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
186 
187  for (unsigned int node=0; node<e.nnode; node++)
188  for (unsigned int eqn=0; eqn<neqn; eqn++)
189  for (unsigned int col=0; col<w.size(); col++)
190  w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
191 }
192 
193 template <class T>
195  unsigned int neqn,
196  const std::vector<T>& x,
197  std::vector<T>& u,
198  std::vector<T>& du,
199  std::vector<T>& f) {
200  // Construct element solution, derivative
201  for (unsigned int qp=0; qp<e.nqp; qp++) {
202  for (unsigned int eqn=0; eqn<neqn; eqn++) {
203  u[qp*neqn+eqn] = 0.0;
204  du[qp*neqn+eqn] = 0.0;
205  for (unsigned int node=0; node<e.nnode; node++) {
206  u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
207  du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
208  }
209  }
210  }
211 
212  // Compute sum of equations for coupling
213  std::vector<T> s(e.nqp*neqn);
214  for (unsigned int qp=0; qp<e.nqp; qp++) {
215  for (unsigned int eqn=0; eqn<neqn; eqn++) {
216  s[qp*neqn+eqn] = 0.0;
217  for (unsigned int j=0; j<neqn; j++) {
218  if (j != eqn)
219  s[qp*neqn+eqn] += u[qp*neqn+j];
220  }
221  }
222  }
223 
224  // Evaluate element residual
225  for (unsigned int node=0; node<e.nnode; node++) {
226  for (unsigned int eqn=0; eqn<neqn; eqn++) {
227  unsigned int row = node*neqn+eqn;
228  f[row] = 0.0;
229  for (unsigned int qp=0; qp<e.nqp; qp++) {
230  f[row] +=
231  e.w[qp]*e.jac[qp]*(-(1.0/(e.jac[qp]*e.jac[qp]))*
232  du[qp*neqn+eqn]*e.dphi[qp][node] +
233  e.phi[qp][node]*(exp(u[qp*neqn+eqn]) +
234  u[qp*neqn+eqn]*s[qp*neqn+eqn]));
235  }
236  }
237  }
238 }
239 
241  unsigned int neqn,
242  const std::vector<double>& x,
243  std::vector< std::vector<double> >& w,
244  std::vector<double>& u,
245  std::vector<double>& du,
246  std::vector<double>& f,
247  std::vector< std::vector<double> >& adj) {
248  // Construct element solution, derivative
249  for (unsigned int qp=0; qp<e.nqp; qp++) {
250  for (unsigned int eqn=0; eqn<neqn; eqn++) {
251  u[qp*neqn+eqn] = 0.0;
252  du[qp*neqn+eqn] = 0.0;
253  for (unsigned int node=0; node<e.nnode; node++) {
254  u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
255  du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
256  }
257  }
258  }
259 
260  // Compute sum of equations for coupling
261  std::vector<double> s(e.nqp*neqn);
262  for (unsigned int qp=0; qp<e.nqp; qp++) {
263  for (unsigned int eqn=0; eqn<neqn; eqn++) {
264  s[qp*neqn+eqn] = 0.0;
265  for (unsigned int j=0; j<neqn; j++) {
266  if (j != eqn)
267  s[qp*neqn+eqn] += u[qp*neqn+j];
268  }
269  }
270  }
271 
272  for (unsigned int col=0; col<w.size(); col++)
273  for (unsigned int node=0; node<e.nnode; node++)
274  for (unsigned int eqn=0; eqn<neqn; eqn++)
275  adj[col][node*neqn+eqn] = 0.0;
276 
277  // Evaluate element residual
278  for (unsigned int node=0; node<e.nnode; node++) {
279  for (unsigned int eqn=0; eqn<neqn; eqn++) {
280  unsigned int row = node*neqn+eqn;
281  f[row] = 0.0;
282  for (unsigned int qp=0; qp<e.nqp; qp++) {
283  double c1 = e.w[qp]*e.jac[qp];
284  double c2 = -(1.0/(e.jac[qp]*e.jac[qp]))*e.dphi[qp][node];
285  double c3 = e.phi[qp][node]*(exp(u[qp*neqn+eqn]));
286  double c4 = e.phi[qp][node]*u[qp*neqn+eqn];
287  double c5 = e.phi[qp][node]*s[qp*neqn+eqn];
288  double c35 = c3+c5;
289  double c14 = c1*c4;
290  f[row] += c1*(c2*du[qp*neqn+eqn] + c3 + c4*s[qp*neqn+eqn]);
291  for (unsigned int col=0; col<w.size(); col++) {
292  for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
293  adj[col][node_col*neqn+eqn] +=
294  c1*(c2*e.dphi[qp][node_col] + c35*e.phi[qp][node_col])*w[col][row];
295  for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
296  if (eqn_col != eqn)
297  adj[col][node_col*neqn+eqn_col] += c14*e.phi[qp][node_col]*w[col][row];
298  }
299  }
300  }
301  }
302  }
303  }
304 }
305 
306 template <class FadType>
308  unsigned int neqn,
309  const std::vector< std::vector<double> >& w_local,
310  const std::vector<FadType>& f_fad,
311  std::vector<double>& f,
312  std::vector< std::vector<double> >& adj) {
313  // Get residual
314  for (unsigned int node=0; node<e.nnode; node++)
315  for (unsigned int eqn=0; eqn<neqn; eqn++)
316  f[e.gid[node]*neqn+eqn] += f_fad[node*neqn+eqn].val();
317 
318  // Get adjoint for each adjoint direction
319  for (unsigned int col=0; col<w_local.size(); col++) {
320  FadType z = 0.0;
321  for (unsigned int node=0; node<e.nnode; node++)
322  for (unsigned int eqn=0; eqn<neqn; eqn++)
323  z += f_fad[node*neqn+eqn]*w_local[col][node*neqn+eqn];
324 
325  for (unsigned int node=0; node<e.nnode; node++)
326  for (unsigned int eqn=0; eqn<neqn; eqn++)
327  adj[col][e.gid[node]*neqn+eqn] += z.fastAccessDx(node*neqn+eqn);
328  }
329 }
330 
332  unsigned int neqn,
333  std::vector< std::vector<double> >& w_local,
334  std::vector<RadType>& x_rad,
335  std::vector<RadType>& f_rad,
336  std::vector<RadType*>& ff_rad,
337  std::vector<double>& f,
338  std::vector< std::vector<double> >& adj) {
339  // Get residual
340  for (unsigned int node=0; node<e.nnode; node++)
341  for (unsigned int eqn=0; eqn<neqn; eqn++)
342  f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
343 
344  // Get adjoint for each adjoint direction
345  for (unsigned int col=0; col<w_local.size(); col++) {
347  &ff_rad[0],
348  &w_local[col][0]);
349  for (unsigned int node=0; node<e.nnode; node++)
350  for (unsigned int eqn=0; eqn<neqn; eqn++)
351  adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj();
352  }
353 }
354 
356  unsigned int neqn,
357  unsigned int ncol,
358  std::vector<std::size_t>& vn,
359  double** w_local,
360  std::vector<VRadType>& x_rad,
361  std::vector<VRadType>& f_rad,
362  VRadType*** vf_rad,
363  std::vector<double>& f,
364  std::vector< std::vector<double> >& adj) {
365  // Get residual
366  for (unsigned int node=0; node<e.nnode; node++)
367  for (unsigned int eqn=0; eqn<neqn; eqn++)
368  f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
369 
370  // Get adjoint for each adjoint direction
372  &vn[0],
373  vf_rad,
374  w_local);
375  for (unsigned int col=0; col<ncol; col++)
376  for (unsigned int node=0; node<e.nnode; node++)
377  for (unsigned int eqn=0; eqn<neqn; eqn++)
378  adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj(col);
379 }
380 
381 #ifdef HAVE_ADOLC
382 void adolc_process_fill(bool retape,
383  int tag,
384  const ElemData& e,
385  unsigned int neqn,
386  unsigned int ncol,
387  std::vector<double>& x_local,
388  double **w_local,
389  std::vector<adouble>& f_ad,
390  std::vector<double>& f_local,
391  std::vector<double>& f,
392  double **adj_local,
393  std::vector< std::vector<double> >& adj) {
394  if (retape) {
395  for (unsigned int node=0; node<e.nnode; node++)
396  for (unsigned int eqn=0; eqn<neqn; eqn++)
397  f_ad[node*neqn+eqn] >>= f_local[node*neqn+eqn];
398  trace_off();
399  }
400  else
401  zos_forward(tag, neqn*e.nnode, neqn*e.nnode, 1, &x_local[0], &f_local[0]);
402 
403  fov_reverse(tag, neqn*e.nnode, neqn*e.nnode, ncol, w_local, adj_local);
404 
405  for (unsigned int node=0; node<e.nnode; node++)
406  for (unsigned int eqn=0; eqn<neqn; eqn++)
407  f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
408 
409  for (unsigned int col=0; col<ncol; col++)
410  for (unsigned int node=0; node<e.nnode; node++)
411  for (unsigned int eqn=0; eqn<neqn; eqn++)
412  adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
413 }
414 #endif
415 
417  unsigned int neqn,
418  const std::vector<double>& f_local,
419  const std::vector< std::vector<double> >& adj_local,
420  std::vector<double>& f,
421  std::vector< std::vector<double> >& adj) {
422  for (unsigned int node=0; node<e.nnode; node++)
423  for (unsigned int eqn=0; eqn<neqn; eqn++)
424  f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
425 
426  for (unsigned int col=0; col<adj.size(); col++)
427  for (unsigned int node=0; node<e.nnode; node++)
428  for (unsigned int eqn=0; eqn<neqn; eqn++)
429  adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
430 }
431 
433  unsigned int neqn,
434  const std::vector<double>& f_local,
435  std::vector<double>& f) {
436  for (unsigned int node=0; node<e.nnode; node++)
437  for (unsigned int eqn=0; eqn<neqn; eqn++)
438  f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
439 }
440 
441 template <typename FadType>
442 double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
443  unsigned int num_cols, double mesh_spacing) {
444  ElemData e(mesh_spacing);
445 
446  // Solution vector, residual, adjoint
447  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
448  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
449  std::vector< std::vector<double> > adj(num_cols);
450  for (unsigned int node=0; node<num_nodes; node++)
451  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
452  x[node*num_eqns+eqn] =
453  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
454  f[node*num_eqns+eqn] = 0.0;
455  }
456 
457  for (unsigned int col=0; col<num_cols; col++) {
458  w[col] = std::vector<double>(num_nodes*num_eqns);
459  w_local[col] = std::vector<double>(e.nnode*num_eqns);
460  adj[col] = std::vector<double>(num_nodes*num_eqns);
461  for (unsigned int node=0; node<num_nodes; node++)
462  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
463  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
464  adj[col][node*num_eqns+eqn] = 0.0;
465  }
466  }
467 
468  Teuchos::Time timer("FE Fad Adjoint Fill", false);
469  timer.start(true);
470  std::vector<FadType> x_fad(e.nnode*num_eqns), f_fad(e.nnode*num_eqns);
471  std::vector<FadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
472  for (unsigned int i=0; i<num_nodes-1; i++) {
473  e.gid[0] = i;
474  e.gid[1] = i+1;
475 
476  fad_init_fill(e, num_eqns, x, w, x_fad, w_local);
477  template_element_fill(e, num_eqns, x_fad, u, du, f_fad);
478  fad_process_fill(e, num_eqns, w_local, f_fad, f, adj);
479  }
480  timer.stop();
481 
482  // std::cout << "Fad Residual = " << std::endl;
483  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
484  // std::cout << "\t" << f[i] << std::endl;
485 
486  // std::cout.precision(8);
487  // std::cout << "Fad Adjoint = " << std::endl;
488  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
489  // std::cout << "\t";
490  // for (unsigned int j=0; j<num_cols; j++)
491  // std::cout << adj[j][i] << "\t";
492  // std::cout << std::endl;
493  // }
494 
495  return timer.totalElapsedTime();
496 }
497 
498 double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
499  unsigned int num_cols, double mesh_spacing) {
500  ElemData e(mesh_spacing);
501 
502  // Solution vector, residual, adjoint
503  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
504  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
505  std::vector< std::vector<double> > adj(num_cols);
506  for (unsigned int node=0; node<num_nodes; node++)
507  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
508  x[node*num_eqns+eqn] =
509  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
510  f[node*num_eqns+eqn] = 0.0;
511  }
512 
513  for (unsigned int col=0; col<num_cols; col++) {
514  w[col] = std::vector<double>(num_nodes*num_eqns);
515  w_local[col] = std::vector<double>(e.nnode*num_eqns);
516  adj[col] = std::vector<double>(num_nodes*num_eqns);
517  for (unsigned int node=0; node<num_nodes; node++)
518  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
519  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
520  adj[col][node*num_eqns+eqn] = 0.0;
521  }
522  }
523 
524  Teuchos::Time timer("FE Rad Adjoint Fill", false);
525  timer.start(true);
526  std::vector<RadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns);
527  std::vector<RadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
528  std::vector<RadType*> ff_rad(f_rad.size());
529  for (unsigned int i=0; i<f_rad.size(); i++)
530  ff_rad[i] = &f_rad[i];
531  for (unsigned int i=0; i<num_nodes-1; i++) {
532  e.gid[0] = i;
533  e.gid[1] = i+1;
534 
535  rad_init_fill(e, num_eqns, x, w, x_rad, w_local);
536  template_element_fill(e, num_eqns, x_rad, u, du, f_rad);
537  rad_process_fill(e, num_eqns, w_local, x_rad, f_rad, ff_rad, f, adj);
538  }
539  timer.stop();
540 
541  // std::cout << "Rad Residual = " << std::endl;
542  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
543  // std::cout << "\t" << f[i] << std::endl;
544 
545  // std::cout.precision(8);
546  // std::cout << "Rad Adjoint = " << std::endl;
547  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
548  // std::cout << "\t";
549  // for (unsigned int j=0; j<num_cols; j++)
550  // std::cout << adj[j][i] << "\t";
551  // std::cout << std::endl;
552  // }
553 
554  return timer.totalElapsedTime();
555 }
556 
557 double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
558  unsigned int num_cols, double mesh_spacing) {
559  ElemData e(mesh_spacing);
560 
561  // Solution vector, residual, adjoint
562  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
563  std::vector< std::vector<double> > w(num_cols);
564  double **w_local = new double*[num_cols];
565  std::vector< std::vector<double> > adj(num_cols);
566  for (unsigned int node=0; node<num_nodes; node++)
567  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
568  x[node*num_eqns+eqn] =
569  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
570  f[node*num_eqns+eqn] = 0.0;
571  }
572 
573  for (unsigned int col=0; col<num_cols; col++) {
574  w[col] = std::vector<double>(num_nodes*num_eqns);
575  w_local[col] = new double[e.nnode*num_eqns];
576  adj[col] = std::vector<double>(num_nodes*num_eqns);
577  for (unsigned int node=0; node<num_nodes; node++)
578  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
579  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
580  adj[col][node*num_eqns+eqn] = 0.0;
581  }
582  }
583 
584  Teuchos::Time timer("FE Vector Rad Adjoint Fill", false);
585  timer.start(true);
586  std::vector<VRadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns);
587  std::vector<VRadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
588  VRadType ***vf_rad = new VRadType**[num_cols];
589  std::vector<std::size_t> vn(num_cols);
590  for (unsigned int i=0; i<num_cols; i++) {
591  vf_rad[i] = new VRadType*[num_eqns*e.nnode];
592  vn[i] = num_eqns*e.nnode;
593  for (unsigned int j=0; j<num_eqns*e.nnode; j++)
594  vf_rad[i][j] = &f_rad[j];
595  }
596  for (unsigned int i=0; i<num_nodes-1; i++) {
597  e.gid[0] = i;
598  e.gid[1] = i+1;
599 
600  vrad_init_fill(e, num_eqns, x, w, x_rad, w_local);
601  template_element_fill(e, num_eqns, x_rad, u, du, f_rad);
602  vrad_process_fill(e, num_eqns, num_cols, vn, w_local, x_rad, f_rad, vf_rad,
603  f, adj);
604  }
605  for (unsigned int col=0; col<num_cols; col++) {
606  delete [] w_local[col];
607  delete [] vf_rad[col];
608  }
609  delete [] w_local;
610  delete [] vf_rad;
611  timer.stop();
612 
613  // std::cout << "Vector Rad Residual = " << std::endl;
614  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
615  // std::cout << "\t" << f[i] << std::endl;
616 
617  // std::cout.precision(8);
618  // std::cout << "Vector Rad Adjoint = " << std::endl;
619  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
620  // std::cout << "\t";
621  // for (unsigned int j=0; j<num_cols; j++)
622  // std::cout << adj[j][i] << "\t";
623  // std::cout << std::endl;
624  // }
625 
626  return timer.totalElapsedTime();
627 }
628 
629 #ifdef HAVE_ADOLC
630 double adolc_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
631  unsigned int num_cols, double mesh_spacing) {
632  ElemData e(mesh_spacing);
633 
634  // Solution vector, residual, adjoint
635  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
636  std::vector< std::vector<double> > w(num_cols);
637  std::vector< std::vector<double> > adj(num_cols);
638  for (unsigned int node=0; node<num_nodes; node++)
639  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
640  x[node*num_eqns+eqn] =
641  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
642  f[node*num_eqns+eqn] = 0.0;
643  }
644 
645  for (unsigned int col=0; col<num_cols; col++) {
646  w[col] = std::vector<double>(num_nodes*num_eqns);
647  adj[col] = std::vector<double>(num_nodes*num_eqns);
648  for (unsigned int node=0; node<num_nodes; node++)
649  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
650  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
651  adj[col][node*num_eqns+eqn] = 0.0;
652  }
653  }
654 
655  Teuchos::Time timer("FE ADOL-C Adjoint Fill", false);
656  timer.start(true);
657  std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
658  std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
659  std::vector<double> x_local(e.nnode*num_eqns);
660  std::vector<double> f_local(e.nnode*num_eqns);
661  double **adj_local = new double*[num_cols];
662  double **w_local = new double*[num_cols];
663  for (unsigned int i=0; i<num_cols; i++) {
664  adj_local[i] = new double[e.nnode*num_eqns];
665  w_local[i] = new double[e.nnode*num_eqns];
666  }
667 
668  // Tape first element
669  e.gid[0] = 0;
670  e.gid[1] = 1;
671  adolc_init_fill(true, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
672  template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
673  adolc_process_fill(true, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
674  f_local, f, adj_local, adj);
675 
676  // Now do remaining fills reusing tape
677  for (unsigned int i=1; i<num_nodes-1; i++) {
678  e.gid[0] = i;
679  e.gid[1] = i+1;
680 
681  adolc_init_fill(false, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
682  adolc_process_fill(false, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
683  f_local, f, adj_local, adj);
684  }
685  for (unsigned int i=0; i<num_cols; i++) {
686  delete [] adj_local[i];
687  delete [] w_local[i];
688  }
689  delete [] adj_local;
690  delete [] w_local;
691  timer.stop();
692 
693  // std::cout << "ADOL-C Residual = " << std::endl;
694  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
695  // std::cout << "\t" << f[i] << std::endl;
696 
697  // std::cout.precision(8);
698  // std::cout << "ADOL-C Adjoint = " << std::endl;
699  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
700  // std::cout << "\t";
701  // for (unsigned int j=0; j<num_cols; j++)
702  // std::cout << adj[j][i] << "\t";
703  // std::cout << std::endl;
704  // }
705 
706  return timer.totalElapsedTime();
707 }
708 
709 double adolc_retape_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
710  unsigned int num_cols, double mesh_spacing) {
711  ElemData e(mesh_spacing);
712 
713  // Solution vector, residual, adjoint
714  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
715  std::vector< std::vector<double> > w(num_cols);
716  std::vector< std::vector<double> > adj(num_cols);
717  for (unsigned int node=0; node<num_nodes; node++)
718  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
719  x[node*num_eqns+eqn] =
720  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
721  f[node*num_eqns+eqn] = 0.0;
722  }
723 
724  for (unsigned int col=0; col<num_cols; col++) {
725  w[col] = std::vector<double>(num_nodes*num_eqns);
726  adj[col] = std::vector<double>(num_nodes*num_eqns);
727  for (unsigned int node=0; node<num_nodes; node++)
728  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
729  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
730  adj[col][node*num_eqns+eqn] = 0.0;
731  }
732  }
733 
734  Teuchos::Time timer("FE ADOL-C Retape Adjoint Fill", false);
735  timer.start(true);
736  std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
737  std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
738  std::vector<double> x_local(e.nnode*num_eqns);
739  std::vector<double> f_local(e.nnode*num_eqns);
740  double **adj_local = new double*[num_cols];
741  double **w_local = new double*[num_cols];
742  for (unsigned int i=0; i<num_cols; i++) {
743  adj_local[i] = new double[e.nnode*num_eqns];
744  w_local[i] = new double[e.nnode*num_eqns];
745  }
746 
747  for (unsigned int i=0; i<num_nodes-1; i++) {
748  e.gid[0] = i;
749  e.gid[1] = i+1;
750 
751  adolc_init_fill(true, 1, e, num_eqns, x, w, x_local, x_ad, w_local);
752  template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
753  adolc_process_fill(true, 1, e, num_eqns, num_cols, x_local, w_local, f_ad,
754  f_local, f, adj_local, adj);
755  }
756  for (unsigned int i=0; i<num_cols; i++) {
757  delete [] adj_local[i];
758  delete [] w_local[i];
759  }
760  delete [] adj_local;
761  delete [] w_local;
762  timer.stop();
763 
764  // std::cout << "ADOL-C Residual (retaped) = " << std::endl;
765  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
766  // std::cout << "\t" << f[i] << std::endl;
767 
768  // std::cout.precision(8);
769  // std::cout << "ADOL-C Adjoint (retaped) = " << std::endl;
770  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
771  // std::cout << "\t";
772  // for (unsigned int j=0; j<num_cols; j++)
773  // std::cout << adj[j][i] << "\t";
774  // std::cout << std::endl;
775  // }
776 
777  return timer.totalElapsedTime();
778 }
779 #endif
780 
781 double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
782  unsigned int num_cols, double mesh_spacing) {
783  ElemData e(mesh_spacing);
784 
785  // Solution vector, residual, adjoint
786  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
787  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
788  std::vector< std::vector<double> > adj(num_cols);
789  for (unsigned int node=0; node<num_nodes; node++)
790  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
791  x[node*num_eqns+eqn] =
792  (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
793  f[node*num_eqns+eqn] = 0.0;
794  }
795 
796  for (unsigned int col=0; col<num_cols; col++) {
797  w[col] = std::vector<double>(num_nodes*num_eqns);
798  w_local[col] = std::vector<double>(e.nnode*num_eqns);
799  adj[col] = std::vector<double>(num_nodes*num_eqns);
800  for (unsigned int node=0; node<num_nodes; node++)
801  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
802  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
803  adj[col][node*num_eqns+eqn] = 0.0;
804  }
805  }
806 
807  Teuchos::Time timer("FE Analytic Adjoint Fill", false);
808  timer.start(true);
809  std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
810  std::vector< std::vector<double> > adj_local(num_cols);
811  std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
812  for (unsigned int i=0; i<num_cols; i++)
813  adj_local[i] = std::vector<double>(e.nnode*num_eqns);
814  for (unsigned int i=0; i<num_nodes-1; i++) {
815  e.gid[0] = i;
816  e.gid[1] = i+1;
817 
818  analytic_init_fill(e, num_eqns, x, w, x_local, w_local);
819  analytic_element_fill(e, num_eqns, x_local, w_local, u, du, f_local,
820  adj_local);
821  analytic_process_fill(e, num_eqns, f_local, adj_local, f, adj);
822  }
823  timer.stop();
824 
825  // std::cout.precision(8);
826  // std::cout << "Analytic Residual = " << std::endl;
827  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
828  // std::cout << "\t" << f[i] << std::endl;
829 
830  // std::cout.precision(8);
831  // std::cout << "Analytic Adjoint = " << std::endl;
832  // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
833  // std::cout << "\t";
834  // for (unsigned int j=0; j<num_cols; j++)
835  // std::cout << adj[j][i] << "\t";
836  // std::cout << std::endl;
837  // }
838 
839  return timer.totalElapsedTime();
840 }
841 
842 double residual_fill(unsigned int num_nodes, unsigned int num_eqns,
843  unsigned int num_cols, double mesh_spacing) {
844  ElemData e(mesh_spacing);
845 
846  // Solution vector, residual, jacobian
847  std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
848  std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
849  for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
850  for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
851  unsigned int row = node_row*num_eqns + eqn_row;
852  x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
853  f[row] = 0.0;
854  }
855  }
856 
857  for (unsigned int col=0; col<num_cols; col++) {
858  w[col] = std::vector<double>(num_nodes*num_eqns);
859  w_local[col] = std::vector<double>(e.nnode*num_eqns);
860  for (unsigned int node=0; node<num_nodes; node++)
861  for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
862  w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
863  }
864  }
865 
866  Teuchos::Time timer("FE Residual Fill", false);
867  timer.start(true);
868  std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
869  std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
870  for (unsigned int i=0; i<num_nodes-1; i++) {
871  e.gid[0] = i;
872  e.gid[1] = i+1;
873 
874  analytic_init_fill(e, num_eqns, x, w, x_local, w_local);
875  template_element_fill(e, num_eqns, x_local, u, du, f_local);
876  residual_process_fill(e, num_eqns, f_local, f);
877  }
878  timer.stop();
879 
880  // std::cout.precision(8);
881  // std::cout << "Analytic Residual = " << std::endl;
882  // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
883  // std::cout << "\t" << f[i] << std::endl;
884 
885  return timer.totalElapsedTime();
886 }
887 
888 int main(int argc, char* argv[]) {
889  int ierr = 0;
890 
891  try {
892  double t, ta, tr;
893  int p = 2;
894  int w = p+7;
895 
896  // Set up command line options
898  clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
899  int num_nodes = 100000;
900  int num_eqns = 2;
901  int num_cols = 4;
902  int rt = 0;
903  clp.setOption("n", &num_nodes, "Number of nodes");
904  clp.setOption("p", &num_eqns, "Number of equations");
905  clp.setOption("q", &num_cols, "Number of adjoint directions");
906  clp.setOption("rt", &rt, "Include ADOL-C retaping test");
907 
908  // Parse options
910  parseReturn= clp.parse(argc, argv);
912  return 1;
913 
914  double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1);
915 
916  std::cout.setf(std::ios::scientific);
917  std::cout.precision(p);
918  std::cout << "num_nodes = " << num_nodes
919  << ", num_eqns = " << num_eqns
920  << ", num_cols = " << num_cols << ": " << std::endl
921  << " " << " Time" << "\t"<< "Time/Analytic" << "\t"
922  << "Time/Residual" << std::endl;
923 
924  ta = 1.0;
925  tr = 1.0;
926 
927  tr = residual_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
928 
929  ta = analytic_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
930  std::cout << "Analytic: " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/tr << std::endl;
931 
932 #ifdef HAVE_ADOLC
933  t = adolc_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
934  std::cout << "ADOL-C: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
935 
936  if (rt != 0) {
937  t = adolc_retape_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
938  std::cout << "ADOL-C(rt):" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
939  }
940 #endif
941 
942  t = fad_adj_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
943  std::cout << "DFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
944 
945  t = fad_adj_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
946  std::cout << "ELRDFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
947 
948  t = rad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
949  std::cout << "Rad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
950 
951  t = vrad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
952  std::cout << "Vector Rad:" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
953 
954  }
955  catch (std::exception& e) {
956  std::cout << e.what() << std::endl;
957  ierr = 1;
958  }
959  catch (const char *s) {
960  std::cout << s << std::endl;
961  ierr = 1;
962  }
963  catch (...) {
964  std::cout << "Caught unknown exception!" << std::endl;
965  ierr = 1;
966  }
967 
968  return ierr;
969 }
const char * p
void rad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< RadType > &x_rad, std::vector< std::vector< double > > &w_local)
double residual_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)
void template_element_fill(const ElemData &e, unsigned int neqn, const std::vector< T > &x, std::vector< T > &u, std::vector< T > &du, std::vector< T > &f)
InactiveDouble * jac
InactiveDouble * w
void analytic_element_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &u, std::vector< double > &du, std::vector< double > &f, std::vector< std::vector< double > > &jac)
Sacado::Fad::DFad< double > FadType
void analytic_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, const std::vector< std::vector< double > > &jac_local, std::vector< double > &f, std::vector< std::vector< double > > &jac)
ElemData(double mesh_spacing)
double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void fad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< FadType > &x_fad)
static void Weighted_Gradcomp(size_t, ADVar **, Double *)
void start(bool reset=false)
Sacado::RadVec::ADvar< double > VRadType
void rad_process_fill(const ElemData &e, unsigned int neqn, std::vector< std::vector< double > > &w_local, std::vector< RadType > &x_rad, std::vector< RadType > &f_rad, std::vector< RadType * > &ff_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
void analytic_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &x_local)
double stop()
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
static void Weighted_GradcompVec(size_t, size_t *, ADVar ***, Double **)
int main()
Definition: ad_example.cpp:191
sqrt(expr.val())
void vrad_process_fill(const ElemData &e, unsigned int neqn, unsigned int ncol, std::vector< std::size_t > &vn, double **w_local, std::vector< VRadType > &x_rad, std::vector< VRadType > &f_rad, VRadType ***vf_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void residual_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, std::vector< double > &f)
Uncopyable z
InactiveDouble ** phi
double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
InactiveDouble ** dphi
void setDocString(const char doc_string[])
void fad_process_fill(const ElemData &e, unsigned int neqn, const std::vector< FadType > &f_fad, std::vector< double > &f, std::vector< std::vector< double > > &jac)
double totalElapsedTime(bool readCurrentTime=false) const
Sacado::Rad::ADvar< double > RadType
double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
exp(expr.val())
double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void vrad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< VRadType > &x_rad, double **w_local)