46 #ifdef PACKAGE_BUGREPORT
47 #undef PACKAGE_BUGREPORT
52 #ifdef PACKAGE_TARNAME
53 #undef PACKAGE_TARNAME
55 #ifdef PACKAGE_VERSION
56 #undef PACKAGE_VERSION
61 #include "adolc/adouble.h"
62 #include "adolc/drivers/drivers.h"
63 #include "adolc/interfaces.h"
64 #include "adolc/taping.h"
74 static const unsigned int nqp = 2;
75 static const unsigned int nnode = 2;
85 for (
unsigned int i=0;
i<
nqp;
i++) {
90 jac[
i] = 0.5*mesh_spacing;
93 phi[
i][0] = 0.5*(1.0 - xi[
i]);
94 phi[
i][1] = 0.5*(1.0 + xi[
i]);
101 template <
class FadType>
104 const std::vector<double>&
x,
105 const std::vector< std::vector<double> >& w,
106 std::vector<FadType>& x_fad,
107 std::vector< std::vector<double> >& w_local) {
108 for (
unsigned int node=0; node<e.
nnode; node++)
109 for (
unsigned int eqn=0; eqn<neqn; eqn++)
110 x_fad[node*neqn+eqn] =
FadType(e.
nnode*neqn, node*neqn+eqn,
111 x[e.
gid[node]*neqn+eqn]);
113 for (
unsigned int col=0; col<w.size(); col++)
114 for (
unsigned int node=0; node<e.
nnode; node++)
115 for (
unsigned int eqn=0; eqn<neqn; eqn++)
116 w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
121 const std::vector<double>&
x,
122 const std::vector< std::vector<double> >& w,
123 std::vector<RadType>& x_rad,
124 std::vector< std::vector<double> >& w_local) {
125 for (
unsigned int node=0; node<e.
nnode; node++)
126 for (
unsigned int eqn=0; eqn<neqn; eqn++)
127 x_rad[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
129 for (
unsigned int col=0; col<w.size(); col++)
130 for (
unsigned int node=0; node<e.
nnode; node++)
131 for (
unsigned int eqn=0; eqn<neqn; eqn++)
132 w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
137 const std::vector<double>&
x,
138 const std::vector< std::vector<double> >& w,
139 std::vector<VRadType>& x_rad,
141 for (
unsigned int node=0; node<e.
nnode; node++)
142 for (
unsigned int eqn=0; eqn<neqn; eqn++)
143 x_rad[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
145 for (
unsigned int col=0; col<w.size(); col++)
146 for (
unsigned int node=0; node<e.
nnode; node++)
147 for (
unsigned int eqn=0; eqn<neqn; eqn++)
148 w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
152 void adolc_init_fill(
bool retape,
156 const std::vector<double>&
x,
157 const std::vector< std::vector<double> >& w,
158 std::vector<double>& x_local,
159 std::vector<adouble>& x_ad,
163 for (
unsigned int node=0; node<e.
nnode; node++)
164 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
165 x_local[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
167 x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
170 for (
unsigned int col=0; col<w.size(); col++)
171 for (
unsigned int node=0; node<e.
nnode; node++)
172 for (
unsigned int eqn=0; eqn<neqn; eqn++)
173 w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
179 const std::vector<double>& x,
180 const std::vector< std::vector<double> >& w,
181 std::vector<double>& x_local,
182 std::vector< std::vector<double> >& w_local) {
183 for (
unsigned int node=0; node<e.
nnode; node++)
184 for (
unsigned int eqn=0; eqn<neqn; eqn++)
185 x_local[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
187 for (
unsigned int node=0; node<e.
nnode; node++)
188 for (
unsigned int eqn=0; eqn<neqn; eqn++)
189 for (
unsigned int col=0; col<w.size(); col++)
190 w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
196 const std::vector<T>& x,
201 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
202 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
203 u[qp*neqn+eqn] = 0.0;
204 du[qp*neqn+eqn] = 0.0;
205 for (
unsigned int node=0; node<e.
nnode; node++) {
206 u[qp*neqn+eqn] += x[node*neqn+eqn] * e.
phi[qp][node];
207 du[qp*neqn+eqn] += x[node*neqn+eqn] * e.
dphi[qp][node];
213 std::vector<T> s(e.
nqp*neqn);
214 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
215 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
216 s[qp*neqn+eqn] = 0.0;
217 for (
unsigned int j=0; j<neqn; j++) {
219 s[qp*neqn+eqn] += u[qp*neqn+j];
225 for (
unsigned int node=0; node<e.
nnode; node++) {
226 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
227 unsigned int row = node*neqn+eqn;
229 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
231 e.
w[qp]*e.
jac[qp]*(-(1.0/(e.
jac[qp]*e.
jac[qp]))*
232 du[qp*neqn+eqn]*e.
dphi[qp][node] +
233 e.
phi[qp][node]*(
exp(u[qp*neqn+eqn]) +
234 u[qp*neqn+eqn]*s[qp*neqn+eqn]));
242 const std::vector<double>& x,
243 std::vector< std::vector<double> >& w,
244 std::vector<double>& u,
245 std::vector<double>& du,
246 std::vector<double>& f,
247 std::vector< std::vector<double> >& adj) {
249 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
250 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
251 u[qp*neqn+eqn] = 0.0;
252 du[qp*neqn+eqn] = 0.0;
253 for (
unsigned int node=0; node<e.
nnode; node++) {
254 u[qp*neqn+eqn] += x[node*neqn+eqn] * e.
phi[qp][node];
255 du[qp*neqn+eqn] += x[node*neqn+eqn] * e.
dphi[qp][node];
261 std::vector<double> s(e.
nqp*neqn);
262 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
263 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
264 s[qp*neqn+eqn] = 0.0;
265 for (
unsigned int j=0; j<neqn; j++) {
267 s[qp*neqn+eqn] += u[qp*neqn+j];
272 for (
unsigned int col=0; col<w.size(); col++)
273 for (
unsigned int node=0; node<e.
nnode; node++)
274 for (
unsigned int eqn=0; eqn<neqn; eqn++)
275 adj[col][node*neqn+eqn] = 0.0;
278 for (
unsigned int node=0; node<e.
nnode; node++) {
279 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
280 unsigned int row = node*neqn+eqn;
282 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
283 double c1 = e.
w[qp]*e.
jac[qp];
284 double c2 = -(1.0/(e.
jac[qp]*e.
jac[qp]))*e.
dphi[qp][node];
285 double c3 = e.
phi[qp][node]*(
exp(u[qp*neqn+eqn]));
286 double c4 = e.
phi[qp][node]*u[qp*neqn+eqn];
287 double c5 = e.
phi[qp][node]*s[qp*neqn+eqn];
290 f[row] += c1*(c2*du[qp*neqn+eqn] + c3 + c4*s[qp*neqn+eqn]);
291 for (
unsigned int col=0; col<w.size(); col++) {
292 for (
unsigned int node_col=0; node_col<e.
nnode; node_col++) {
293 adj[col][node_col*neqn+eqn] +=
294 c1*(c2*e.
dphi[qp][node_col] + c35*e.
phi[qp][node_col])*w[col][row];
295 for (
unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
297 adj[col][node_col*neqn+eqn_col] += c14*e.
phi[qp][node_col]*w[col][row];
306 template <
class FadType>
309 const std::vector< std::vector<double> >& w_local,
310 const std::vector<FadType>& f_fad,
311 std::vector<double>& f,
312 std::vector< std::vector<double> >& adj) {
314 for (
unsigned int node=0; node<e.
nnode; node++)
315 for (
unsigned int eqn=0; eqn<neqn; eqn++)
316 f[e.
gid[node]*neqn+eqn] += f_fad[node*neqn+eqn].val();
319 for (
unsigned int col=0; col<w_local.size(); col++) {
321 for (
unsigned int node=0; node<e.
nnode; node++)
322 for (
unsigned int eqn=0; eqn<neqn; eqn++)
323 z += f_fad[node*neqn+eqn]*w_local[col][node*neqn+eqn];
325 for (
unsigned int node=0; node<e.
nnode; node++)
326 for (
unsigned int eqn=0; eqn<neqn; eqn++)
327 adj[col][e.
gid[node]*neqn+eqn] += z.fastAccessDx(node*neqn+eqn);
333 std::vector< std::vector<double> >& w_local,
334 std::vector<RadType>& x_rad,
335 std::vector<RadType>& f_rad,
336 std::vector<RadType*>& ff_rad,
337 std::vector<double>& f,
338 std::vector< std::vector<double> >& adj) {
340 for (
unsigned int node=0; node<e.
nnode; node++)
341 for (
unsigned int eqn=0; eqn<neqn; eqn++)
342 f[e.
gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
345 for (
unsigned int col=0; col<w_local.size(); col++) {
349 for (
unsigned int node=0; node<e.
nnode; node++)
350 for (
unsigned int eqn=0; eqn<neqn; eqn++)
351 adj[col][e.
gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj();
358 std::vector<std::size_t>& vn,
360 std::vector<VRadType>& x_rad,
361 std::vector<VRadType>& f_rad,
363 std::vector<double>& f,
364 std::vector< std::vector<double> >& adj) {
366 for (
unsigned int node=0; node<e.
nnode; node++)
367 for (
unsigned int eqn=0; eqn<neqn; eqn++)
368 f[e.
gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
375 for (
unsigned int col=0; col<ncol; col++)
376 for (
unsigned int node=0; node<e.
nnode; node++)
377 for (
unsigned int eqn=0; eqn<neqn; eqn++)
378 adj[col][e.
gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj(col);
382 void adolc_process_fill(
bool retape,
387 std::vector<double>& x_local,
389 std::vector<adouble>& f_ad,
390 std::vector<double>& f_local,
391 std::vector<double>& f,
393 std::vector< std::vector<double> >& adj) {
395 for (
unsigned int node=0; node<e.
nnode; node++)
396 for (
unsigned int eqn=0; eqn<neqn; eqn++)
397 f_ad[node*neqn+eqn] >>= f_local[node*neqn+eqn];
401 zos_forward(tag, neqn*e.
nnode, neqn*e.
nnode, 1, &x_local[0], &f_local[0]);
403 fov_reverse(tag, neqn*e.
nnode, neqn*e.
nnode, ncol, w_local, adj_local);
405 for (
unsigned int node=0; node<e.
nnode; node++)
406 for (
unsigned int eqn=0; eqn<neqn; eqn++)
407 f[e.
gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
409 for (
unsigned int col=0; col<ncol; col++)
410 for (
unsigned int node=0; node<e.
nnode; node++)
411 for (
unsigned int eqn=0; eqn<neqn; eqn++)
412 adj[col][e.
gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
418 const std::vector<double>& f_local,
419 const std::vector< std::vector<double> >& adj_local,
420 std::vector<double>& f,
421 std::vector< std::vector<double> >& adj) {
422 for (
unsigned int node=0; node<e.
nnode; node++)
423 for (
unsigned int eqn=0; eqn<neqn; eqn++)
424 f[e.
gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
426 for (
unsigned int col=0; col<adj.size(); col++)
427 for (
unsigned int node=0; node<e.
nnode; node++)
428 for (
unsigned int eqn=0; eqn<neqn; eqn++)
429 adj[col][e.
gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
434 const std::vector<double>& f_local,
435 std::vector<double>& f) {
436 for (
unsigned int node=0; node<e.
nnode; node++)
437 for (
unsigned int eqn=0; eqn<neqn; eqn++)
438 f[e.
gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
441 template <
typename FadType>
443 unsigned int num_cols,
double mesh_spacing) {
447 std::vector<double>
x(num_nodes*num_eqns), f(num_nodes*num_eqns);
448 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
449 std::vector< std::vector<double> > adj(num_cols);
450 for (
unsigned int node=0; node<num_nodes; node++)
451 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
452 x[node*num_eqns+eqn] =
453 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
454 f[node*num_eqns+eqn] = 0.0;
457 for (
unsigned int col=0; col<num_cols; col++) {
458 w[col] = std::vector<double>(num_nodes*num_eqns);
459 w_local[col] = std::vector<double>(e.
nnode*num_eqns);
460 adj[col] = std::vector<double>(num_nodes*num_eqns);
461 for (
unsigned int node=0; node<num_nodes; node++)
462 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
463 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
464 adj[col][node*num_eqns+eqn] = 0.0;
470 std::vector<FadType> x_fad(e.
nnode*num_eqns), f_fad(e.
nnode*num_eqns);
471 std::vector<FadType> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
472 for (
unsigned int i=0;
i<num_nodes-1;
i++) {
499 unsigned int num_cols,
double mesh_spacing) {
503 std::vector<double>
x(num_nodes*num_eqns), f(num_nodes*num_eqns);
504 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
505 std::vector< std::vector<double> > adj(num_cols);
506 for (
unsigned int node=0; node<num_nodes; node++)
507 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
508 x[node*num_eqns+eqn] =
509 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
510 f[node*num_eqns+eqn] = 0.0;
513 for (
unsigned int col=0; col<num_cols; col++) {
514 w[col] = std::vector<double>(num_nodes*num_eqns);
515 w_local[col] = std::vector<double>(e.
nnode*num_eqns);
516 adj[col] = std::vector<double>(num_nodes*num_eqns);
517 for (
unsigned int node=0; node<num_nodes; node++)
518 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
519 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
520 adj[col][node*num_eqns+eqn] = 0.0;
526 std::vector<RadType> x_rad(e.
nnode*num_eqns), f_rad(e.
nnode*num_eqns);
527 std::vector<RadType> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
528 std::vector<RadType*> ff_rad(f_rad.size());
529 for (
unsigned int i=0;
i<f_rad.size();
i++)
530 ff_rad[
i] = &f_rad[
i];
531 for (
unsigned int i=0;
i<num_nodes-1;
i++) {
558 unsigned int num_cols,
double mesh_spacing) {
562 std::vector<double>
x(num_nodes*num_eqns), f(num_nodes*num_eqns);
563 std::vector< std::vector<double> > w(num_cols);
564 double **w_local =
new double*[num_cols];
565 std::vector< std::vector<double> > adj(num_cols);
566 for (
unsigned int node=0; node<num_nodes; node++)
567 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
568 x[node*num_eqns+eqn] =
569 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
570 f[node*num_eqns+eqn] = 0.0;
573 for (
unsigned int col=0; col<num_cols; col++) {
574 w[col] = std::vector<double>(num_nodes*num_eqns);
575 w_local[col] =
new double[e.
nnode*num_eqns];
576 adj[col] = std::vector<double>(num_nodes*num_eqns);
577 for (
unsigned int node=0; node<num_nodes; node++)
578 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
579 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
580 adj[col][node*num_eqns+eqn] = 0.0;
586 std::vector<VRadType> x_rad(e.
nnode*num_eqns), f_rad(e.
nnode*num_eqns);
587 std::vector<VRadType> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
589 std::vector<std::size_t> vn(num_cols);
590 for (
unsigned int i=0;
i<num_cols;
i++) {
593 for (
unsigned int j=0; j<num_eqns*e.
nnode; j++)
594 vf_rad[
i][j] = &f_rad[j];
596 for (
unsigned int i=0;
i<num_nodes-1;
i++) {
605 for (
unsigned int col=0; col<num_cols; col++) {
606 delete [] w_local[col];
607 delete [] vf_rad[col];
630 double adolc_adj_fill(
unsigned int num_nodes,
unsigned int num_eqns,
631 unsigned int num_cols,
double mesh_spacing) {
635 std::vector<double>
x(num_nodes*num_eqns), f(num_nodes*num_eqns);
636 std::vector< std::vector<double> > w(num_cols);
637 std::vector< std::vector<double> > adj(num_cols);
638 for (
unsigned int node=0; node<num_nodes; node++)
639 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
640 x[node*num_eqns+eqn] =
641 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
642 f[node*num_eqns+eqn] = 0.0;
645 for (
unsigned int col=0; col<num_cols; col++) {
646 w[col] = std::vector<double>(num_nodes*num_eqns);
647 adj[col] = std::vector<double>(num_nodes*num_eqns);
648 for (
unsigned int node=0; node<num_nodes; node++)
649 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
650 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
651 adj[col][node*num_eqns+eqn] = 0.0;
657 std::vector<adouble> x_ad(e.
nnode*num_eqns), f_ad(e.
nnode*num_eqns);
658 std::vector<adouble> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
659 std::vector<double> x_local(e.
nnode*num_eqns);
660 std::vector<double> f_local(e.
nnode*num_eqns);
661 double **adj_local =
new double*[num_cols];
662 double **w_local =
new double*[num_cols];
663 for (
unsigned int i=0;
i<num_cols;
i++) {
664 adj_local[
i] =
new double[e.
nnode*num_eqns];
665 w_local[
i] =
new double[e.
nnode*num_eqns];
671 adolc_init_fill(
true, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
673 adolc_process_fill(
true, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
674 f_local, f, adj_local, adj);
677 for (
unsigned int i=1;
i<num_nodes-1;
i++) {
681 adolc_init_fill(
false, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
682 adolc_process_fill(
false, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
683 f_local, f, adj_local, adj);
685 for (
unsigned int i=0;
i<num_cols;
i++) {
686 delete [] adj_local[
i];
687 delete [] w_local[
i];
706 return timer.totalElapsedTime();
709 double adolc_retape_adj_fill(
unsigned int num_nodes,
unsigned int num_eqns,
710 unsigned int num_cols,
double mesh_spacing) {
714 std::vector<double>
x(num_nodes*num_eqns), f(num_nodes*num_eqns);
715 std::vector< std::vector<double> > w(num_cols);
716 std::vector< std::vector<double> > adj(num_cols);
717 for (
unsigned int node=0; node<num_nodes; node++)
718 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
719 x[node*num_eqns+eqn] =
720 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
721 f[node*num_eqns+eqn] = 0.0;
724 for (
unsigned int col=0; col<num_cols; col++) {
725 w[col] = std::vector<double>(num_nodes*num_eqns);
726 adj[col] = std::vector<double>(num_nodes*num_eqns);
727 for (
unsigned int node=0; node<num_nodes; node++)
728 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
729 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
730 adj[col][node*num_eqns+eqn] = 0.0;
736 std::vector<adouble> x_ad(e.
nnode*num_eqns), f_ad(e.
nnode*num_eqns);
737 std::vector<adouble> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
738 std::vector<double> x_local(e.
nnode*num_eqns);
739 std::vector<double> f_local(e.
nnode*num_eqns);
740 double **adj_local =
new double*[num_cols];
741 double **w_local =
new double*[num_cols];
742 for (
unsigned int i=0;
i<num_cols;
i++) {
743 adj_local[
i] =
new double[e.
nnode*num_eqns];
744 w_local[
i] =
new double[e.
nnode*num_eqns];
747 for (
unsigned int i=0;
i<num_nodes-1;
i++) {
751 adolc_init_fill(
true, 1, e, num_eqns, x, w, x_local, x_ad, w_local);
753 adolc_process_fill(
true, 1, e, num_eqns, num_cols, x_local, w_local, f_ad,
754 f_local, f, adj_local, adj);
756 for (
unsigned int i=0;
i<num_cols;
i++) {
757 delete [] adj_local[
i];
758 delete [] w_local[
i];
777 return timer.totalElapsedTime();
782 unsigned int num_cols,
double mesh_spacing) {
786 std::vector<double>
x(num_nodes*num_eqns), f(num_nodes*num_eqns);
787 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
788 std::vector< std::vector<double> > adj(num_cols);
789 for (
unsigned int node=0; node<num_nodes; node++)
790 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
791 x[node*num_eqns+eqn] =
792 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
793 f[node*num_eqns+eqn] = 0.0;
796 for (
unsigned int col=0; col<num_cols; col++) {
797 w[col] = std::vector<double>(num_nodes*num_eqns);
798 w_local[col] = std::vector<double>(e.
nnode*num_eqns);
799 adj[col] = std::vector<double>(num_nodes*num_eqns);
800 for (
unsigned int node=0; node<num_nodes; node++)
801 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
802 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
803 adj[col][node*num_eqns+eqn] = 0.0;
809 std::vector<double> x_local(e.
nnode*num_eqns), f_local(e.
nnode*num_eqns);
810 std::vector< std::vector<double> > adj_local(num_cols);
811 std::vector<double> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
812 for (
unsigned int i=0;
i<num_cols;
i++)
813 adj_local[
i] = std::vector<double>(e.
nnode*num_eqns);
814 for (
unsigned int i=0;
i<num_nodes-1;
i++) {
843 unsigned int num_cols,
double mesh_spacing) {
847 std::vector<double>
x(num_nodes*num_eqns), f(num_nodes*num_eqns);
848 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
849 for (
unsigned int node_row=0; node_row<num_nodes; node_row++) {
850 for (
unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
851 unsigned int row = node_row*num_eqns + eqn_row;
852 x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
857 for (
unsigned int col=0; col<num_cols; col++) {
858 w[col] = std::vector<double>(num_nodes*num_eqns);
859 w_local[col] = std::vector<double>(e.
nnode*num_eqns);
860 for (
unsigned int node=0; node<num_nodes; node++)
861 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
862 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
868 std::vector<double> x_local(e.
nnode*num_eqns), f_local(e.
nnode*num_eqns);
869 std::vector<double> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
870 for (
unsigned int i=0;
i<num_nodes-1;
i++) {
888 int main(
int argc,
char* argv[]) {
898 clp.
setDocString(
"This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
899 int num_nodes = 100000;
903 clp.
setOption(
"n", &num_nodes,
"Number of nodes");
904 clp.
setOption(
"p", &num_eqns,
"Number of equations");
905 clp.
setOption(
"q", &num_cols,
"Number of adjoint directions");
906 clp.
setOption(
"rt", &rt,
"Include ADOL-C retaping test");
910 parseReturn= clp.
parse(argc, argv);
914 double mesh_spacing = 1.0 /
static_cast<double>(num_nodes - 1);
916 std::cout.setf(std::ios::scientific);
917 std::cout.precision(p);
918 std::cout <<
"num_nodes = " << num_nodes
919 <<
", num_eqns = " << num_eqns
920 <<
", num_cols = " << num_cols <<
": " << std::endl
921 <<
" " <<
" Time" <<
"\t"<<
"Time/Analytic" <<
"\t"
922 <<
"Time/Residual" << std::endl;
927 tr =
residual_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
930 std::cout <<
"Analytic: " << std::setw(w) << ta <<
"\t" << std::setw(w) << ta/ta <<
"\t" << std::setw(w) << ta/tr << std::endl;
933 t = adolc_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
934 std::cout <<
"ADOL-C: " << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
937 t = adolc_retape_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
938 std::cout <<
"ADOL-C(rt):" << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
942 t = fad_adj_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
943 std::cout <<
"DFad: " << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
945 t = fad_adj_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
946 std::cout <<
"ELRDFad: " << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
948 t =
rad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
949 std::cout <<
"Rad: " << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
951 t =
vrad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
952 std::cout <<
"Vector Rad:" << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
955 catch (std::exception& e) {
956 std::cout << e.what() << std::endl;
959 catch (
const char *s) {
960 std::cout << s << std::endl;
964 std::cout <<
"Caught unknown exception!" << std::endl;
void rad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< RadType > &x_rad, std::vector< std::vector< double > > &w_local)
double residual_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)
void template_element_fill(const ElemData &e, unsigned int neqn, const std::vector< T > &x, std::vector< T > &u, std::vector< T > &du, std::vector< T > &f)
void analytic_element_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &u, std::vector< double > &du, std::vector< double > &f, std::vector< std::vector< double > > &jac)
Sacado::Fad::DFad< double > FadType
void analytic_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, const std::vector< std::vector< double > > &jac_local, std::vector< double > &f, std::vector< std::vector< double > > &jac)
ElemData(double mesh_spacing)
double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void fad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< FadType > &x_fad)
static void Weighted_Gradcomp(size_t, ADVar **, Double *)
void start(bool reset=false)
Sacado::RadVec::ADvar< double > VRadType
void rad_process_fill(const ElemData &e, unsigned int neqn, std::vector< std::vector< double > > &w_local, std::vector< RadType > &x_rad, std::vector< RadType > &f_rad, std::vector< RadType * > &ff_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
void analytic_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &x_local)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
static void Weighted_GradcompVec(size_t, size_t *, ADVar ***, Double **)
void vrad_process_fill(const ElemData &e, unsigned int neqn, unsigned int ncol, std::vector< std::size_t > &vn, double **w_local, std::vector< VRadType > &x_rad, std::vector< VRadType > &f_rad, VRadType ***vf_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void residual_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, std::vector< double > &f)
double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void setDocString(const char doc_string[])
void fad_process_fill(const ElemData &e, unsigned int neqn, const std::vector< FadType > &f_fad, std::vector< double > &f, std::vector< std::vector< double > > &jac)
double totalElapsedTime(bool readCurrentTime=false) const
Sacado::Rad::ADvar< double > RadType
double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void vrad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< VRadType > &x_rad, double **w_local)