46 #ifdef PACKAGE_BUGREPORT
47 #undef PACKAGE_BUGREPORT
52 #ifdef PACKAGE_TARNAME
53 #undef PACKAGE_TARNAME
55 #ifdef PACKAGE_VERSION
56 #undef PACKAGE_VERSION
61 #include "adolc/adouble.h"
62 #include "adolc/drivers/drivers.h"
63 #include "adolc/interfaces.h"
73 static const unsigned int nqp = 2;
74 static const unsigned int nnode = 2;
84 for (
unsigned int i=0; i<
nqp; i++) {
89 jac[i] = 0.5*mesh_spacing;
92 phi[i][0] = 0.5*(1.0 - xi[i]);
93 phi[i][1] = 0.5*(1.0 + xi[i]);
100 template <
class FadType>
103 const std::vector<double>& x,
104 const std::vector< std::vector<double> >& w,
105 std::vector<FadType>& x_fad,
106 std::vector< std::vector<double> >& w_local) {
107 for (
unsigned int node=0; node<e.
nnode; node++)
108 for (
unsigned int eqn=0; eqn<neqn; eqn++)
109 x_fad[node*neqn+eqn] =
FadType(e.
nnode*neqn, node*neqn+eqn,
110 x[e.
gid[node]*neqn+eqn]);
112 for (
unsigned int col=0; col<w.size(); col++)
113 for (
unsigned int node=0; node<e.
nnode; node++)
114 for (
unsigned int eqn=0; eqn<neqn; eqn++)
115 w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
120 const std::vector<double>& x,
121 const std::vector< std::vector<double> >& w,
122 std::vector<RadType>& x_rad,
123 std::vector< std::vector<double> >& w_local) {
124 for (
unsigned int node=0; node<e.
nnode; node++)
125 for (
unsigned int eqn=0; eqn<neqn; eqn++)
126 x_rad[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
128 for (
unsigned int col=0; col<w.size(); col++)
129 for (
unsigned int node=0; node<e.
nnode; node++)
130 for (
unsigned int eqn=0; eqn<neqn; eqn++)
131 w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
136 const std::vector<double>& x,
137 const std::vector< std::vector<double> >& w,
138 std::vector<VRadType>& x_rad,
140 for (
unsigned int node=0; node<e.
nnode; node++)
141 for (
unsigned int eqn=0; eqn<neqn; eqn++)
142 x_rad[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
144 for (
unsigned int col=0; col<w.size(); col++)
145 for (
unsigned int node=0; node<e.
nnode; node++)
146 for (
unsigned int eqn=0; eqn<neqn; eqn++)
147 w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
151 void adolc_init_fill(
bool retape,
155 const std::vector<double>& x,
156 const std::vector< std::vector<double> >& w,
157 std::vector<double>& x_local,
158 std::vector<adouble>& x_ad,
162 for (
unsigned int node=0; node<e.
nnode; node++)
163 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
164 x_local[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
166 x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
169 for (
unsigned int col=0; col<w.size(); col++)
170 for (
unsigned int node=0; node<e.
nnode; node++)
171 for (
unsigned int eqn=0; eqn<neqn; eqn++)
172 w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
178 const std::vector<double>& x,
179 const std::vector< std::vector<double> >& w,
180 std::vector<double>& x_local,
181 std::vector< std::vector<double> >& w_local) {
182 for (
unsigned int node=0; node<e.
nnode; node++)
183 for (
unsigned int eqn=0; eqn<neqn; eqn++)
184 x_local[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
186 for (
unsigned int node=0; node<e.
nnode; node++)
187 for (
unsigned int eqn=0; eqn<neqn; eqn++)
188 for (
unsigned int col=0; col<w.size(); col++)
189 w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
195 const std::vector<T>& x,
200 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
201 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
202 u[qp*neqn+eqn] = 0.0;
203 du[qp*neqn+eqn] = 0.0;
204 for (
unsigned int node=0; node<e.
nnode; node++) {
205 u[qp*neqn+eqn] += x[node*neqn+eqn] * e.
phi[qp][node];
206 du[qp*neqn+eqn] += x[node*neqn+eqn] * e.
dphi[qp][node];
212 std::vector<T> s(e.
nqp*neqn);
213 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
214 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
215 s[qp*neqn+eqn] = 0.0;
216 for (
unsigned int j=0; j<neqn; j++) {
218 s[qp*neqn+eqn] += u[qp*neqn+j];
224 for (
unsigned int node=0; node<e.
nnode; node++) {
225 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
226 unsigned int row = node*neqn+eqn;
228 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
230 e.
w[qp]*e.
jac[qp]*(-(1.0/(e.
jac[qp]*e.
jac[qp]))*
231 du[qp*neqn+eqn]*e.
dphi[qp][node] +
232 e.
phi[qp][node]*(
exp(u[qp*neqn+eqn]) +
233 u[qp*neqn+eqn]*s[qp*neqn+eqn]));
241 const std::vector<double>& x,
242 std::vector< std::vector<double> >& w,
243 std::vector<double>& u,
244 std::vector<double>& du,
245 std::vector<double>& f,
246 std::vector< std::vector<double> >& adj) {
248 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
249 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
250 u[qp*neqn+eqn] = 0.0;
251 du[qp*neqn+eqn] = 0.0;
252 for (
unsigned int node=0; node<e.
nnode; node++) {
253 u[qp*neqn+eqn] += x[node*neqn+eqn] * e.
phi[qp][node];
254 du[qp*neqn+eqn] += x[node*neqn+eqn] * e.
dphi[qp][node];
260 std::vector<double> s(e.
nqp*neqn);
261 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
262 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
263 s[qp*neqn+eqn] = 0.0;
264 for (
unsigned int j=0; j<neqn; j++) {
266 s[qp*neqn+eqn] += u[qp*neqn+j];
271 for (
unsigned int col=0; col<w.size(); col++)
272 for (
unsigned int node=0; node<e.
nnode; node++)
273 for (
unsigned int eqn=0; eqn<neqn; eqn++)
274 adj[col][node*neqn+eqn] = 0.0;
277 for (
unsigned int node=0; node<e.
nnode; node++) {
278 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
279 unsigned int row = node*neqn+eqn;
281 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
282 double c1 = e.
w[qp]*e.
jac[qp];
283 double c2 = -(1.0/(e.
jac[qp]*e.
jac[qp]))*e.
dphi[qp][node];
284 double c3 = e.
phi[qp][node]*(
exp(u[qp*neqn+eqn]));
285 double c4 = e.
phi[qp][node]*u[qp*neqn+eqn];
286 double c5 = e.
phi[qp][node]*s[qp*neqn+eqn];
289 f[row] += c1*(c2*du[qp*neqn+eqn] + c3 + c4*s[qp*neqn+eqn]);
290 for (
unsigned int col=0; col<w.size(); col++) {
291 for (
unsigned int node_col=0; node_col<e.
nnode; node_col++) {
292 adj[col][node_col*neqn+eqn] +=
293 c1*(c2*e.
dphi[qp][node_col] + c35*e.
phi[qp][node_col])*w[col][row];
294 for (
unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
296 adj[col][node_col*neqn+eqn_col] += c14*e.
phi[qp][node_col]*w[col][row];
305 template <
class FadType>
308 const std::vector< std::vector<double> >& w_local,
309 const std::vector<FadType>& f_fad,
310 std::vector<double>& f,
311 std::vector< std::vector<double> >& adj) {
313 for (
unsigned int node=0; node<e.
nnode; node++)
314 for (
unsigned int eqn=0; eqn<neqn; eqn++)
315 f[e.
gid[node]*neqn+eqn] += f_fad[node*neqn+eqn].val();
318 for (
unsigned int col=0; col<w_local.size(); col++) {
320 for (
unsigned int node=0; node<e.
nnode; node++)
321 for (
unsigned int eqn=0; eqn<neqn; eqn++)
322 z += f_fad[node*neqn+eqn]*w_local[col][node*neqn+eqn];
324 for (
unsigned int node=0; node<e.
nnode; node++)
325 for (
unsigned int eqn=0; eqn<neqn; eqn++)
326 adj[col][e.
gid[node]*neqn+eqn] += z.fastAccessDx(node*neqn+eqn);
332 std::vector< std::vector<double> >& w_local,
333 std::vector<RadType>& x_rad,
334 std::vector<RadType>& f_rad,
335 std::vector<RadType*>& ff_rad,
336 std::vector<double>& f,
337 std::vector< std::vector<double> >& adj) {
339 for (
unsigned int node=0; node<e.
nnode; node++)
340 for (
unsigned int eqn=0; eqn<neqn; eqn++)
341 f[e.
gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
344 for (
unsigned int col=0; col<w_local.size(); col++) {
348 for (
unsigned int node=0; node<e.
nnode; node++)
349 for (
unsigned int eqn=0; eqn<neqn; eqn++)
350 adj[col][e.
gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj();
357 std::vector<std::size_t>& vn,
359 std::vector<VRadType>& x_rad,
360 std::vector<VRadType>& f_rad,
362 std::vector<double>& f,
363 std::vector< std::vector<double> >& adj) {
365 for (
unsigned int node=0; node<e.
nnode; node++)
366 for (
unsigned int eqn=0; eqn<neqn; eqn++)
367 f[e.
gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
374 for (
unsigned int col=0; col<ncol; col++)
375 for (
unsigned int node=0; node<e.
nnode; node++)
376 for (
unsigned int eqn=0; eqn<neqn; eqn++)
377 adj[col][e.
gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj(col);
381 void adolc_process_fill(
bool retape,
386 std::vector<double>& x_local,
388 std::vector<adouble>& f_ad,
389 std::vector<double>& f_local,
390 std::vector<double>& f,
392 std::vector< std::vector<double> >& adj) {
394 for (
unsigned int node=0; node<e.
nnode; node++)
395 for (
unsigned int eqn=0; eqn<neqn; eqn++)
396 f_ad[node*neqn+eqn] >>= f_local[node*neqn+eqn];
400 zos_forward(tag, neqn*e.
nnode, neqn*e.
nnode, 1, &x_local[0], &f_local[0]);
402 fov_reverse(tag, neqn*e.
nnode, neqn*e.
nnode, ncol, w_local, adj_local);
404 for (
unsigned int node=0; node<e.
nnode; node++)
405 for (
unsigned int eqn=0; eqn<neqn; eqn++)
406 f[e.
gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
408 for (
unsigned int col=0; col<ncol; col++)
409 for (
unsigned int node=0; node<e.
nnode; node++)
410 for (
unsigned int eqn=0; eqn<neqn; eqn++)
411 adj[col][e.
gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
417 const std::vector<double>& f_local,
418 const std::vector< std::vector<double> >& adj_local,
419 std::vector<double>& f,
420 std::vector< std::vector<double> >& adj) {
421 for (
unsigned int node=0; node<e.
nnode; node++)
422 for (
unsigned int eqn=0; eqn<neqn; eqn++)
423 f[e.
gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
425 for (
unsigned int col=0; col<adj.size(); col++)
426 for (
unsigned int node=0; node<e.
nnode; node++)
427 for (
unsigned int eqn=0; eqn<neqn; eqn++)
428 adj[col][e.
gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
433 const std::vector<double>& f_local,
434 std::vector<double>& f) {
435 for (
unsigned int node=0; node<e.
nnode; node++)
436 for (
unsigned int eqn=0; eqn<neqn; eqn++)
437 f[e.
gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
440 template <
typename FadType>
442 unsigned int num_cols,
double mesh_spacing) {
446 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
447 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
448 std::vector< std::vector<double> > adj(num_cols);
449 for (
unsigned int node=0; node<num_nodes; node++)
450 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
451 x[node*num_eqns+eqn] =
452 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
453 f[node*num_eqns+eqn] = 0.0;
456 for (
unsigned int col=0; col<num_cols; col++) {
457 w[col] = std::vector<double>(num_nodes*num_eqns);
458 w_local[col] = std::vector<double>(e.
nnode*num_eqns);
459 adj[col] = std::vector<double>(num_nodes*num_eqns);
460 for (
unsigned int node=0; node<num_nodes; node++)
461 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
462 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
463 adj[col][node*num_eqns+eqn] = 0.0;
469 std::vector<FadType> x_fad(e.
nnode*num_eqns), f_fad(e.
nnode*num_eqns);
470 std::vector<FadType> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
471 for (
unsigned int i=0; i<num_nodes-1; i++) {
498 unsigned int num_cols,
double mesh_spacing) {
502 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
503 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
504 std::vector< std::vector<double> > adj(num_cols);
505 for (
unsigned int node=0; node<num_nodes; node++)
506 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
507 x[node*num_eqns+eqn] =
508 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
509 f[node*num_eqns+eqn] = 0.0;
512 for (
unsigned int col=0; col<num_cols; col++) {
513 w[col] = std::vector<double>(num_nodes*num_eqns);
514 w_local[col] = std::vector<double>(e.
nnode*num_eqns);
515 adj[col] = std::vector<double>(num_nodes*num_eqns);
516 for (
unsigned int node=0; node<num_nodes; node++)
517 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
518 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
519 adj[col][node*num_eqns+eqn] = 0.0;
525 std::vector<RadType> x_rad(e.
nnode*num_eqns), f_rad(e.
nnode*num_eqns);
526 std::vector<RadType> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
527 std::vector<RadType*> ff_rad(f_rad.size());
528 for (
unsigned int i=0; i<f_rad.size(); i++)
529 ff_rad[i] = &f_rad[i];
530 for (
unsigned int i=0; i<num_nodes-1; i++) {
557 unsigned int num_cols,
double mesh_spacing) {
561 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
562 std::vector< std::vector<double> > w(num_cols);
563 double **w_local =
new double*[num_cols];
564 std::vector< std::vector<double> > adj(num_cols);
565 for (
unsigned int node=0; node<num_nodes; node++)
566 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
567 x[node*num_eqns+eqn] =
568 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
569 f[node*num_eqns+eqn] = 0.0;
572 for (
unsigned int col=0; col<num_cols; col++) {
573 w[col] = std::vector<double>(num_nodes*num_eqns);
574 w_local[col] =
new double[e.
nnode*num_eqns];
575 adj[col] = std::vector<double>(num_nodes*num_eqns);
576 for (
unsigned int node=0; node<num_nodes; node++)
577 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
578 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
579 adj[col][node*num_eqns+eqn] = 0.0;
585 std::vector<VRadType> x_rad(e.
nnode*num_eqns), f_rad(e.
nnode*num_eqns);
586 std::vector<VRadType> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
588 std::vector<std::size_t> vn(num_cols);
589 for (
unsigned int i=0; i<num_cols; i++) {
591 vn[i] = num_eqns*e.
nnode;
592 for (
unsigned int j=0; j<num_eqns*e.
nnode; j++)
593 vf_rad[i][j] = &f_rad[j];
595 for (
unsigned int i=0; i<num_nodes-1; i++) {
604 for (
unsigned int col=0; col<num_cols; col++) {
605 delete [] w_local[col];
606 delete [] vf_rad[col];
629 double adolc_adj_fill(
unsigned int num_nodes,
unsigned int num_eqns,
630 unsigned int num_cols,
double mesh_spacing) {
634 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
635 std::vector< std::vector<double> > w(num_cols);
636 std::vector< std::vector<double> > adj(num_cols);
637 for (
unsigned int node=0; node<num_nodes; node++)
638 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
639 x[node*num_eqns+eqn] =
640 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
641 f[node*num_eqns+eqn] = 0.0;
644 for (
unsigned int col=0; col<num_cols; col++) {
645 w[col] = std::vector<double>(num_nodes*num_eqns);
646 adj[col] = std::vector<double>(num_nodes*num_eqns);
647 for (
unsigned int node=0; node<num_nodes; node++)
648 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
649 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
650 adj[col][node*num_eqns+eqn] = 0.0;
656 std::vector<adouble> x_ad(e.
nnode*num_eqns), f_ad(e.
nnode*num_eqns);
657 std::vector<adouble> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
658 std::vector<double> x_local(e.
nnode*num_eqns);
659 std::vector<double> f_local(e.
nnode*num_eqns);
660 double **adj_local =
new double*[num_cols];
661 double **w_local =
new double*[num_cols];
662 for (
unsigned int i=0; i<num_cols; i++) {
663 adj_local[i] =
new double[e.
nnode*num_eqns];
664 w_local[i] =
new double[e.
nnode*num_eqns];
670 adolc_init_fill(
true, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
672 adolc_process_fill(
true, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
673 f_local, f, adj_local, adj);
676 for (
unsigned int i=1; i<num_nodes-1; i++) {
680 adolc_init_fill(
false, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
681 adolc_process_fill(
false, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
682 f_local, f, adj_local, adj);
684 for (
unsigned int i=0; i<num_cols; i++) {
685 delete [] adj_local[i];
686 delete [] w_local[i];
705 return timer.totalElapsedTime();
708 double adolc_retape_adj_fill(
unsigned int num_nodes,
unsigned int num_eqns,
709 unsigned int num_cols,
double mesh_spacing) {
713 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
714 std::vector< std::vector<double> > w(num_cols);
715 std::vector< std::vector<double> > adj(num_cols);
716 for (
unsigned int node=0; node<num_nodes; node++)
717 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
718 x[node*num_eqns+eqn] =
719 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
720 f[node*num_eqns+eqn] = 0.0;
723 for (
unsigned int col=0; col<num_cols; col++) {
724 w[col] = std::vector<double>(num_nodes*num_eqns);
725 adj[col] = std::vector<double>(num_nodes*num_eqns);
726 for (
unsigned int node=0; node<num_nodes; node++)
727 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
728 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
729 adj[col][node*num_eqns+eqn] = 0.0;
735 std::vector<adouble> x_ad(e.
nnode*num_eqns), f_ad(e.
nnode*num_eqns);
736 std::vector<adouble> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
737 std::vector<double> x_local(e.
nnode*num_eqns);
738 std::vector<double> f_local(e.
nnode*num_eqns);
739 double **adj_local =
new double*[num_cols];
740 double **w_local =
new double*[num_cols];
741 for (
unsigned int i=0; i<num_cols; i++) {
742 adj_local[i] =
new double[e.
nnode*num_eqns];
743 w_local[i] =
new double[e.
nnode*num_eqns];
746 for (
unsigned int i=0; i<num_nodes-1; i++) {
750 adolc_init_fill(
true, 1, e, num_eqns, x, w, x_local, x_ad, w_local);
752 adolc_process_fill(
true, 1, e, num_eqns, num_cols, x_local, w_local, f_ad,
753 f_local, f, adj_local, adj);
755 for (
unsigned int i=0; i<num_cols; i++) {
756 delete [] adj_local[i];
757 delete [] w_local[i];
776 return timer.totalElapsedTime();
781 unsigned int num_cols,
double mesh_spacing) {
785 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
786 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
787 std::vector< std::vector<double> > adj(num_cols);
788 for (
unsigned int node=0; node<num_nodes; node++)
789 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
790 x[node*num_eqns+eqn] =
791 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
792 f[node*num_eqns+eqn] = 0.0;
795 for (
unsigned int col=0; col<num_cols; col++) {
796 w[col] = std::vector<double>(num_nodes*num_eqns);
797 w_local[col] = std::vector<double>(e.
nnode*num_eqns);
798 adj[col] = std::vector<double>(num_nodes*num_eqns);
799 for (
unsigned int node=0; node<num_nodes; node++)
800 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
801 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
802 adj[col][node*num_eqns+eqn] = 0.0;
808 std::vector<double> x_local(e.
nnode*num_eqns), f_local(e.
nnode*num_eqns);
809 std::vector< std::vector<double> > adj_local(num_cols);
810 std::vector<double> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
811 for (
unsigned int i=0; i<num_cols; i++)
812 adj_local[i] = std::vector<double>(e.
nnode*num_eqns);
813 for (
unsigned int i=0; i<num_nodes-1; i++) {
842 unsigned int num_cols,
double mesh_spacing) {
846 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
847 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
848 for (
unsigned int node_row=0; node_row<num_nodes; node_row++) {
849 for (
unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
850 unsigned int row = node_row*num_eqns + eqn_row;
851 x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
856 for (
unsigned int col=0; col<num_cols; col++) {
857 w[col] = std::vector<double>(num_nodes*num_eqns);
858 w_local[col] = std::vector<double>(e.
nnode*num_eqns);
859 for (
unsigned int node=0; node<num_nodes; node++)
860 for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
861 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
867 std::vector<double> x_local(e.
nnode*num_eqns), f_local(e.
nnode*num_eqns);
868 std::vector<double> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
869 for (
unsigned int i=0; i<num_nodes-1; i++) {
887 int main(
int argc,
char* argv[]) {
897 clp.
setDocString(
"This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
898 int num_nodes = 100000;
902 clp.
setOption(
"n", &num_nodes,
"Number of nodes");
903 clp.
setOption(
"p", &num_eqns,
"Number of equations");
904 clp.
setOption(
"q", &num_cols,
"Number of adjoint directions");
905 clp.
setOption(
"rt", &rt,
"Include ADOL-C retaping test");
909 parseReturn= clp.
parse(argc, argv);
913 double mesh_spacing = 1.0 /
static_cast<double>(num_nodes - 1);
915 std::cout.setf(std::ios::scientific);
916 std::cout.precision(p);
917 std::cout <<
"num_nodes = " << num_nodes
918 <<
", num_eqns = " << num_eqns
919 <<
", num_cols = " << num_cols <<
": " << std::endl
920 <<
" " <<
" Time" <<
"\t"<<
"Time/Analytic" <<
"\t"
921 <<
"Time/Residual" << std::endl;
926 tr =
residual_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
929 std::cout <<
"Analytic: " << std::setw(w) << ta <<
"\t" << std::setw(w) << ta/ta <<
"\t" << std::setw(w) << ta/tr << std::endl;
932 t = adolc_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
933 std::cout <<
"ADOL-C: " << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
936 t = adolc_retape_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
937 std::cout <<
"ADOL-C(rt):" << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
941 t = fad_adj_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
942 std::cout <<
"DFad: " << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
944 t = fad_adj_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
945 std::cout <<
"ELRDFad: " << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
947 t =
rad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
948 std::cout <<
"Rad: " << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
950 t =
vrad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
951 std::cout <<
"Vector Rad:" << std::setw(w) << t <<
"\t" << std::setw(w) << t/ta <<
"\t" << std::setw(w) << t/tr << std::endl;
954 catch (std::exception& e) {
955 std::cout << e.what() << std::endl;
958 catch (
const char *s) {
959 std::cout << s << std::endl;
963 std::cout <<
"Caught unknown exception!" << std::endl;
void rad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< RadType > &x_rad, std::vector< std::vector< double > > &w_local)
double residual_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)
void template_element_fill(const ElemData &e, unsigned int neqn, const std::vector< T > &x, std::vector< T > &u, std::vector< T > &du, std::vector< T > &f)
void analytic_element_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &u, std::vector< double > &du, std::vector< double > &f, std::vector< std::vector< double > > &jac)
Sacado::Fad::DFad< double > FadType
void analytic_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, const std::vector< std::vector< double > > &jac_local, std::vector< double > &f, std::vector< std::vector< double > > &jac)
ElemData(double mesh_spacing)
double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void fad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< FadType > &x_fad)
static void Weighted_Gradcomp(size_t, ADVar **, Double *)
void start(bool reset=false)
Sacado::RadVec::ADvar< double > VRadType
void rad_process_fill(const ElemData &e, unsigned int neqn, std::vector< std::vector< double > > &w_local, std::vector< RadType > &x_rad, std::vector< RadType > &f_rad, std::vector< RadType * > &ff_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
void analytic_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &x_local)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
static void Weighted_GradcompVec(size_t, size_t *, ADVar ***, Double **)
void vrad_process_fill(const ElemData &e, unsigned int neqn, unsigned int ncol, std::vector< std::size_t > &vn, double **w_local, std::vector< VRadType > &x_rad, std::vector< VRadType > &f_rad, VRadType ***vf_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void residual_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, std::vector< double > &f)
double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void setDocString(const char doc_string[])
void fad_process_fill(const ElemData &e, unsigned int neqn, const std::vector< FadType > &f_fad, std::vector< double > &f, std::vector< std::vector< double > > &jac)
double totalElapsedTime(bool readCurrentTime=false) const
Sacado::Rad::ADvar< double > RadType
double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void vrad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< VRadType > &x_rad, double **w_local)