33 const std::vector<double>& x,
34 std::vector<double>& x_local) {
35 for (
unsigned int node=0; node<e.
nnode; node++)
36 for (
unsigned int eqn=0; eqn<neqn; eqn++)
37 x_local[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
42 const std::vector<double>& x,
43 std::vector<double>& u,
44 std::vector<double>& du,
45 std::vector<double>& f,
46 std::vector< std::vector<double> >& jac) {
48 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
49 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
51 du[qp*neqn+eqn] = 0.0;
52 for (
unsigned int node=0; node<e.
nnode; node++) {
53 u[qp*neqn+eqn] += x[node*neqn+eqn] * e.
phi[qp][node];
54 du[qp*neqn+eqn] += x[node*neqn+eqn] * e.
dphi[qp][node];
60 std::vector<double> s(e.
nqp);
61 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
63 for (
unsigned int eqn=0; eqn<neqn; eqn++)
64 s[qp] += u[qp*neqn+eqn]*u[qp*neqn+eqn];
68 for (
unsigned int node_row=0; node_row<e.
nnode; node_row++) {
69 for (
unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
70 unsigned int row = node_row*neqn+eqn_row;
72 for (
unsigned int node_col=0; node_col<e.
nnode; node_col++) {
73 for (
unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
74 unsigned int col = node_col*neqn+eqn_col;
78 for (
unsigned int qp=0; qp<e.
nqp; qp++) {
79 double c1 = e.
w[qp]*e.
jac[qp];
80 double c2 = -e.
dphi[qp][node_row]/(e.
jac[qp]*e.
jac[qp]);
81 double c3 =
exp(u[qp*neqn+eqn_row]);
82 double c4 = e.
phi[qp][node_row]*s[qp]*c3;
83 f[row] += c1*(c2*du[qp*neqn+eqn_row] + c4);
84 for (
unsigned int node_col=0; node_col<e.
nnode; node_col++) {
85 jac[row][node_col*neqn+eqn_row] +=
86 c1*(c2*e.
dphi[qp][node_col] + c4*e.
phi[qp][node_col]);
87 for (
unsigned int eqn_col=0; eqn_col<neqn; eqn_col++)
88 jac[row][node_col*neqn+eqn_col] +=
89 2.0*c1*e.
phi[qp][node_row]*u[qp*neqn+eqn_col]*e.
phi[qp][node_col]*c3;
98 const std::vector<double>& f_local,
99 const std::vector< std::vector<double> >& jac_local,
100 std::vector<double>& f,
101 std::vector< std::vector<double> >& jac) {
102 for (
unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
103 f[e.
gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
104 f[e.
gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
105 for (
unsigned int node_col=0; node_col<e.
nnode; node_col++) {
106 for (
unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
107 unsigned int col = node_col*neqn+eqn_col;
108 unsigned int next_col = (node_col+1)*neqn+eqn_col;
109 jac[e.
gid[0]*neqn+eqn_row][next_col] += jac_local[0*neqn+eqn_row][col];
110 jac[e.
gid[1]*neqn+eqn_row][col] += jac_local[1*neqn+eqn_row][col];
117 double mesh_spacing) {
121 std::vector<double> x(num_nodes*num_eqns),
f(num_nodes*num_eqns);
122 std::vector< std::vector<double> > jac(num_nodes*num_eqns);
123 for (
unsigned int node_row=0; node_row<num_nodes; node_row++) {
124 for (
unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
125 unsigned int row = node_row*num_eqns + eqn_row;
126 x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
128 jac[row] = std::vector<double>((e.
nnode+1)*num_eqns);
129 for (
unsigned int node_col=0; node_col<e.
nnode+1; node_col++) {
130 for (
unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) {
131 unsigned int col = node_col*num_eqns + eqn_col;
140 std::vector<double> x_local(e.
nnode*num_eqns), f_local(e.
nnode*num_eqns);
141 std::vector< std::vector<double> > jac_local(e.
nnode*num_eqns);
142 std::vector<double> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
143 for (
unsigned int i=0; i<e.
nnode*num_eqns; i++)
144 jac_local[i] = std::vector<double>(e.
nnode*num_eqns);
145 for (
unsigned int i=0; i<num_nodes-1; i++) {
175 const std::vector<double>& f_local,
176 std::vector<double>& f) {
177 for (
unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
178 f[e.
gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
179 f[e.
gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
184 double mesh_spacing) {
188 std::vector<double> x(num_nodes*num_eqns),
f(num_nodes*num_eqns);
189 for (
unsigned int node_row=0; node_row<num_nodes; node_row++) {
190 for (
unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
191 unsigned int row = node_row*num_eqns + eqn_row;
192 x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
199 std::vector<double> x_local(e.
nnode*num_eqns), f_local(e.
nnode*num_eqns);
200 std::vector<double> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
201 for (
unsigned int i=0; i<num_nodes-1; i++) {
220 #ifndef ADOLC_TAPELESS
221 void adolc_init_fill(
bool retape,
225 const std::vector<double>& x,
226 std::vector<double>& x_local,
227 std::vector<adouble>& x_ad) {
230 for (
unsigned int node=0; node<e.
nnode; node++)
231 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
232 x_local[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
234 x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
238 void adolc_process_fill(
bool retape,
242 std::vector<double>& x_local,
243 std::vector<adouble>& f_ad,
244 std::vector<double>& f,
245 std::vector<double>& f_local,
246 std::vector< std::vector<double> >& jac,
248 double **jac_local) {
250 for (
unsigned int eqn_row=0; eqn_row<neqn; eqn_row++)
251 f_ad[0*neqn+eqn_row] >>= f_local[0*neqn+eqn_row];
252 for (
unsigned int eqn_row=0; eqn_row<neqn; eqn_row++)
253 f_ad[1*neqn+eqn_row] >>= f_local[1*neqn+eqn_row];
259 seed, &f_local[0], jac_local);
261 for (
unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
262 f[e.
gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
263 f[e.
gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
264 for (
unsigned int node_col=0; node_col<e.
nnode; node_col++) {
265 for (
unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
266 unsigned int col = node_col*neqn+eqn_col;
267 unsigned int next_col = (node_col+1)*neqn+eqn_col;
268 jac[e.
gid[0]*neqn+eqn_row][next_col] += jac_local[0*neqn+eqn_row][col];
269 jac[e.
gid[1]*neqn+eqn_row][col] += jac_local[1*neqn+eqn_row][col];
275 double adolc_jac_fill(
unsigned int num_nodes,
unsigned int num_eqns,
276 double mesh_spacing) {
280 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
281 std::vector< std::vector<double> > jac(num_nodes*num_eqns);
282 for (
unsigned int node_row=0; node_row<num_nodes; node_row++) {
283 for (
unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
284 unsigned int row = node_row*num_eqns + eqn_row;
285 x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
287 jac[row] = std::vector<double>((e.
nnode+1)*num_eqns);
288 for (
unsigned int node_col=0; node_col<e.
nnode+1; node_col++) {
289 for (
unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) {
290 unsigned int col = node_col*num_eqns + eqn_col;
299 std::vector<adouble> x_ad(e.
nnode*num_eqns), f_ad(e.
nnode*num_eqns);
300 std::vector<adouble> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
301 std::vector<double> x_local(e.
nnode*num_eqns);
302 std::vector<double> f_local(e.
nnode*num_eqns);
303 double **jac_local =
new double*[e.
nnode*num_eqns];
304 double **seed =
new double*[e.
nnode*num_eqns];
305 for (
unsigned int i=0; i<e.
nnode*num_eqns; i++) {
306 jac_local[i] =
new double[e.
nnode*num_eqns];
307 seed[i] =
new double[e.
nnode*num_eqns];
308 for (
unsigned int j=0; j<e.
nnode*num_eqns; j++)
316 adolc_init_fill(
true, 0, e, num_eqns, x, x_local, x_ad);
318 adolc_process_fill(
true, 0, e, num_eqns, x_local, f_ad, f, f_local,
319 jac, seed, jac_local);
322 for (
unsigned int i=1; i<num_nodes-1; i++) {
326 adolc_init_fill(
false, 0, e, num_eqns, x, x_local, x_ad);
327 adolc_process_fill(
false, 0, e, num_eqns, x_local, f_ad, f, f_local,
328 jac, seed, jac_local);
330 for (
unsigned int i=0; i<e.
nnode*num_eqns; i++) {
331 delete [] jac_local[i];
351 return timer.totalElapsedTime();
354 double adolc_retape_jac_fill(
unsigned int num_nodes,
unsigned int num_eqns,
355 double mesh_spacing) {
359 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
360 std::vector< std::vector<double> > jac(num_nodes*num_eqns);
361 for (
unsigned int node_row=0; node_row<num_nodes; node_row++) {
362 for (
unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
363 unsigned int row = node_row*num_eqns + eqn_row;
364 x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
366 jac[row] = std::vector<double>((e.
nnode+1)*num_eqns);
367 for (
unsigned int node_col=0; node_col<e.
nnode+1; node_col++) {
368 for (
unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) {
369 unsigned int col = node_col*num_eqns + eqn_col;
376 Teuchos::Time timer(
"FE ADOL-C Retape Jacobian Fill",
false);
378 std::vector<adouble> x_ad(e.
nnode*num_eqns), f_ad(e.
nnode*num_eqns);
379 std::vector<adouble> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
380 std::vector<double> x_local(e.
nnode*num_eqns);
381 std::vector<double> f_local(e.
nnode*num_eqns);
382 double **jac_local =
new double*[e.
nnode*num_eqns];
383 double **seed =
new double*[e.
nnode*num_eqns];
384 for (
unsigned int i=0; i<e.
nnode*num_eqns; i++) {
385 jac_local[i] =
new double[e.
nnode*num_eqns];
386 seed[i] =
new double[e.
nnode*num_eqns];
387 for (
unsigned int j=0; j<e.
nnode*num_eqns; j++)
391 for (
unsigned int i=0; i<num_nodes-1; i++) {
395 adolc_init_fill(
true, 1, e, num_eqns, x, x_local, x_ad);
397 adolc_process_fill(
true, 1, e, num_eqns, x_local, f_ad, f, f_local,
398 jac, seed, jac_local);
400 for (
unsigned int i=0; i<e.
nnode*num_eqns; i++) {
401 delete [] jac_local[i];
421 return timer.totalElapsedTime();
426 void adolc_tapeless_init_fill(
const ElemData& e,
428 const std::vector<double>& x,
429 std::vector<adtl::adouble>& x_ad) {
430 for (
unsigned int node=0; node<e.
nnode; node++)
431 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
432 x_ad[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
433 for (
unsigned int i=0; i<neqn*e.
nnode; i++)
434 x_ad[node*neqn+eqn].setADValue(i, 0.0);
435 x_ad[node*neqn+eqn].setADValue(node*neqn+eqn, 1.0);
440 void adolc_tapeless_process_fill(
const ElemData& e,
442 const std::vector<adtl::adouble>& f_ad,
443 std::vector<double>& f,
444 std::vector< std::vector<double> >& jac) {
445 for (
unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
446 f[e.
gid[0]*neqn+eqn_row] += f_ad[0*neqn+eqn_row].getValue();
447 f[e.
gid[1]*neqn+eqn_row] += f_ad[1*neqn+eqn_row].getValue();
448 for (
unsigned int node_col=0; node_col<e.
nnode; node_col++) {
449 for (
unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
450 unsigned int col = node_col*neqn+eqn_col;
451 unsigned int next_col = (node_col+1)*neqn+eqn_col;
452 jac[e.
gid[0]*neqn+eqn_row][next_col] +=
453 f_ad[0*neqn+eqn_row].getADValue(col);
454 jac[e.
gid[1]*neqn+eqn_row][col] +=
455 f_ad[1*neqn+eqn_row].getADValue(col);
461 double adolc_tapeless_jac_fill(
unsigned int num_nodes,
unsigned int num_eqns,
462 double mesh_spacing) {
466 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
467 std::vector< std::vector<double> > jac(num_nodes*num_eqns);
468 for (
unsigned int node_row=0; node_row<num_nodes; node_row++) {
469 for (
unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
470 unsigned int row = node_row*num_eqns + eqn_row;
471 x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
473 jac[row] = std::vector<double>((e.
nnode+1)*num_eqns);
474 for (
unsigned int node_col=0; node_col<e.
nnode+1; node_col++) {
475 for (
unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) {
476 unsigned int col = node_col*num_eqns + eqn_col;
483 Teuchos::Time timer(
"FE Tapeless ADOL-C Jacobian Fill",
false);
485 std::vector<adtl::adouble> x_ad(e.
nnode*num_eqns), f_ad(e.
nnode*num_eqns);
486 std::vector<adtl::adouble> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
487 for (
unsigned int i=0; i<num_nodes-1; i++) {
491 adolc_tapeless_init_fill(e, num_eqns, x, x_ad);
493 adolc_tapeless_process_fill(e, num_eqns, f_ad, f, jac);
510 return timer.totalElapsedTime();
516 void adic_init_fill(
const ElemData& e,
518 const std::vector<double>& x,
519 std::vector<DERIV_TYPE>& x_fad) {
520 static bool first =
true;
521 for (
unsigned int node=0; node<e.
nnode; node++)
522 for (
unsigned int eqn=0; eqn<neqn; eqn++) {
523 x_fad[node*neqn+eqn].value = x[e.
gid[node]*neqn+eqn];
525 ad_AD_SetIndep(x_fad[node*neqn+eqn]);
528 ad_AD_SetIndepDone();
550 unsigned int var_0, var_1, var_2, var_3, var_4, var_5, var_6, var_7;
572 for (
unsigned int qp = 0; qp < e->
nqp; ) {
573 for (
unsigned int eqn = 0; eqn < neqn; ) {
575 ad_grad_axpy_0(&(u[qp * neqn + eqn]));
579 ad_grad_axpy_0(&(du[qp * neqn + eqn]));
582 for (
unsigned int node = 0; node < e->
nnode; ) {
584 loc_0 =
DERIV_val(x[node * neqn + eqn]) * e->
phi[qp][node];
585 loc_1 =
DERIV_val(u[qp * neqn + eqn]) + loc_0;
586 ad_grad_axpy_2(&(u[qp * neqn + eqn]), 1.000000000000000e+00, &(u[qp * neqn + eqn]), e->
phi[qp][node], &(x[node * neqn + eqn]));
591 loc_1 =
DERIV_val(du[qp * neqn + eqn]) + loc_0;
592 ad_grad_axpy_2(&(du[qp * neqn + eqn]), 1.000000000000000e+00, &(du[qp * neqn + eqn]), e->
dphi[qp][node], &(x[node * neqn + eqn]));
602 std::vector<DERIV_TYPE> s(e->
nqp);
603 for (
unsigned int qp = 0; qp < e->
nqp; ) {
605 ad_grad_axpy_0(&(s[qp]));
608 for (
unsigned int eqn = 0; eqn < neqn; ) {
612 ad_grad_axpy_3(&(s[qp]), 1.000000000000000e+00, &(s[qp]),
DERIV_val(u[qp * neqn + eqn]), &(u[qp * neqn + eqn]),
DERIV_val(u[qp * neqn + eqn]), &(u[qp * neqn + eqn]));
619 for (
unsigned int node = 0; node < e->
nnode; ) {
620 for (
unsigned int eqn = 0; eqn < neqn; ) {
621 unsigned int row = node * neqn + eqn;
623 ad_grad_axpy_0(&(f[row]));
626 for (
unsigned int qp = 0; qp < e->
nqp; ) {
630 ad_grad_axpy_1(&(var_8), adji_0, &(u[qp * neqn + eqn]));
633 loc_0 = e->
w[qp] * e->
jac[qp];
634 loc_1 = -e->
dphi[qp][node];
635 loc_2 = e->
jac[qp] * e->
jac[qp];
636 loc_3 = loc_1 / loc_2;
637 loc_4 = loc_3 *
DERIV_val(du[qp * neqn + eqn]);
640 loc_7 = loc_4 + loc_6;
641 loc_8 = loc_0 * loc_7;
643 adj_0 = loc_5 * loc_0;
645 adj_2 = e->
phi[qp][node] * adj_1;
646 adj_3 = loc_3 * loc_0;
647 ad_grad_axpy_4(&(f[row]), 1.000000000000000e+00, &(f[row]), adj_3, &(du[qp * neqn + eqn]), adj_2, &(s[qp]), adj_0, &(var_8));
659 void adic_process_fill(
const ElemData& e,
661 const std::vector<DERIV_TYPE>& f_fad,
662 std::vector<double>& f,
663 std::vector< std::vector<double> >& jac) {
664 for (
unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
665 f[e.
gid[0]*neqn+eqn_row] += f_fad[0*neqn+eqn_row].value;
666 f[e.
gid[1]*neqn+eqn_row] += f_fad[1*neqn+eqn_row].value;
667 for (
unsigned int node_col=0; node_col<e.
nnode; node_col++) {
668 for (
unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
669 unsigned int col = node_col*neqn+eqn_col;
670 unsigned int next_col = (node_col+1)*neqn+eqn_col;
671 jac[e.
gid[0]*neqn+eqn_row][next_col] +=
672 f_fad[0*neqn+eqn_row].grad[col];
673 jac[e.
gid[1]*neqn+eqn_row][col] +=
674 f_fad[1*neqn+eqn_row].grad[col];
680 double adic_jac_fill(
unsigned int num_nodes,
unsigned int num_eqns,
681 double mesh_spacing) {
686 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
687 std::vector< std::vector<double> > jac(num_nodes*num_eqns);
688 for (
unsigned int node_row=0; node_row<num_nodes; node_row++) {
689 for (
unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
690 unsigned int row = node_row*num_eqns + eqn_row;
691 x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
693 jac[row] = std::vector<double>((e.
nnode+1)*num_eqns);
694 for (
unsigned int node_col=0; node_col<e.
nnode+1; node_col++) {
695 for (
unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) {
696 unsigned int col = node_col*num_eqns + eqn_col;
705 std::vector<DERIV_TYPE> x_fad(e.
nnode*num_eqns), f_fad(e.
nnode*num_eqns);
706 std::vector<DERIV_TYPE> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
707 for (
unsigned int i=0; i<num_nodes-1; i++) {
711 adic_init_fill(e, num_eqns, x, x_fad);
713 adic_process_fill(e, num_eqns, f_fad, f, jac);
732 return timer.totalElapsedTime();
double residual_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)
void template_element_fill(const ElemData &e, unsigned int neqn, const std::vector< T > &x, std::vector< T > &u, std::vector< T > &du, std::vector< T > &f)
void adic_element_fill(ElemData *e, unsigned int neqn, const DERIV_TYPE *x, DERIV_TYPE *u, DERIV_TYPE *du, DERIV_TYPE *f)
double analytic_jac_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)
void analytic_element_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &u, std::vector< double > &du, std::vector< double > &f, std::vector< std::vector< double > > &jac)
void analytic_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, const std::vector< std::vector< double > > &jac_local, std::vector< double > &f, std::vector< std::vector< double > > &jac)
void start(bool reset=false)
void analytic_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &x_local)
void residual_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, std::vector< double > &f)
double totalElapsedTime(bool readCurrentTime=false) const