28 std::vector<double>
A(m*k),
B(k*n),
C(m*n);
29 for (
unsigned int j=0; j<k; j++)
30 for (
unsigned int i=0;
i<m;
i++)
32 for (
unsigned int j=0; j<n; j++)
33 for (
unsigned int i=0;
i<k;
i++)
35 for (
unsigned int j=0; j<n; j++)
36 for (
unsigned int i=0;
i<m;
i++)
38 double alpha = urand.
number();
39 double beta = urand.
number();
43 for (
unsigned int j=0; j<nloop; j++) {
45 &
B[0], k, beta, &C[0], m);
58 std::vector<double>
A(m*n),
B(n),
C(m);
59 for (
unsigned int j=0; j<n; j++) {
60 for (
unsigned int i=0;
i<m;
i++)
64 for (
unsigned int i=0;
i<m;
i++)
66 double alpha = urand.
number();
67 double beta = urand.
number();
71 for (
unsigned int j=0; j<nloop; j++) {
72 blas.
GEMV(
Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1, beta, &C[0], 1);
85 std::vector<double> X(m), Y(m);
86 for (
unsigned int i=0;
i<m;
i++) {
94 for (
unsigned int j=0; j<nloop; j++) {
95 z += blas.
DOT(m, &X[0], 1, &Y[0], 1);
102 template <
typename FadType>
105 unsigned int ndot,
unsigned int nloop)
110 std::vector<FadType>
A(m*k),
B(k*n),
C(m*n);
111 for (
unsigned int j=0; j<k; j++) {
112 for (
unsigned int i=0;
i<m;
i++) {
114 for (
unsigned int l=0; l<ndot; l++)
118 for (
unsigned int j=0; j<n; j++) {
119 for (
unsigned int i=0;
i<k;
i++) {
121 for (
unsigned int l=0; l<ndot; l++)
125 for (
unsigned int j=0; j<n; j++) {
126 for (
unsigned int i=0;
i<m;
i++) {
128 for (
unsigned int l=0; l<ndot; l++)
134 for (
unsigned int l=0; l<ndot; l++) {
135 alpha.fastAccessDx(l) = urand.
number();
136 beta.fastAccessDx(l) = urand.
number();
141 for (
unsigned int j=0; j<nloop; j++) {
143 &
B[0], k, beta, &C[0], m);
150 template <
typename FadType>
158 std::vector<FadType>
A(m*n),
B(n),
C(m);
159 for (
unsigned int j=0; j<n; j++) {
160 for (
unsigned int i=0;
i<m;
i++) {
163 for (
unsigned int k=0; k<ndot; k++)
167 for (
unsigned int k=0; k<ndot; k++)
170 for (
unsigned int i=0;
i<m;
i++) {
172 for (
unsigned int k=0; k<ndot; k++)
177 for (
unsigned int k=0; k<ndot; k++) {
178 alpha.fastAccessDx(k) = urand.
number();
179 beta.fastAccessDx(k) = urand.
number();
184 for (
unsigned int j=0; j<nloop; j++) {
185 blas.
GEMV(
Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1, beta, &C[0], 1);
192 template <
typename FadType>
199 std::vector<FadType> X(m), Y(m);
200 for (
unsigned int i=0;
i<m;
i++) {
203 for (
unsigned int k=0; k<ndot; k++) {
204 X[
i].fastAccessDx(k) = urand.
number();
205 Y[
i].fastAccessDx(k) = urand.
number();
211 for (
unsigned int j=0; j<nloop; j++) {
219 template <
typename FadType>
222 unsigned int ndot,
unsigned int nloop,
bool use_dynamic)
225 unsigned int sz = (m*k+k*n+m*n)*(1+ndot);
230 for (
unsigned int j=0; j<k; j++) {
231 for (
unsigned int i=0;
i<m;
i++) {
233 for (
unsigned int l=0; l<ndot; l++)
237 for (
unsigned int j=0; j<n; j++) {
238 for (
unsigned int i=0;
i<k;
i++) {
240 for (
unsigned int l=0; l<ndot; l++)
244 for (
unsigned int j=0; j<n; j++) {
245 for (
unsigned int i=0;
i<m;
i++) {
247 for (
unsigned int l=0; l<ndot; l++)
253 for (
unsigned int l=0; l<ndot; l++) {
254 alpha.fastAccessDx(l) = urand.
number();
255 beta.fastAccessDx(l) = urand.
number();
260 for (
unsigned int j=0; j<nloop; j++) {
262 &
B[0], k, beta, &C[0], m);
269 template <
typename FadType>
272 unsigned int nloop,
bool use_dynamic)
// Count storage for all three GEMV operands: the m-by-n matrix A, the
// length-n input vector B and the length-m output vector C, each entry
// holding a value plus ndot derivative components. The former 2*n term
// undercounted whenever m > n.
// NOTE(review): sz is presumably handed to the BLAS wrapper as its static
// workspace size; that use lies outside this excerpt -- confirm.
275 unsigned int sz = (m*n + n + m)*(1+ndot);
279 for (
unsigned int j=0; j<n; j++) {
280 for (
unsigned int i=0;
i<m;
i++) {
283 for (
unsigned int k=0; k<ndot; k++)
287 for (
unsigned int k=0; k<ndot; k++)
290 for (
unsigned int i=0;
i<m;
i++) {
292 for (
unsigned int k=0; k<ndot; k++)
297 for (
unsigned int k=0; k<ndot; k++) {
298 alpha.fastAccessDx(k) = urand.
number();
299 beta.fastAccessDx(k) = urand.
number();
304 for (
unsigned int j=0; j<nloop; j++) {
305 blas.
GEMV(
Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1, beta, &C[0], 1);
312 template <
typename FadType>
315 unsigned int nloop,
bool use_dynamic)
318 unsigned int sz = 2*m*(1+ndot);
322 for (
unsigned int i=0;
i<m;
i++) {
325 for (
unsigned int k=0; k<ndot; k++) {
326 X[
i].fastAccessDx(k) = urand.
number();
327 Y[
i].fastAccessDx(k) = urand.
number();
333 for (
unsigned int j=0; j<nloop; j++) {
334 FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
341 int main(
int argc,
char* argv[]) {
351 clp.
setDocString(
"This program tests the speed of differentiating BLAS routines using Fad");
353 clp.
setOption(
"m", &m,
"Number of rows");
355 clp.
setOption(
"n", &n,
"Number of columns");
357 clp.
setOption(
"k", &k,
"Number of columns for GEMM");
359 clp.
setOption(
"ndot", &ndot,
"Number of derivative components");
361 clp.
setOption(
"nloop", &nloop,
"Number of loops");
363 clp.
setOption(
"dynamic", &dynamic,
"Use dynamic allocation");
367 parseReturn= clp.
parse(argc, argv);
370 bool use_dynamic = (dynamic != 0);
372 std::cout.setf(std::ios::scientific);
373 std::cout.precision(p);
374 std::cout <<
"Times (sec) for m = " << m <<
", n = " << n
375 <<
", ndot = " << ndot <<
", nloop = " << nloop
376 <<
", dynamic = " << use_dynamic <<
": "
380 std::cout <<
"GEMM: " << std::setw(w) << tb << std::endl;
382 t = do_time_sacado_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop,use_dynamic);
383 std::cout <<
"Sacado DVFad GEMM: " << std::setw(w) << t <<
"\t"
384 << std::setw(w) << t/tb << std::endl;
386 t = do_time_sacado_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop,use_dynamic);
387 std::cout <<
"Sacado DFad GEMM: " << std::setw(w) << t <<
"\t"
388 << std::setw(w) << t/tb << std::endl;
390 t = do_time_teuchos_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop);
391 std::cout <<
"Teuchos DFad GEMM: " << std::setw(w) << t <<
"\t"
392 << std::setw(w) << t/tb << std::endl;
398 t = do_time_teuchos_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop);
399 std::cout <<
"Teuchos DVFad GEMM: " << std::setw(w) << t <<
"\t"
400 << std::setw(w) << t/tb << std::endl;
402 std::cout << std::endl;
405 std::cout <<
"GEMV: " << std::setw(w) << tb << std::endl;
407 t = do_time_sacado_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10,use_dynamic);
408 std::cout <<
"Sacado DVFad GEMV: " << std::setw(w) << t <<
"\t"
409 << std::setw(w) << t/tb << std::endl;
411 t = do_time_sacado_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10,use_dynamic);
412 std::cout <<
"Sacado DFad GEMV: " << std::setw(w) << t <<
"\t"
413 << std::setw(w) << t/tb << std::endl;
415 t = do_time_teuchos_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10);
416 std::cout <<
"Teuchos DFad GEMV: " << std::setw(w) << t <<
"\t"
417 << std::setw(w) << t/tb << std::endl;
423 t = do_time_teuchos_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10);
424 std::cout <<
"Teuchos DVFad GEMV: " << std::setw(w) << t <<
"\t"
425 << std::setw(w) << t/tb << std::endl;
427 std::cout << std::endl;
430 std::cout <<
"DOT: " << std::setw(w) << tb << std::endl;
432 t = do_time_sacado_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100,use_dynamic);
433 std::cout <<
"Sacado DVFad DOT: " << std::setw(w) << t <<
"\t"
434 << std::setw(w) << t/tb << std::endl;
436 t = do_time_sacado_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100,use_dynamic);
437 std::cout <<
"Sacado DFad DOT: " << std::setw(w) << t <<
"\t"
438 << std::setw(w) << t/tb << std::endl;
440 t = do_time_teuchos_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100);
441 std::cout <<
"Teuchos DFad DOT: " << std::setw(w) << t <<
"\t"
442 << std::setw(w) << t/tb << std::endl;
448 t = do_time_teuchos_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100);
449 std::cout <<
"Teuchos DVFad DOT: " << std::setw(w) << t <<
"\t"
450 << std::setw(w) << t/tb << std::endl;
453 catch (std::exception& e) {
454 std::cout << e.what() << std::endl;
457 catch (
const char *s) {
458 std::cout << s << std::endl;
462 std::cout <<
"Caught unknown exception!" << std::endl;
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
double do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, unsigned int nloop)
Sacado::Fad::DFad< double > FadType
double DOT(const int &n, const double *x, const int &incx, const double *y, const int &incy) const
double do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop)
ScalarT number()
Get random number.
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
double do_time_teuchos_double_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int nloop)
void start(bool reset=false)
double do_time_sacado_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, unsigned int nloop, bool use_dynamic)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
double do_time_sacado_fad_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int ndot, unsigned int nloop, bool use_dynamic)
void GEMM(ETransp transa, ETransp transb, const int &m, const int &n, const int &k, const double &alpha, const double *A, const int &lda, const double *B, const int &ldb, const double &beta, double *C, const int &ldc) const
double do_time_teuchos_double_dot(unsigned int m, unsigned int nloop)
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
double do_time_sacado_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop, bool use_dynamic)
void setDocString(const char doc_string[])
void GEMV(ETransp trans, const int &m, const int &n, const double &alpha, const double *A, const int &lda, const double *x, const int &incx, const double &beta, double *y, const int &incy) const
double do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop)
double totalElapsedTime(bool readCurrentTime=false) const
double do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int ndot, unsigned int nloop)