50 std::vector<double>
A(m*k),
B(k*n),
C(m*n);
51 for (
unsigned int j=0; j<k; j++)
52 for (
unsigned int i=0; i<m; i++)
54 for (
unsigned int j=0; j<n; j++)
55 for (
unsigned int i=0; i<k; i++)
57 for (
unsigned int j=0; j<n; j++)
58 for (
unsigned int i=0; i<m; i++)
60 double alpha = urand.
number();
61 double beta = urand.
number();
65 for (
unsigned int j=0; j<nloop; j++) {
67 &
B[0], k, beta, &C[0], m);
80 std::vector<double>
A(m*n),
B(n),
C(m);
81 for (
unsigned int j=0; j<n; j++) {
82 for (
unsigned int i=0; i<m; i++)
86 for (
unsigned int i=0; i<m; i++)
88 double alpha = urand.
number();
89 double beta = urand.
number();
93 for (
unsigned int j=0; j<nloop; j++) {
94 blas.
GEMV(
Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1, beta, &C[0], 1);
107 std::vector<double> X(m), Y(m);
108 for (
unsigned int i=0; i<m; i++) {
116 for (
unsigned int j=0; j<nloop; j++) {
117 z += blas.
DOT(m, &X[0], 1, &Y[0], 1);
124 template <
typename FadType>
127 unsigned int ndot,
unsigned int nloop)
132 std::vector<FadType>
A(m*k),
B(k*n),
C(m*n);
133 for (
unsigned int j=0; j<k; j++) {
134 for (
unsigned int i=0; i<m; i++) {
136 for (
unsigned int l=0; l<ndot; l++)
140 for (
unsigned int j=0; j<n; j++) {
141 for (
unsigned int i=0; i<k; i++) {
143 for (
unsigned int l=0; l<ndot; l++)
147 for (
unsigned int j=0; j<n; j++) {
148 for (
unsigned int i=0; i<m; i++) {
150 for (
unsigned int l=0; l<ndot; l++)
156 for (
unsigned int l=0; l<ndot; l++) {
157 alpha.fastAccessDx(l) = urand.
number();
158 beta.fastAccessDx(l) = urand.
number();
163 for (
unsigned int j=0; j<nloop; j++) {
165 &
B[0], k, beta, &C[0], m);
172 template <
typename FadType>
180 std::vector<FadType>
A(m*n),
B(n),
C(m);
181 for (
unsigned int j=0; j<n; j++) {
182 for (
unsigned int i=0; i<m; i++) {
185 for (
unsigned int k=0; k<ndot; k++)
189 for (
unsigned int k=0; k<ndot; k++)
192 for (
unsigned int i=0; i<m; i++) {
194 for (
unsigned int k=0; k<ndot; k++)
199 for (
unsigned int k=0; k<ndot; k++) {
200 alpha.fastAccessDx(k) = urand.
number();
201 beta.fastAccessDx(k) = urand.
number();
206 for (
unsigned int j=0; j<nloop; j++) {
207 blas.
GEMV(
Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1, beta, &C[0], 1);
214 template <
typename FadType>
221 std::vector<FadType> X(m), Y(m);
222 for (
unsigned int i=0; i<m; i++) {
225 for (
unsigned int k=0; k<ndot; k++) {
226 X[i].fastAccessDx(k) = urand.
number();
227 Y[i].fastAccessDx(k) = urand.
number();
233 for (
unsigned int j=0; j<nloop; j++) {
241 template <
typename FadType>
244 unsigned int ndot,
unsigned int nloop,
bool use_dynamic)
247 unsigned int sz = (m*k+k*n+m*n)*(1+ndot);
252 for (
unsigned int j=0; j<k; j++) {
253 for (
unsigned int i=0; i<m; i++) {
255 for (
unsigned int l=0; l<ndot; l++)
259 for (
unsigned int j=0; j<n; j++) {
260 for (
unsigned int i=0; i<k; i++) {
262 for (
unsigned int l=0; l<ndot; l++)
266 for (
unsigned int j=0; j<n; j++) {
267 for (
unsigned int i=0; i<m; i++) {
269 for (
unsigned int l=0; l<ndot; l++)
275 for (
unsigned int l=0; l<ndot; l++) {
276 alpha.fastAccessDx(l) = urand.
number();
277 beta.fastAccessDx(l) = urand.
number();
282 for (
unsigned int j=0; j<nloop; j++) {
284 &
B[0], k, beta, &C[0], m);
291 template <
typename FadType>
294 unsigned int nloop,
bool use_dynamic)
297 unsigned int sz = m*n*(1+ndot) + 2*n*(1+ndot);
301 for (
unsigned int j=0; j<n; j++) {
302 for (
unsigned int i=0; i<m; i++) {
305 for (
unsigned int k=0; k<ndot; k++)
309 for (
unsigned int k=0; k<ndot; k++)
312 for (
unsigned int i=0; i<m; i++) {
314 for (
unsigned int k=0; k<ndot; k++)
319 for (
unsigned int k=0; k<ndot; k++) {
320 alpha.fastAccessDx(k) = urand.
number();
321 beta.fastAccessDx(k) = urand.
number();
326 for (
unsigned int j=0; j<nloop; j++) {
327 blas.
GEMV(
Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1, beta, &C[0], 1);
334 template <
typename FadType>
337 unsigned int nloop,
bool use_dynamic)
340 unsigned int sz = 2*m*(1+ndot);
344 for (
unsigned int i=0; i<m; i++) {
347 for (
unsigned int k=0; k<ndot; k++) {
348 X[i].fastAccessDx(k) = urand.
number();
349 Y[i].fastAccessDx(k) = urand.
number();
355 for (
unsigned int j=0; j<nloop; j++) {
356 FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
363 int main(
int argc,
char* argv[]) {
373 clp.
setDocString(
"This program tests the speed of differentiating BLAS routines using Fad");
375 clp.
setOption(
"m", &m,
"Number of rows");
377 clp.
setOption(
"n", &n,
"Number of columns");
379 clp.
setOption(
"k", &k,
"Number of columns for GEMM");
381 clp.
setOption(
"ndot", &ndot,
"Number of derivative components");
383 clp.
setOption(
"nloop", &nloop,
"Number of loops");
385 clp.
setOption(
"dynamic", &dynamic,
"Use dynamic allocation");
389 parseReturn= clp.
parse(argc, argv);
392 bool use_dynamic = (dynamic != 0);
394 std::cout.setf(std::ios::scientific);
395 std::cout.precision(p);
396 std::cout <<
"Times (sec) for m = " << m <<
", n = " << n
397 <<
", ndot = " << ndot <<
", nloop = " << nloop
398 <<
", dynamic = " << use_dynamic <<
": "
402 std::cout <<
"GEMM: " << std::setw(w) << tb << std::endl;
404 t = do_time_sacado_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop,use_dynamic);
405 std::cout <<
"Sacado DVFad GEMM: " << std::setw(w) << t <<
"\t"
406 << std::setw(w) << t/tb << std::endl;
408 t = do_time_sacado_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop,use_dynamic);
409 std::cout <<
"Sacado DFad GEMM: " << std::setw(w) << t <<
"\t"
410 << std::setw(w) << t/tb << std::endl;
412 t = do_time_teuchos_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop);
413 std::cout <<
"Teuchos DFad GEMM: " << std::setw(w) << t <<
"\t"
414 << std::setw(w) << t/tb << std::endl;
420 t = do_time_teuchos_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop);
421 std::cout <<
"Teuchos DVFad GEMM: " << std::setw(w) << t <<
"\t"
422 << std::setw(w) << t/tb << std::endl;
424 std::cout << std::endl;
427 std::cout <<
"GEMV: " << std::setw(w) << tb << std::endl;
429 t = do_time_sacado_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10,use_dynamic);
430 std::cout <<
"Sacado DVFad GEMV: " << std::setw(w) << t <<
"\t"
431 << std::setw(w) << t/tb << std::endl;
433 t = do_time_sacado_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10,use_dynamic);
434 std::cout <<
"Sacado DFad GEMV: " << std::setw(w) << t <<
"\t"
435 << std::setw(w) << t/tb << std::endl;
437 t = do_time_teuchos_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10);
438 std::cout <<
"Teuchos DFad GEMV: " << std::setw(w) << t <<
"\t"
439 << std::setw(w) << t/tb << std::endl;
445 t = do_time_teuchos_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10);
446 std::cout <<
"Teuchos DVFad GEMV: " << std::setw(w) << t <<
"\t"
447 << std::setw(w) << t/tb << std::endl;
449 std::cout << std::endl;
452 std::cout <<
"DOT: " << std::setw(w) << tb << std::endl;
454 t = do_time_sacado_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100,use_dynamic);
455 std::cout <<
"Sacado DVFad DOT: " << std::setw(w) << t <<
"\t"
456 << std::setw(w) << t/tb << std::endl;
458 t = do_time_sacado_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100,use_dynamic);
459 std::cout <<
"Sacado DFad DOT: " << std::setw(w) << t <<
"\t"
460 << std::setw(w) << t/tb << std::endl;
462 t = do_time_teuchos_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100);
463 std::cout <<
"Teuchos DFad DOT: " << std::setw(w) << t <<
"\t"
464 << std::setw(w) << t/tb << std::endl;
470 t = do_time_teuchos_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100);
471 std::cout <<
"Teuchos DVFad DOT: " << std::setw(w) << t <<
"\t"
472 << std::setw(w) << t/tb << std::endl;
475 catch (std::exception& e) {
476 std::cout << e.what() << std::endl;
479 catch (
const char *s) {
480 std::cout << s << std::endl;
484 std::cout <<
"Caught unknown exception!" << std::endl;
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
double do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, unsigned int nloop)
Sacado::Fad::DFad< double > FadType
double DOT(const int &n, const double *x, const int &incx, const double *y, const int &incy) const
double do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop)
ScalarT number()
Get random number.
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
double do_time_teuchos_double_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int nloop)
void start(bool reset=false)
double do_time_sacado_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, unsigned int nloop, bool use_dynamic)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
double do_time_sacado_fad_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int ndot, unsigned int nloop, bool use_dynamic)
void GEMM(ETransp transa, ETransp transb, const int &m, const int &n, const int &k, const double &alpha, const double *A, const int &lda, const double *B, const int &ldb, const double &beta, double *C, const int &ldc) const
double do_time_teuchos_double_dot(unsigned int m, unsigned int nloop)
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
double do_time_sacado_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop, bool use_dynamic)
void setDocString(const char doc_string[])
void GEMV(ETransp trans, const int &m, const int &n, const double &alpha, const double *A, const int &lda, const double *x, const int &incx, const double &beta, double *y, const int &incy) const
double do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop)
double totalElapsedTime(bool readCurrentTime=false) const
double do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int ndot, unsigned int nloop)