Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
fad_blas.cpp
Go to the documentation of this file.
1 // $Id$
2 // $Source$
3 // @HEADER
4 // ***********************************************************************
5 //
6 // Sacado Package
7 // Copyright (2006) Sandia Corporation
8 //
9 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // This library is free software; you can redistribute it and/or modify
13 // it under the terms of the GNU Lesser General Public License as
14 // published by the Free Software Foundation; either version 2.1 of the
15 // License, or (at your option) any later version.
16 //
17 // This library is distributed in the hope that it will be useful, but
18 // WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License along with this library; if not, write to the Free Software
24 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 // USA
26 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
27 // (etphipp@sandia.gov).
28 //
29 // ***********************************************************************
30 // @HEADER
31 
32 #include "Sacado_Random.hpp"
33 #include "Sacado_No_Kokkos.hpp"
34 #include "Teuchos_BLAS.hpp"
35 #include "Sacado_Fad_BLAS.hpp"
36 
37 #include "Teuchos_Time.hpp"
39 
40 // A performance test that compares the cost of differentiating BLAS routines
41 // with Fad
42 
43 double
44 do_time_teuchos_double_gemm(unsigned int m, unsigned int n, unsigned int k,
45  unsigned int nloop)
46 {
47  Sacado::Random<double> urand(0.0, 1.0);
49 
50  std::vector<double> A(m*k), B(k*n), C(m*n);
51  for (unsigned int j=0; j<k; j++)
52  for (unsigned int i=0; i<m; i++)
53  A[i+j*m] = urand.number();
54  for (unsigned int j=0; j<n; j++)
55  for (unsigned int i=0; i<k; i++)
56  B[i+j*k] = urand.number();
57  for (unsigned int j=0; j<n; j++)
58  for (unsigned int i=0; i<m; i++)
59  C[i+j*m] = urand.number();
60  double alpha = urand.number();
61  double beta = urand.number();
62 
63  Teuchos::Time timer("Teuchos Double GEMM", false);
64  timer.start(true);
65  for (unsigned int j=0; j<nloop; j++) {
66  blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
67  &B[0], k, beta, &C[0], m);
68  }
69  timer.stop();
70 
71  return timer.totalElapsedTime() / nloop;
72 }
73 
74 double
75 do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop)
76 {
77  Sacado::Random<double> urand(0.0, 1.0);
79 
80  std::vector<double> A(m*n), B(n), C(m);
81  for (unsigned int j=0; j<n; j++) {
82  for (unsigned int i=0; i<m; i++)
83  A[i+j*m] = urand.number();
84  B[j] = urand.number();
85  }
86  for (unsigned int i=0; i<m; i++)
87  C[i] = urand.number();
88  double alpha = urand.number();
89  double beta = urand.number();
90 
91  Teuchos::Time timer("Teuchos Double GEMV", false);
92  timer.start(true);
93  for (unsigned int j=0; j<nloop; j++) {
94  blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
95  }
96  timer.stop();
97 
98  return timer.totalElapsedTime() / nloop;
99 }
100 
101 double
102 do_time_teuchos_double_dot(unsigned int m, unsigned int nloop)
103 {
104  Sacado::Random<double> urand(0.0, 1.0);
106 
107  std::vector<double> X(m), Y(m);
108  for (unsigned int i=0; i<m; i++) {
109  X[i] = urand.number();
110  Y[i] = urand.number();
111  }
112 
113  Teuchos::Time timer("Teuchos Double DOT", false);
114  timer.start(true);
115  double z = 0.0;
116  for (unsigned int j=0; j<nloop; j++) {
117  z += blas.DOT(m, &X[0], 1, &Y[0], 1);
118  }
119  timer.stop();
120 
121  return timer.totalElapsedTime() / nloop;
122 }
123 
124 template <typename FadType>
125 double
126 do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k,
127  unsigned int ndot, unsigned int nloop)
128 {
129  Sacado::Random<double> urand(0.0, 1.0);
131 
132  std::vector<FadType> A(m*k), B(k*n), C(m*n);
133  for (unsigned int j=0; j<k; j++) {
134  for (unsigned int i=0; i<m; i++) {
135  A[i+j*m] = FadType(ndot, urand.number());
136  for (unsigned int l=0; l<ndot; l++)
137  A[i+j*m].fastAccessDx(l) = urand.number();
138  }
139  }
140  for (unsigned int j=0; j<n; j++) {
141  for (unsigned int i=0; i<k; i++) {
142  B[i+j*k] = FadType(ndot, urand.number());
143  for (unsigned int l=0; l<ndot; l++)
144  B[i+j*k].fastAccessDx(l) = urand.number();
145  }
146  }
147  for (unsigned int j=0; j<n; j++) {
148  for (unsigned int i=0; i<m; i++) {
149  C[i+j*m] = FadType(ndot, urand.number());
150  for (unsigned int l=0; l<ndot; l++)
151  C[i+j*m].fastAccessDx(l) = urand.number();
152  }
153  }
154  FadType alpha(ndot, urand.number());
155  FadType beta(ndot, urand.number());
156  for (unsigned int l=0; l<ndot; l++) {
157  alpha.fastAccessDx(l) = urand.number();
158  beta.fastAccessDx(l) = urand.number();
159  }
160 
161  Teuchos::Time timer("Teuchos Fad GEMM", false);
162  timer.start(true);
163  for (unsigned int j=0; j<nloop; j++) {
164  blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
165  &B[0], k, beta, &C[0], m);
166  }
167  timer.stop();
168 
169  return timer.totalElapsedTime() / nloop;
170 }
171 
172 template <typename FadType>
173 double
174 do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot,
175  unsigned int nloop)
176 {
177  Sacado::Random<double> urand(0.0, 1.0);
179 
180  std::vector<FadType> A(m*n), B(n), C(m);
181  for (unsigned int j=0; j<n; j++) {
182  for (unsigned int i=0; i<m; i++) {
183  //A[i+j*m] = urand.number();
184  A[i+j*m] = FadType(ndot, urand.number());
185  for (unsigned int k=0; k<ndot; k++)
186  A[i+j*m].fastAccessDx(k) = urand.number();
187  }
188  B[j] = FadType(ndot, urand.number());
189  for (unsigned int k=0; k<ndot; k++)
190  B[j].fastAccessDx(k) = urand.number();
191  }
192  for (unsigned int i=0; i<m; i++) {
193  C[i] = FadType(ndot, urand.number());
194  for (unsigned int k=0; k<ndot; k++)
195  C[i].fastAccessDx(k) = urand.number();
196  }
197  FadType alpha(ndot, urand.number());
198  FadType beta(ndot, urand.number());
199  for (unsigned int k=0; k<ndot; k++) {
200  alpha.fastAccessDx(k) = urand.number();
201  beta.fastAccessDx(k) = urand.number();
202  }
203 
204  Teuchos::Time timer("Teuchos Fad GEMV", false);
205  timer.start(true);
206  for (unsigned int j=0; j<nloop; j++) {
207  blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
208  }
209  timer.stop();
210 
211  return timer.totalElapsedTime() / nloop;
212 }
213 
214 template <typename FadType>
215 double
216 do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop)
217 {
218  Sacado::Random<double> urand(0.0, 1.0);
220 
221  std::vector<FadType> X(m), Y(m);
222  for (unsigned int i=0; i<m; i++) {
223  X[i] = FadType(ndot, urand.number());
224  Y[i] = FadType(ndot, urand.number());
225  for (unsigned int k=0; k<ndot; k++) {
226  X[i].fastAccessDx(k) = urand.number();
227  Y[i].fastAccessDx(k) = urand.number();
228  }
229  }
230 
231  Teuchos::Time timer("Teuchos Fad DOT", false);
232  timer.start(true);
233  for (unsigned int j=0; j<nloop; j++) {
234  FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
235  }
236  timer.stop();
237 
238  return timer.totalElapsedTime() / nloop;
239 }
240 
241 template <typename FadType>
242 double
243 do_time_sacado_fad_gemm(unsigned int m, unsigned int n, unsigned int k,
244  unsigned int ndot, unsigned int nloop, bool use_dynamic)
245 {
246  Sacado::Random<double> urand(0.0, 1.0);
247  unsigned int sz = (m*k+k*n+m*n)*(1+ndot);
248  Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
249 
250  Sacado::Fad::Vector<unsigned int, FadType> A(m*k,ndot), B(k*n,ndot), C
251  (m*n,ndot);
252  for (unsigned int j=0; j<k; j++) {
253  for (unsigned int i=0; i<m; i++) {
254  A[i+j*m] = FadType(ndot, urand.number());
255  for (unsigned int l=0; l<ndot; l++)
256  A[i+j*m].fastAccessDx(l) = urand.number();
257  }
258  }
259  for (unsigned int j=0; j<n; j++) {
260  for (unsigned int i=0; i<k; i++) {
261  B[i+j*k] = FadType(ndot, urand.number());
262  for (unsigned int l=0; l<ndot; l++)
263  B[i+j*k].fastAccessDx(l) = urand.number();
264  }
265  }
266  for (unsigned int j=0; j<n; j++) {
267  for (unsigned int i=0; i<m; i++) {
268  C[i+j*m] = FadType(ndot, urand.number());
269  for (unsigned int l=0; l<ndot; l++)
270  C[i+j*m].fastAccessDx(l) = urand.number();
271  }
272  }
273  FadType alpha(ndot, urand.number());
274  FadType beta(ndot, urand.number());
275  for (unsigned int l=0; l<ndot; l++) {
276  alpha.fastAccessDx(l) = urand.number();
277  beta.fastAccessDx(l) = urand.number();
278  }
279 
280  Teuchos::Time timer("Teuchos Fad GEMM", false);
281  timer.start(true);
282  for (unsigned int j=0; j<nloop; j++) {
283  blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
284  &B[0], k, beta, &C[0], m);
285  }
286  timer.stop();
287 
288  return timer.totalElapsedTime() / nloop;
289 }
290 
291 template <typename FadType>
292 double
293 do_time_sacado_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot,
294  unsigned int nloop, bool use_dynamic)
295 {
296  Sacado::Random<double> urand(0.0, 1.0);
297  unsigned int sz = m*n*(1+ndot) + 2*n*(1+ndot);
298  Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
299 
300  Sacado::Fad::Vector<unsigned int, FadType> A(m*n,ndot), B(n,ndot), C(m,ndot);
301  for (unsigned int j=0; j<n; j++) {
302  for (unsigned int i=0; i<m; i++) {
303  //A[i+j*m] = urand.number();
304  A[i+j*m] = FadType(ndot, urand.number());
305  for (unsigned int k=0; k<ndot; k++)
306  A[i+j*m].fastAccessDx(k) = urand.number();
307  }
308  B[j] = FadType(ndot, urand.number());
309  for (unsigned int k=0; k<ndot; k++)
310  B[j].fastAccessDx(k) = urand.number();
311  }
312  for (unsigned int i=0; i<m; i++) {
313  C[i] = FadType(ndot, urand.number());
314  for (unsigned int k=0; k<ndot; k++)
315  C[i].fastAccessDx(k) = urand.number();
316  }
317  FadType alpha(ndot, urand.number());
318  FadType beta(ndot, urand.number());
319  for (unsigned int k=0; k<ndot; k++) {
320  alpha.fastAccessDx(k) = urand.number();
321  beta.fastAccessDx(k) = urand.number();
322  }
323 
324  Teuchos::Time timer("Teuchos Fad GEMV", false);
325  timer.start(true);
326  for (unsigned int j=0; j<nloop; j++) {
327  blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
328  }
329  timer.stop();
330 
331  return timer.totalElapsedTime() / nloop;
332 }
333 
334 template <typename FadType>
335 double
336 do_time_sacado_fad_dot(unsigned int m, unsigned int ndot,
337  unsigned int nloop, bool use_dynamic)
338 {
339  Sacado::Random<double> urand(0.0, 1.0);
340  unsigned int sz = 2*m*(1+ndot);
341  Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
342 
343  Sacado::Fad::Vector<unsigned int, FadType> X(m,ndot), Y(m,ndot);
344  for (unsigned int i=0; i<m; i++) {
345  X[i] = FadType(ndot, urand.number());
346  Y[i] = FadType(ndot, urand.number());
347  for (unsigned int k=0; k<ndot; k++) {
348  X[i].fastAccessDx(k) = urand.number();
349  Y[i].fastAccessDx(k) = urand.number();
350  }
351  }
352 
353  Teuchos::Time timer("Teuchos Fad DOT", false);
354  timer.start(true);
355  for (unsigned int j=0; j<nloop; j++) {
356  FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
357  }
358  timer.stop();
359 
360  return timer.totalElapsedTime() / nloop;
361 }
362 
363 int main(int argc, char* argv[]) {
364  int ierr = 0;
365 
366  try {
367  double t, tb;
368  int p = 2;
369  int w = p+7;
370 
371  // Set up command line options
373  clp.setDocString("This program tests the speed of differentiating BLAS routines using Fad");
374  int m = 10;
375  clp.setOption("m", &m, "Number of rows");
376  int n = 10;
377  clp.setOption("n", &n, "Number of columns");
378  int k = 10;
379  clp.setOption("k", &k, "Number of columns for GEMM");
380  int ndot = 10;
381  clp.setOption("ndot", &ndot, "Number of derivative components");
382  int nloop = 100000;
383  clp.setOption("nloop", &nloop, "Number of loops");
384  int dynamic = 1;
385  clp.setOption("dynamic", &dynamic, "Use dynamic allocation");
386 
387  // Parse options
389  parseReturn= clp.parse(argc, argv);
391  return 1;
392  bool use_dynamic = (dynamic != 0);
393 
394  std::cout.setf(std::ios::scientific);
395  std::cout.precision(p);
396  std::cout << "Times (sec) for m = " << m << ", n = " << n
397  << ", ndot = " << ndot << ", nloop = " << nloop
398  << ", dynamic = " << use_dynamic << ": "
399  << std::endl;
400 
401  tb = do_time_teuchos_double_gemm(m,n,k,nloop);
402  std::cout << "GEMM: " << std::setw(w) << tb << std::endl;
403 
404  t = do_time_sacado_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop,use_dynamic);
405  std::cout << "Sacado DVFad GEMM: " << std::setw(w) << t << "\t"
406  << std::setw(w) << t/tb << std::endl;
407 
408  t = do_time_sacado_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop,use_dynamic);
409  std::cout << "Sacado DFad GEMM: " << std::setw(w) << t << "\t"
410  << std::setw(w) << t/tb << std::endl;
411 
412  t = do_time_teuchos_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop);
413  std::cout << "Teuchos DFad GEMM: " << std::setw(w) << t << "\t"
414  << std::setw(w) << t/tb << std::endl;
415 
416  // t = do_time_teuchos_fad_gemm< Sacado::ELRFad::DFad<double> >(m,n,k,ndot,nloop);
417  // std::cout << "Teuchos ELRDFad GEMM: " << std::setw(w) << t << "\t"
418  // << std::setw(w) << t/tb << std::endl;
419 
420  t = do_time_teuchos_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop);
421  std::cout << "Teuchos DVFad GEMM: " << std::setw(w) << t << "\t"
422  << std::setw(w) << t/tb << std::endl;
423 
424  std::cout << std::endl;
425 
426  tb = do_time_teuchos_double_gemv(m,n,nloop);
427  std::cout << "GEMV: " << std::setw(w) << tb << std::endl;
428 
429  t = do_time_sacado_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10,use_dynamic);
430  std::cout << "Sacado DVFad GEMV: " << std::setw(w) << t << "\t"
431  << std::setw(w) << t/tb << std::endl;
432 
433  t = do_time_sacado_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10,use_dynamic);
434  std::cout << "Sacado DFad GEMV: " << std::setw(w) << t << "\t"
435  << std::setw(w) << t/tb << std::endl;
436 
437  t = do_time_teuchos_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10);
438  std::cout << "Teuchos DFad GEMV: " << std::setw(w) << t << "\t"
439  << std::setw(w) << t/tb << std::endl;
440 
441  // t = do_time_teuchos_fad_gemv< Sacado::ELRFad::DFad<double> >(m,n,ndot,nloop*10);
442  // std::cout << "Teuchos ELRDFad GEMV: " << std::setw(w) << t << "\t"
443  // << std::setw(w) << t/tb << std::endl;
444 
445  t = do_time_teuchos_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10);
446  std::cout << "Teuchos DVFad GEMV: " << std::setw(w) << t << "\t"
447  << std::setw(w) << t/tb << std::endl;
448 
449  std::cout << std::endl;
450 
451  tb = do_time_teuchos_double_dot(m,nloop*100);
452  std::cout << "DOT: " << std::setw(w) << tb << std::endl;
453 
454  t = do_time_sacado_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100,use_dynamic);
455  std::cout << "Sacado DVFad DOT: " << std::setw(w) << t << "\t"
456  << std::setw(w) << t/tb << std::endl;
457 
458  t = do_time_sacado_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100,use_dynamic);
459  std::cout << "Sacado DFad DOT: " << std::setw(w) << t << "\t"
460  << std::setw(w) << t/tb << std::endl;
461 
462  t = do_time_teuchos_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100);
463  std::cout << "Teuchos DFad DOT: " << std::setw(w) << t << "\t"
464  << std::setw(w) << t/tb << std::endl;
465 
466  // t = do_time_teuchos_fad_dot< Sacado::ELRFad::DFad<double> >(m,ndot,nloop*100);
467  // std::cout << "Teuchos ELRDFad DOT: " << std::setw(w) << t << "\t"
468  // << std::setw(w) << t/tb << std::endl;
469 
470  t = do_time_teuchos_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100);
471  std::cout << "Teuchos DVFad DOT: " << std::setw(w) << t << "\t"
472  << std::setw(w) << t/tb << std::endl;
473 
474  }
475  catch (std::exception& e) {
476  std::cout << e.what() << std::endl;
477  ierr = 1;
478  }
479  catch (const char *s) {
480  std::cout << s << std::endl;
481  ierr = 1;
482  }
483  catch (...) {
484  std::cout << "Caught unknown exception!" << std::endl;
485  ierr = 1;
486  }
487 
488  return ierr;
489 }
const char * p
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
double do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, unsigned int nloop)
Definition: fad_blas.cpp:174
Sacado::Fad::DFad< double > FadType
double DOT(const int &n, const double *x, const int &incx, const double *y, const int &incy) const
double do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop)
Definition: fad_blas.cpp:75
ScalarT number()
Get random number.
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
double do_time_teuchos_double_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int nloop)
Definition: fad_blas.cpp:44
void start(bool reset=false)
#define C(x)
double do_time_sacado_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, unsigned int nloop, bool use_dynamic)
Definition: fad_blas.cpp:293
#define A
Definition: Sacado_rad.hpp:572
double stop()
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
int main()
Definition: ad_example.cpp:191
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
double do_time_sacado_fad_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int ndot, unsigned int nloop, bool use_dynamic)
Definition: fad_blas.cpp:243
void GEMM(ETransp transa, ETransp transb, const int &m, const int &n, const int &k, const double &alpha, const double *A, const int &lda, const double *B, const int &ldb, const double &beta, double *C, const int &ldc) const
double do_time_teuchos_double_dot(unsigned int m, unsigned int nloop)
Definition: fad_blas.cpp:102
Uncopyable z
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
double do_time_sacado_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop, bool use_dynamic)
Definition: fad_blas.cpp:336
void setDocString(const char doc_string[])
void GEMV(ETransp trans, const int &m, const int &n, const double &alpha, const double *A, const int &lda, const double *x, const int &incx, const double &beta, double *y, const int &incy) const
double do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop)
Definition: fad_blas.cpp:216
double totalElapsedTime(bool readCurrentTime=false) const
double do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int ndot, unsigned int nloop)
Definition: fad_blas.cpp:126
int n