Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
fad_blas.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Sacado Package
4 //
5 // Copyright 2006 NTESS and the Sacado contributors.
6 // SPDX-License-Identifier: LGPL-2.1-or-later
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Sacado_Random.hpp"
11 #include "Sacado_No_Kokkos.hpp"
12 #include "Teuchos_BLAS.hpp"
13 #include "Sacado_Fad_BLAS.hpp"
14 
15 #include "Teuchos_Time.hpp"
17 
18 // A performance test that compares the cost of differentiating BLAS routines
19 // with Fad
20 
21 double
22 do_time_teuchos_double_gemm(unsigned int m, unsigned int n, unsigned int k,
23  unsigned int nloop)
24 {
25  Sacado::Random<double> urand(0.0, 1.0);
27 
28  std::vector<double> A(m*k), B(k*n), C(m*n);
29  for (unsigned int j=0; j<k; j++)
30  for (unsigned int i=0; i<m; i++)
31  A[i+j*m] = urand.number();
32  for (unsigned int j=0; j<n; j++)
33  for (unsigned int i=0; i<k; i++)
34  B[i+j*k] = urand.number();
35  for (unsigned int j=0; j<n; j++)
36  for (unsigned int i=0; i<m; i++)
37  C[i+j*m] = urand.number();
38  double alpha = urand.number();
39  double beta = urand.number();
40 
41  Teuchos::Time timer("Teuchos Double GEMM", false);
42  timer.start(true);
43  for (unsigned int j=0; j<nloop; j++) {
44  blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
45  &B[0], k, beta, &C[0], m);
46  }
47  timer.stop();
48 
49  return timer.totalElapsedTime() / nloop;
50 }
51 
52 double
53 do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop)
54 {
55  Sacado::Random<double> urand(0.0, 1.0);
57 
58  std::vector<double> A(m*n), B(n), C(m);
59  for (unsigned int j=0; j<n; j++) {
60  for (unsigned int i=0; i<m; i++)
61  A[i+j*m] = urand.number();
62  B[j] = urand.number();
63  }
64  for (unsigned int i=0; i<m; i++)
65  C[i] = urand.number();
66  double alpha = urand.number();
67  double beta = urand.number();
68 
69  Teuchos::Time timer("Teuchos Double GEMV", false);
70  timer.start(true);
71  for (unsigned int j=0; j<nloop; j++) {
72  blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
73  }
74  timer.stop();
75 
76  return timer.totalElapsedTime() / nloop;
77 }
78 
79 double
80 do_time_teuchos_double_dot(unsigned int m, unsigned int nloop)
81 {
82  Sacado::Random<double> urand(0.0, 1.0);
84 
85  std::vector<double> X(m), Y(m);
86  for (unsigned int i=0; i<m; i++) {
87  X[i] = urand.number();
88  Y[i] = urand.number();
89  }
90 
91  Teuchos::Time timer("Teuchos Double DOT", false);
92  timer.start(true);
93  double z = 0.0;
94  for (unsigned int j=0; j<nloop; j++) {
95  z += blas.DOT(m, &X[0], 1, &Y[0], 1);
96  }
97  timer.stop();
98 
99  return timer.totalElapsedTime() / nloop;
100 }
101 
102 template <typename FadType>
103 double
104 do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k,
105  unsigned int ndot, unsigned int nloop)
106 {
107  Sacado::Random<double> urand(0.0, 1.0);
109 
110  std::vector<FadType> A(m*k), B(k*n), C(m*n);
111  for (unsigned int j=0; j<k; j++) {
112  for (unsigned int i=0; i<m; i++) {
113  A[i+j*m] = FadType(ndot, urand.number());
114  for (unsigned int l=0; l<ndot; l++)
115  A[i+j*m].fastAccessDx(l) = urand.number();
116  }
117  }
118  for (unsigned int j=0; j<n; j++) {
119  for (unsigned int i=0; i<k; i++) {
120  B[i+j*k] = FadType(ndot, urand.number());
121  for (unsigned int l=0; l<ndot; l++)
122  B[i+j*k].fastAccessDx(l) = urand.number();
123  }
124  }
125  for (unsigned int j=0; j<n; j++) {
126  for (unsigned int i=0; i<m; i++) {
127  C[i+j*m] = FadType(ndot, urand.number());
128  for (unsigned int l=0; l<ndot; l++)
129  C[i+j*m].fastAccessDx(l) = urand.number();
130  }
131  }
132  FadType alpha(ndot, urand.number());
133  FadType beta(ndot, urand.number());
134  for (unsigned int l=0; l<ndot; l++) {
135  alpha.fastAccessDx(l) = urand.number();
136  beta.fastAccessDx(l) = urand.number();
137  }
138 
139  Teuchos::Time timer("Teuchos Fad GEMM", false);
140  timer.start(true);
141  for (unsigned int j=0; j<nloop; j++) {
142  blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
143  &B[0], k, beta, &C[0], m);
144  }
145  timer.stop();
146 
147  return timer.totalElapsedTime() / nloop;
148 }
149 
150 template <typename FadType>
151 double
152 do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot,
153  unsigned int nloop)
154 {
155  Sacado::Random<double> urand(0.0, 1.0);
157 
158  std::vector<FadType> A(m*n), B(n), C(m);
159  for (unsigned int j=0; j<n; j++) {
160  for (unsigned int i=0; i<m; i++) {
161  //A[i+j*m] = urand.number();
162  A[i+j*m] = FadType(ndot, urand.number());
163  for (unsigned int k=0; k<ndot; k++)
164  A[i+j*m].fastAccessDx(k) = urand.number();
165  }
166  B[j] = FadType(ndot, urand.number());
167  for (unsigned int k=0; k<ndot; k++)
168  B[j].fastAccessDx(k) = urand.number();
169  }
170  for (unsigned int i=0; i<m; i++) {
171  C[i] = FadType(ndot, urand.number());
172  for (unsigned int k=0; k<ndot; k++)
173  C[i].fastAccessDx(k) = urand.number();
174  }
175  FadType alpha(ndot, urand.number());
176  FadType beta(ndot, urand.number());
177  for (unsigned int k=0; k<ndot; k++) {
178  alpha.fastAccessDx(k) = urand.number();
179  beta.fastAccessDx(k) = urand.number();
180  }
181 
182  Teuchos::Time timer("Teuchos Fad GEMV", false);
183  timer.start(true);
184  for (unsigned int j=0; j<nloop; j++) {
185  blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
186  }
187  timer.stop();
188 
189  return timer.totalElapsedTime() / nloop;
190 }
191 
192 template <typename FadType>
193 double
194 do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop)
195 {
196  Sacado::Random<double> urand(0.0, 1.0);
198 
199  std::vector<FadType> X(m), Y(m);
200  for (unsigned int i=0; i<m; i++) {
201  X[i] = FadType(ndot, urand.number());
202  Y[i] = FadType(ndot, urand.number());
203  for (unsigned int k=0; k<ndot; k++) {
204  X[i].fastAccessDx(k) = urand.number();
205  Y[i].fastAccessDx(k) = urand.number();
206  }
207  }
208 
209  Teuchos::Time timer("Teuchos Fad DOT", false);
210  timer.start(true);
211  for (unsigned int j=0; j<nloop; j++) {
212  FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
213  }
214  timer.stop();
215 
216  return timer.totalElapsedTime() / nloop;
217 }
218 
219 template <typename FadType>
220 double
221 do_time_sacado_fad_gemm(unsigned int m, unsigned int n, unsigned int k,
222  unsigned int ndot, unsigned int nloop, bool use_dynamic)
223 {
224  Sacado::Random<double> urand(0.0, 1.0);
225  unsigned int sz = (m*k+k*n+m*n)*(1+ndot);
226  Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
227 
228  Sacado::Fad::Vector<unsigned int, FadType> A(m*k,ndot), B(k*n,ndot), C
229  (m*n,ndot);
230  for (unsigned int j=0; j<k; j++) {
231  for (unsigned int i=0; i<m; i++) {
232  A[i+j*m] = FadType(ndot, urand.number());
233  for (unsigned int l=0; l<ndot; l++)
234  A[i+j*m].fastAccessDx(l) = urand.number();
235  }
236  }
237  for (unsigned int j=0; j<n; j++) {
238  for (unsigned int i=0; i<k; i++) {
239  B[i+j*k] = FadType(ndot, urand.number());
240  for (unsigned int l=0; l<ndot; l++)
241  B[i+j*k].fastAccessDx(l) = urand.number();
242  }
243  }
244  for (unsigned int j=0; j<n; j++) {
245  for (unsigned int i=0; i<m; i++) {
246  C[i+j*m] = FadType(ndot, urand.number());
247  for (unsigned int l=0; l<ndot; l++)
248  C[i+j*m].fastAccessDx(l) = urand.number();
249  }
250  }
251  FadType alpha(ndot, urand.number());
252  FadType beta(ndot, urand.number());
253  for (unsigned int l=0; l<ndot; l++) {
254  alpha.fastAccessDx(l) = urand.number();
255  beta.fastAccessDx(l) = urand.number();
256  }
257 
258  Teuchos::Time timer("Teuchos Fad GEMM", false);
259  timer.start(true);
260  for (unsigned int j=0; j<nloop; j++) {
261  blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
262  &B[0], k, beta, &C[0], m);
263  }
264  timer.stop();
265 
266  return timer.totalElapsedTime() / nloop;
267 }
268 
269 template <typename FadType>
270 double
271 do_time_sacado_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot,
272  unsigned int nloop, bool use_dynamic)
273 {
274  Sacado::Random<double> urand(0.0, 1.0);
275  unsigned int sz = m*n*(1+ndot) + 2*n*(1+ndot);
276  Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
277 
278  Sacado::Fad::Vector<unsigned int, FadType> A(m*n,ndot), B(n,ndot), C(m,ndot);
279  for (unsigned int j=0; j<n; j++) {
280  for (unsigned int i=0; i<m; i++) {
281  //A[i+j*m] = urand.number();
282  A[i+j*m] = FadType(ndot, urand.number());
283  for (unsigned int k=0; k<ndot; k++)
284  A[i+j*m].fastAccessDx(k) = urand.number();
285  }
286  B[j] = FadType(ndot, urand.number());
287  for (unsigned int k=0; k<ndot; k++)
288  B[j].fastAccessDx(k) = urand.number();
289  }
290  for (unsigned int i=0; i<m; i++) {
291  C[i] = FadType(ndot, urand.number());
292  for (unsigned int k=0; k<ndot; k++)
293  C[i].fastAccessDx(k) = urand.number();
294  }
295  FadType alpha(ndot, urand.number());
296  FadType beta(ndot, urand.number());
297  for (unsigned int k=0; k<ndot; k++) {
298  alpha.fastAccessDx(k) = urand.number();
299  beta.fastAccessDx(k) = urand.number();
300  }
301 
302  Teuchos::Time timer("Teuchos Fad GEMV", false);
303  timer.start(true);
304  for (unsigned int j=0; j<nloop; j++) {
305  blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
306  }
307  timer.stop();
308 
309  return timer.totalElapsedTime() / nloop;
310 }
311 
312 template <typename FadType>
313 double
314 do_time_sacado_fad_dot(unsigned int m, unsigned int ndot,
315  unsigned int nloop, bool use_dynamic)
316 {
317  Sacado::Random<double> urand(0.0, 1.0);
318  unsigned int sz = 2*m*(1+ndot);
319  Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
320 
321  Sacado::Fad::Vector<unsigned int, FadType> X(m,ndot), Y(m,ndot);
322  for (unsigned int i=0; i<m; i++) {
323  X[i] = FadType(ndot, urand.number());
324  Y[i] = FadType(ndot, urand.number());
325  for (unsigned int k=0; k<ndot; k++) {
326  X[i].fastAccessDx(k) = urand.number();
327  Y[i].fastAccessDx(k) = urand.number();
328  }
329  }
330 
331  Teuchos::Time timer("Teuchos Fad DOT", false);
332  timer.start(true);
333  for (unsigned int j=0; j<nloop; j++) {
334  FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
335  }
336  timer.stop();
337 
338  return timer.totalElapsedTime() / nloop;
339 }
340 
341 int main(int argc, char* argv[]) {
342  int ierr = 0;
343 
344  try {
345  double t, tb;
346  int p = 2;
347  int w = p+7;
348 
349  // Set up command line options
351  clp.setDocString("This program tests the speed of differentiating BLAS routines using Fad");
352  int m = 10;
353  clp.setOption("m", &m, "Number of rows");
354  int n = 10;
355  clp.setOption("n", &n, "Number of columns");
356  int k = 10;
357  clp.setOption("k", &k, "Number of columns for GEMM");
358  int ndot = 10;
359  clp.setOption("ndot", &ndot, "Number of derivative components");
360  int nloop = 100000;
361  clp.setOption("nloop", &nloop, "Number of loops");
362  int dynamic = 1;
363  clp.setOption("dynamic", &dynamic, "Use dynamic allocation");
364 
365  // Parse options
367  parseReturn= clp.parse(argc, argv);
369  return 1;
370  bool use_dynamic = (dynamic != 0);
371 
372  std::cout.setf(std::ios::scientific);
373  std::cout.precision(p);
374  std::cout << "Times (sec) for m = " << m << ", n = " << n
375  << ", ndot = " << ndot << ", nloop = " << nloop
376  << ", dynamic = " << use_dynamic << ": "
377  << std::endl;
378 
379  tb = do_time_teuchos_double_gemm(m,n,k,nloop);
380  std::cout << "GEMM: " << std::setw(w) << tb << std::endl;
381 
382  t = do_time_sacado_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop,use_dynamic);
383  std::cout << "Sacado DVFad GEMM: " << std::setw(w) << t << "\t"
384  << std::setw(w) << t/tb << std::endl;
385 
386  t = do_time_sacado_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop,use_dynamic);
387  std::cout << "Sacado DFad GEMM: " << std::setw(w) << t << "\t"
388  << std::setw(w) << t/tb << std::endl;
389 
390  t = do_time_teuchos_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop);
391  std::cout << "Teuchos DFad GEMM: " << std::setw(w) << t << "\t"
392  << std::setw(w) << t/tb << std::endl;
393 
394  // t = do_time_teuchos_fad_gemm< Sacado::ELRFad::DFad<double> >(m,n,k,ndot,nloop);
395  // std::cout << "Teuchos ELRDFad GEMM: " << std::setw(w) << t << "\t"
396  // << std::setw(w) << t/tb << std::endl;
397 
398  t = do_time_teuchos_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop);
399  std::cout << "Teuchos DVFad GEMM: " << std::setw(w) << t << "\t"
400  << std::setw(w) << t/tb << std::endl;
401 
402  std::cout << std::endl;
403 
404  tb = do_time_teuchos_double_gemv(m,n,nloop);
405  std::cout << "GEMV: " << std::setw(w) << tb << std::endl;
406 
407  t = do_time_sacado_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10,use_dynamic);
408  std::cout << "Sacado DVFad GEMV: " << std::setw(w) << t << "\t"
409  << std::setw(w) << t/tb << std::endl;
410 
411  t = do_time_sacado_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10,use_dynamic);
412  std::cout << "Sacado DFad GEMV: " << std::setw(w) << t << "\t"
413  << std::setw(w) << t/tb << std::endl;
414 
415  t = do_time_teuchos_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10);
416  std::cout << "Teuchos DFad GEMV: " << std::setw(w) << t << "\t"
417  << std::setw(w) << t/tb << std::endl;
418 
419  // t = do_time_teuchos_fad_gemv< Sacado::ELRFad::DFad<double> >(m,n,ndot,nloop*10);
420  // std::cout << "Teuchos ELRDFad GEMV: " << std::setw(w) << t << "\t"
421  // << std::setw(w) << t/tb << std::endl;
422 
423  t = do_time_teuchos_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10);
424  std::cout << "Teuchos DVFad GEMV: " << std::setw(w) << t << "\t"
425  << std::setw(w) << t/tb << std::endl;
426 
427  std::cout << std::endl;
428 
429  tb = do_time_teuchos_double_dot(m,nloop*100);
430  std::cout << "DOT: " << std::setw(w) << tb << std::endl;
431 
432  t = do_time_sacado_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100,use_dynamic);
433  std::cout << "Sacado DVFad DOT: " << std::setw(w) << t << "\t"
434  << std::setw(w) << t/tb << std::endl;
435 
436  t = do_time_sacado_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100,use_dynamic);
437  std::cout << "Sacado DFad DOT: " << std::setw(w) << t << "\t"
438  << std::setw(w) << t/tb << std::endl;
439 
440  t = do_time_teuchos_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100);
441  std::cout << "Teuchos DFad DOT: " << std::setw(w) << t << "\t"
442  << std::setw(w) << t/tb << std::endl;
443 
444  // t = do_time_teuchos_fad_dot< Sacado::ELRFad::DFad<double> >(m,ndot,nloop*100);
445  // std::cout << "Teuchos ELRDFad DOT: " << std::setw(w) << t << "\t"
446  // << std::setw(w) << t/tb << std::endl;
447 
448  t = do_time_teuchos_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100);
449  std::cout << "Teuchos DVFad DOT: " << std::setw(w) << t << "\t"
450  << std::setw(w) << t/tb << std::endl;
451 
452  }
453  catch (std::exception& e) {
454  std::cout << e.what() << std::endl;
455  ierr = 1;
456  }
457  catch (const char *s) {
458  std::cout << s << std::endl;
459  ierr = 1;
460  }
461  catch (...) {
462  std::cout << "Caught unknown exception!" << std::endl;
463  ierr = 1;
464  }
465 
466  return ierr;
467 }
const char * p
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
double do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, unsigned int nloop)
Definition: fad_blas.cpp:152
Sacado::Fad::DFad< double > FadType
double DOT(const int &n, const double *x, const int &incx, const double *y, const int &incy) const
double do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop)
Definition: fad_blas.cpp:53
ScalarT number()
Get random number.
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
double do_time_teuchos_double_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int nloop)
Definition: fad_blas.cpp:22
void start(bool reset=false)
#define C(x)
double do_time_sacado_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, unsigned int nloop, bool use_dynamic)
Definition: fad_blas.cpp:271
#define A
Definition: Sacado_rad.hpp:552
double stop()
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
int main()
Definition: ad_example.cpp:171
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
double do_time_sacado_fad_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int ndot, unsigned int nloop, bool use_dynamic)
Definition: fad_blas.cpp:221
void GEMM(ETransp transa, ETransp transb, const int &m, const int &n, const int &k, const double &alpha, const double *A, const int &lda, const double *B, const int &ldb, const double &beta, double *C, const int &ldc) const
double do_time_teuchos_double_dot(unsigned int m, unsigned int nloop)
Definition: fad_blas.cpp:80
Uncopyable z
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
double do_time_sacado_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop, bool use_dynamic)
Definition: fad_blas.cpp:314
void setDocString(const char doc_string[])
void GEMV(ETransp trans, const int &m, const int &n, const double &alpha, const double *A, const int &lda, const double *x, const int &incx, const double &beta, double *y, const int &incy) const
double do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop)
Definition: fad_blas.cpp:194
double totalElapsedTime(bool readCurrentTime=false) const
double do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int ndot, unsigned int nloop)
Definition: fad_blas.cpp:104
int n