Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FadBLASUnitTests.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Sacado Package
4 //
5 // Copyright 2006 NTESS and the Sacado contributors.
6 // SPDX-License-Identifier: LGPL-2.1-or-later
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef FADBLASUNITTESTS_HPP
11 #define FADBLASUNITTESTS_HPP
12 
13 // Sacado includes
14 #include "Sacado_No_Kokkos.hpp"
15 #include "Sacado_Fad_BLAS.hpp"
16 #include "Sacado_Random.hpp"
17 
18 // gtest includes
19 #include <gtest/gtest.h>
20 
21 #include "GTestUtils.hpp"
22 
23 #define COMPARE_FAD_VECTORS(X1, X2, n) \
24  ASSERT_TRUE(X1.size() == std::size_t(n)); \
25  ASSERT_TRUE(X2.size() == std::size_t(n)); \
26  for (unsigned int i=0; i<n; i++) { \
27  COMPARE_FADS(X1[i], X2[i]); \
28  } \
29  ;
30 
31 // A class for testing differentiated BLAS operations for general Fad types
32 template <class FadType>
34 protected:
37 
38  // Random number generator
40 
41  // Real random number generator for derivative components
43 
44  // Number of matrix rows
45  unsigned int m_;
46 
47  // Number of matrix columns
48  unsigned int n_;
49 
50  // Number of matrix columns for level 3 blas
51  unsigned int l_;
52 
53  // Number of derivative components
54  unsigned int ndot_;
55 
56  // Tolerances to which fad objects should be the same
57  double tol_a, tol_r;
58 
60 
62  urand(), real_urand(), m_(5), n_(6), l_(4), ndot_(7),
63  tol_a(1.0e-11), tol_r(1.0e-11) {}
64 
65 }; // class FadBLASUnitTests
66 
68 
69 // Tests all arguments
71  typedef decltype(this->fad) FadType;
72  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
74  auto m = this->m_;
75  auto ndot = this->ndot_;
76 
77  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
78  for (unsigned int i=0; i<m; i++) {
79  ScalarType val = this->urand.number();
80  x1[i] = FadType(ndot, val);
81  x2[i] = FadType(ndot, val);
82  x3[i] = FadType(ndot, val);
83  for (unsigned int k=0; k<ndot; k++) {
84  val = this->urand.number();
85  x1[i].fastAccessDx(k) = val;
86  x2[i].fastAccessDx(k) = val;
87  x3[i].fastAccessDx(k) = val;
88  }
89  }
90  FadType alpha(ndot, this->urand.number());
91  for (unsigned int k=0; k<ndot; k++) {
92  alpha.fastAccessDx(k) = this->urand.number();
93  }
94 
95  Teuchos::BLAS<int,FadType> teuchos_blas;
96  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
97 
98  Teuchos::BLAS<int,FadType> sacado_blas(false);
99  sacado_blas.SCAL(m, alpha, &x2[0], 1);
100 
101  COMPARE_FAD_VECTORS(x1, x2, m);
102 
103  unsigned int sz = m*(1+ndot);
104  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
105  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
106 
107  COMPARE_FAD_VECTORS(x1, x3, m);
108 }
109 
110 // Tests non-unit inc
112  typedef decltype(this->fad) FadType;
113  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
115  auto m = this->m_;
116  auto ndot = this->ndot_;
117 
118  unsigned int incx = 2;
119  VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
120  for (unsigned int i=0; i<m*incx; i++) {
121  ScalarType val = this->urand.number();
122  x1[i] = FadType(ndot, val);
123  x2[i] = FadType(ndot, val);
124  x3[i] = FadType(ndot, val);
125  for (unsigned int k=0; k<ndot; k++) {
126  val = this->urand.number();
127  x1[i].fastAccessDx(k) = val;
128  x2[i].fastAccessDx(k) = val;
129  x3[i].fastAccessDx(k) = val;
130  }
131  }
132  FadType alpha(ndot, this->urand.number());
133  for (unsigned int k=0; k<ndot; k++) {
134  alpha.fastAccessDx(k) = this->urand.number();
135  }
136 
137  Teuchos::BLAS<int,FadType> teuchos_blas;
138  teuchos_blas.SCAL(m, alpha, &x1[0], incx);
139 
140  Teuchos::BLAS<int,FadType> sacado_blas(false);
141  sacado_blas.SCAL(m, alpha, &x2[0], incx);
142 
143  COMPARE_FAD_VECTORS(x1, x2, m*incx);
144 
145  unsigned int sz = m*(1+ndot);
146  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
147  sacado_blas2.SCAL(m, alpha, &x3[0], incx);
148 
149  COMPARE_FAD_VECTORS(x1, x3, m*incx);
150 }
151 
152 // Tests constant alpha
154  typedef decltype(this->fad) FadType;
155  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
157  auto m = this->m_;
158  auto ndot = this->ndot_;
159 
160  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
161  for (unsigned int i=0; i<m; i++) {
162  ScalarType val = this->urand.number();
163  x1[i] = FadType(ndot, val);
164  x2[i] = FadType(ndot, val);
165  x3[i] = FadType(ndot, val);
166  for (unsigned int k=0; k<ndot; k++) {
167  val = this->urand.number();
168  x1[i].fastAccessDx(k) = val;
169  x2[i].fastAccessDx(k) = val;
170  x3[i].fastAccessDx(k) = val;
171  }
172  }
173  ScalarType alpha = this->urand.number();
174 
175  Teuchos::BLAS<int,FadType> teuchos_blas;
176  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
177 
178  Teuchos::BLAS<int,FadType> sacado_blas(false);
179  sacado_blas.SCAL(m, alpha, &x2[0], 1);
180 
181  COMPARE_FAD_VECTORS(x1, x2, m);
182 
183  unsigned int sz = m*(1+ndot);
184  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
185  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
186 
187  COMPARE_FAD_VECTORS(x1, x3, m);
188 }
189 
190 // Tests constant x
192  typedef decltype(this->fad) FadType;
193  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
195  auto m = this->m_;
196  auto ndot = this->ndot_;
197 
198  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
199  for (unsigned int i=0; i<m; i++) {
200  ScalarType val = this->urand.number();
201  x1[i] = val;
202  x2[i] = val;
203  x3[i] = val;
204  }
205  FadType alpha = FadType(ndot, this->urand.number());
206  for (unsigned int k=0; k<ndot; k++)
207  alpha.fastAccessDx(k) = this->urand.number();
208 
209  Teuchos::BLAS<int,FadType> teuchos_blas;
210  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
211 
212  Teuchos::BLAS<int,FadType> sacado_blas(false);
213  sacado_blas.SCAL(m, alpha, &x2[0], 1);
214 
215  COMPARE_FAD_VECTORS(x1, x2, m);
216 
217  unsigned int sz = m*(1+ndot);
218  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
219  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
220 
221  COMPARE_FAD_VECTORS(x1, x3, m);
222 }
223 
224 // Tests all arguments
226  typedef decltype(this->fad) FadType;
227  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
229  auto m = this->m_;
230  auto ndot = this->ndot_;
231 
232  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
233  for (unsigned int i=0; i<m; i++) {
234  x[i] = FadType(ndot, this->urand.number());
235  ScalarType val = this->urand.number();
236  y1[i] = FadType(ndot, val);
237  y2[i] = FadType(ndot, val);
238  y3[i] = FadType(ndot, val);
239  for (unsigned int k=0; k<ndot; k++) {
240  x[i].fastAccessDx(k) = this->urand.number();
241  val = this->urand.number();
242  y1[i].fastAccessDx(k) = val;
243  y2[i].fastAccessDx(k) = val;
244  y3[i].fastAccessDx(k) = val;
245  }
246  }
247 
248  Teuchos::BLAS<int,FadType> teuchos_blas;
249  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
250 
251  Teuchos::BLAS<int,FadType> sacado_blas(false);
252  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
253 
254  COMPARE_FAD_VECTORS(y1, y2, m);
255 
256  unsigned int sz = 2*m*(1+ndot);
257  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
258  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
259 
260  COMPARE_FAD_VECTORS(y1, y3, m);
261 }
262 
263 // Tests non unit inc
265  typedef decltype(this->fad) FadType;
266  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
268  auto m = this->m_;
269  auto ndot = this->ndot_;
270 
271  unsigned int incx = 2;
272  unsigned int incy = 3;
273  VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
274  for (unsigned int i=0; i<m*incx; i++) {
275  x[i] = FadType(ndot, this->urand.number());
276  for (unsigned int k=0; k<ndot; k++) {
277  x[i].fastAccessDx(k) = this->urand.number();
278  }
279  }
280  for (unsigned int i=0; i<m*incy; i++) {
281  ScalarType val = this->urand.number();
282  y1[i] = FadType(ndot, val);
283  y2[i] = FadType(ndot, val);
284  y3[i] = FadType(ndot, val);
285  for (unsigned int k=0; k<ndot; k++) {
286  val = this->urand.number();
287  y1[i].fastAccessDx(k) = val;
288  y2[i].fastAccessDx(k) = val;
289  y3[i].fastAccessDx(k) = val;
290  }
291  }
292 
293  Teuchos::BLAS<int,FadType> teuchos_blas;
294  teuchos_blas.COPY(m, &x[0], incx, &y1[0], incy);
295 
296  Teuchos::BLAS<int,FadType> sacado_blas(false);
297  sacado_blas.COPY(m, &x[0], incx, &y2[0], incy);
298 
299  COMPARE_FAD_VECTORS(y1, y2, m*incy);
300 
301  unsigned int sz = 2*m*(1+ndot);
302  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
303  sacado_blas2.COPY(m, &x[0], incx, &y3[0], incy);
304 
305  COMPARE_FAD_VECTORS(y1, y3, m*incy);
306 }
307 
308 // Tests x constant
310  typedef decltype(this->fad) FadType;
311  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
313  auto m = this->m_;
314  auto ndot = this->ndot_;
315 
316  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
317  for (unsigned int i=0; i<m; i++) {
318  x[i] = this->urand.number();
319  }
320  for (unsigned int i=0; i<m; i++) {
321  ScalarType val = this->urand.number();
322  y1[i] = FadType(ndot, val);
323  y2[i] = FadType(ndot, val);
324  y3[i] = FadType(ndot, val);
325  for (unsigned int k=0; k<ndot; k++) {
326  val = this->urand.number();
327  y1[i].fastAccessDx(k) = val;
328  y2[i].fastAccessDx(k) = val;
329  y3[i].fastAccessDx(k) = val;
330  }
331  }
332 
333  Teuchos::BLAS<int,FadType> teuchos_blas;
334  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
335 
336  Teuchos::BLAS<int,FadType> sacado_blas(false);
337  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
338 
339  COMPARE_FAD_VECTORS(y1, y2, m);
340 
341  unsigned int sz = 2*m*(1+ndot);
342  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
343  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
344 
345  COMPARE_FAD_VECTORS(y1, y3, m);
346 }
347 
348 // Tests y constant
350  typedef decltype(this->fad) FadType;
351  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
353  auto m = this->m_;
354  auto ndot = this->ndot_;
355 
356  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
357  for (unsigned int i=0; i<m; i++) {
358  x[i] = FadType(ndot, this->urand.number());
359  ScalarType val = this->urand.number();
360  y1[i] = val;
361  y2[i] = val;
362  y3[i] = val;
363  for (unsigned int k=0; k<ndot; k++) {
364  x[i].fastAccessDx(k) = this->urand.number();
365  }
366  }
367 
368  Teuchos::BLAS<int,FadType> teuchos_blas;
369  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
370 
371  Teuchos::BLAS<int,FadType> sacado_blas(false);
372  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
373 
374  COMPARE_FAD_VECTORS(y1, y2, m);
375 
376  unsigned int sz = 2*m*(1+ndot);
377  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
378  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
379 
380  COMPARE_FAD_VECTORS(y1, y3, m);
381 }
382 
383 // Tests all arguments
385  typedef decltype(this->fad) FadType;
386  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
388  auto m = this->m_;
389  auto ndot = this->ndot_;
390 
391  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
392  for (unsigned int i=0; i<m; i++) {
393  x[i] = FadType(ndot, this->urand.number());
394  ScalarType val = this->urand.number();
395  y1[i] = FadType(ndot, val);
396  y2[i] = FadType(ndot, val);
397  y3[i] = FadType(ndot, val);
398  for (unsigned int k=0; k<ndot; k++) {
399  x[i].fastAccessDx(k) = this->urand.number();
400  val = this->urand.number();
401  y1[i].fastAccessDx(k) = val;
402  y2[i].fastAccessDx(k) = val;
403  y3[i].fastAccessDx(k) = val;
404  }
405  }
406  FadType alpha(ndot, this->urand.number());
407  for (unsigned int k=0; k<ndot; k++)
408  alpha.fastAccessDx(k) = this->urand.number();
409 
410  Teuchos::BLAS<int,FadType> teuchos_blas;
411  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
412 
413  Teuchos::BLAS<int,FadType> sacado_blas(false);
414  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
415 
416  COMPARE_FAD_VECTORS(y1, y2, m);
417 
418  unsigned int sz = 2*m*(1+ndot);
419  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
420  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
421 
422  COMPARE_FAD_VECTORS(y1, y3, m);
423 }
424 
425 // Tests non unit inc
427  typedef decltype(this->fad) FadType;
428  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
430  auto m = this->m_;
431  auto ndot = this->ndot_;
432 
433  unsigned int incx = 2;
434  unsigned int incy = 3;
435  VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
436  for (unsigned int i=0; i<m*incx; i++) {
437  x[i] = FadType(ndot, this->urand.number());
438  for (unsigned int k=0; k<ndot; k++) {
439  x[i].fastAccessDx(k) = this->urand.number();
440  }
441  }
442  for (unsigned int i=0; i<m*incy; i++) {
443  ScalarType val = this->urand.number();
444  y1[i] = FadType(ndot, val);
445  y2[i] = FadType(ndot, val);
446  y3[i] = FadType(ndot, val);
447  for (unsigned int k=0; k<ndot; k++) {
448  val = this->urand.number();
449  y1[i].fastAccessDx(k) = val;
450  y2[i].fastAccessDx(k) = val;
451  y3[i].fastAccessDx(k) = val;
452  }
453  }
454  FadType alpha(ndot, this->urand.number());
455  for (unsigned int k=0; k<ndot; k++)
456  alpha.fastAccessDx(k) = this->urand.number();
457 
458  Teuchos::BLAS<int,FadType> teuchos_blas;
459  teuchos_blas.AXPY(m, alpha, &x[0], incx, &y1[0], incy);
460 
461  Teuchos::BLAS<int,FadType> sacado_blas(false);
462  sacado_blas.AXPY(m, alpha, &x[0], incx, &y2[0], incy);
463 
464  COMPARE_FAD_VECTORS(y1, y2, m*incy);
465 
466  unsigned int sz = 2*m*(1+ndot);
467  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
468  sacado_blas2.AXPY(m, alpha, &x[0], incx, &y3[0], incy);
469 
470  COMPARE_FAD_VECTORS(y1, y3, m*incy);
471 }
472 
473 // Tests x constant
475  typedef decltype(this->fad) FadType;
476  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
478  auto m = this->m_;
479  auto ndot = this->ndot_;
480 
481  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
482  std::vector<ScalarType> xx(m);
483  for (unsigned int i=0; i<m; i++) {
484  xx[i] = this->urand.number();
485  x[i] = xx[i];
486  ScalarType val = this->urand.number();
487  y1[i] = FadType(ndot, val);
488  y2[i] = FadType(ndot, val);
489  y3[i] = FadType(ndot, val);
490  y4[i] = FadType(ndot, val);
491  for (unsigned int k=0; k<ndot; k++) {
492  val = this->urand.number();
493  y1[i].fastAccessDx(k) = val;
494  y2[i].fastAccessDx(k) = val;
495  y3[i].fastAccessDx(k) = val;
496  y4[i].fastAccessDx(k) = val;
497  }
498  }
499  FadType alpha(ndot, this->urand.number());
500  for (unsigned int k=0; k<ndot; k++)
501  alpha.fastAccessDx(k) = this->urand.number();
502 
503  Teuchos::BLAS<int,FadType> teuchos_blas;
504  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
505 
506  Teuchos::BLAS<int,FadType> sacado_blas(false);
507  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
508 
509  COMPARE_FAD_VECTORS(y1, y2, m);
510 
511  unsigned int sz = m*(1+ndot)+m;
512  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
513  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
514 
515  COMPARE_FAD_VECTORS(y1, y3, m);
516 
517  sacado_blas.AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
518 
519  COMPARE_FAD_VECTORS(y1, y4, m);
520 }
521 
522 // Tests y constant
524  typedef decltype(this->fad) FadType;
525  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
527  auto m = this->m_;
528  auto ndot = this->ndot_;
529 
530  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
531  for (unsigned int i=0; i<m; i++) {
532  x[i] = FadType(ndot, this->urand.number());
533  ScalarType val = this->urand.number();
534  y1[i] = val;
535  y2[i] = val;
536  y3[i] = val;
537  for (unsigned int k=0; k<ndot; k++) {
538  x[i].fastAccessDx(k) = this->urand.number();
539  }
540  }
541  FadType alpha(ndot, this->urand.number());
542  for (unsigned int k=0; k<ndot; k++)
543  alpha.fastAccessDx(k) = this->urand.number();
544 
545  Teuchos::BLAS<int,FadType> teuchos_blas;
546  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
547 
548  Teuchos::BLAS<int,FadType> sacado_blas(false);
549  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
550 
551  COMPARE_FAD_VECTORS(y1, y2, m);
552 
553  unsigned int sz = 2*m*(1+ndot);
554  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
555  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
556 
557  COMPARE_FAD_VECTORS(y1, y3, m);
558 }
559 
560 // Tests all arguments
562  typedef decltype(this->fad) FadType;
564  auto m = this->m_;
565  auto ndot = this->ndot_;
566 
567  VectorType X(m,ndot), Y(m,ndot);
568  for (unsigned int i=0; i<m; i++) {
569  X[i] = FadType(ndot, this->real_urand.number());
570  Y[i] = FadType(ndot, this->real_urand.number());
571  for (unsigned int k=0; k<ndot; k++) {
572  X[i].fastAccessDx(k) = this->real_urand.number();
573  Y[i].fastAccessDx(k) = this->real_urand.number();
574  }
575  }
576 
577  Teuchos::BLAS<int,FadType> teuchos_blas;
578  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
579 
580  Teuchos::BLAS<int,FadType> sacado_blas(false);
581  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
582 
583  COMPARE_FADS(z1, z2);
584 
585  unsigned int sz = 2*m*(1+ndot);
586  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
587  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
588 
589  COMPARE_FADS(z1, z3);
590 }
591 
592 // Tests non-unit inc
594  typedef decltype(this->fad) FadType;
596  auto m = this->m_;
597  auto ndot = this->ndot_;
598 
599  unsigned int incx = 2;
600  unsigned int incy = 3;
601  VectorType X(m*incx,ndot), Y(m*incy,ndot);
602  for (unsigned int i=0; i<m*incx; i++) {
603  X[i] = FadType(ndot, this->real_urand.number());
604  for (unsigned int k=0; k<ndot; k++) {
605  X[i].fastAccessDx(k) = this->real_urand.number();
606  }
607  }
608  for (unsigned int i=0; i<m*incy; i++) {
609  Y[i] = FadType(ndot, this->real_urand.number());
610  for (unsigned int k=0; k<ndot; k++) {
611  Y[i].fastAccessDx(k) = this->real_urand.number();
612  }
613  }
614 
615  Teuchos::BLAS<int,FadType> teuchos_blas;
616  FadType z1 = teuchos_blas.DOT(m, &X[0], incx, &Y[0], incy);
617 
618  Teuchos::BLAS<int,FadType> sacado_blas(false);
619  FadType z2 = sacado_blas.DOT(m, &X[0], incx, &Y[0], incy);
620 
621  COMPARE_FADS(z1, z2);
622 
623  unsigned int sz = 2*m*(1+ndot);
624  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
625  FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
626 
627  COMPARE_FADS(z1, z3);
628 }
629 
630 // Tests X constant
632  typedef decltype(this->fad) FadType;
633  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
635  auto m = this->m_;
636  auto ndot = this->ndot_;
637 
638  VectorType X(m,0), Y(m,ndot);
639  std::vector<ScalarType> x(m);
640  for (unsigned int i=0; i<m; i++) {
641  x[i] = this->urand.number();
642  X[i] = x[i];
643  Y[i] = FadType(ndot, this->real_urand.number());
644  for (unsigned int k=0; k<ndot; k++) {
645  Y[i].fastAccessDx(k) = this->real_urand.number();
646  }
647  }
648 
649  Teuchos::BLAS<int,FadType> teuchos_blas;
650  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
651 
652  Teuchos::BLAS<int,FadType> sacado_blas(false);
653  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
654 
655  COMPARE_FADS(z1, z2);
656 
657  unsigned int sz = 2*m*(1+ndot);
658  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
659  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
660 
661  COMPARE_FADS(z1, z3);
662 
663  FadType z4 = sacado_blas.DOT(m, &x[0], 1, &Y[0], 1);
664 
665  COMPARE_FADS(z1, z4);
666 }
667 
668 // Tests Y constant
670  typedef decltype(this->fad) FadType;
671  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
673  auto m = this->m_;
674  auto ndot = this->ndot_;
675 
676  VectorType X(m,ndot), Y(m,0);
677  std::vector<ScalarType> y(m);
678  for (unsigned int i=0; i<m; i++) {
679  X[i] = FadType(ndot, this->real_urand.number());
680  y[i] = this->urand.number();
681  Y[i] = y[i];
682  for (unsigned int k=0; k<ndot; k++) {
683  X[i].fastAccessDx(k) = this->real_urand.number();
684  }
685  }
686 
687  Teuchos::BLAS<int,FadType> teuchos_blas;
688  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
689 
690  Teuchos::BLAS<int,FadType> sacado_blas(false);
691  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
692 
693  COMPARE_FADS(z1, z2);
694 
695  unsigned int sz = 2*m*(1+ndot);
696  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
697  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
698 
699  COMPARE_FADS(z1, z3);
700 
701  FadType z4 = sacado_blas.DOT(m, &X[0], 1, &y[0], 1);
702 
703  COMPARE_FADS(z1, z4);
704 }
705 
706 // Tests all arguments
708  typedef decltype(this->fad) FadType;
710  auto m = this->m_;
711  auto ndot = this->ndot_;
712 
713  VectorType X(m,ndot);
714  for (unsigned int i=0; i<m; i++) {
715  X[i] = FadType(ndot, this->real_urand.number());
716  for (unsigned int k=0; k<ndot; k++) {
717  X[i].fastAccessDx(k) = this->real_urand.number();
718  }
719  }
720 
721  Teuchos::BLAS<int,FadType> teuchos_blas;
723  teuchos_blas.NRM2(m, &X[0], 1);
724 
725  Teuchos::BLAS<int,FadType> sacado_blas(false);
727  sacado_blas.NRM2(m, &X[0], 1);
728 
729  COMPARE_FADS(z1, z2);
730 
731  unsigned int sz = m*(1+ndot);
732  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
734  sacado_blas2.NRM2(m, &X[0], 1);
735 
736  COMPARE_FADS(z1, z3);
737 }
738 
739 // Tests non-unit inc
741  typedef decltype(this->fad) FadType;
743  auto m = this->m_;
744  auto ndot = this->ndot_;
745 
746  unsigned int incx = 2;
747  VectorType X(m*incx,ndot);
748  for (unsigned int i=0; i<m*incx; i++) {
749  X[i] = FadType(ndot, this->real_urand.number());
750  for (unsigned int k=0; k<ndot; k++) {
751  X[i].fastAccessDx(k) = this->real_urand.number();
752  }
753  }
754 
755  Teuchos::BLAS<int,FadType> teuchos_blas;
757  teuchos_blas.NRM2(m, &X[0], incx);
758 
759  Teuchos::BLAS<int,FadType> sacado_blas(false);
761  sacado_blas.NRM2(m, &X[0], incx);
762 
763  COMPARE_FADS(z1, z2);
764 
765  unsigned int sz = m*(1+ndot);
766  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
768  sacado_blas2.NRM2(m, &X[0], incx);
769 
770  COMPARE_FADS(z1, z3);
771 }
772 
773 // Tests all arguments
775  typedef decltype(this->fad) FadType;
776  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
778  auto m = this->m_;
779  auto n = this->n_;
780  auto ndot = this->ndot_;
781 
782  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
783  for (unsigned int j=0; j<n; j++) {
784  for (unsigned int i=0; i<m; i++) {
785  A[i+j*m] = FadType(ndot, this->urand.number());
786  for (unsigned int k=0; k<ndot; k++)
787  A[i+j*m].fastAccessDx(k) = this->urand.number();
788  }
789  B[j] = FadType(ndot, this->urand.number());
790  for (unsigned int k=0; k<ndot; k++)
791  B[j].fastAccessDx(k) = this->urand.number();
792  }
793  FadType alpha(ndot, this->urand.number());
794  FadType beta(ndot, this->urand.number());
795  for (unsigned int k=0; k<ndot; k++) {
796  alpha.fastAccessDx(k) = this->urand.number();
797  beta.fastAccessDx(k) = this->urand.number();
798  }
799 
800  for (unsigned int i=0; i<m; i++) {
801  ScalarType val = this->urand.number();
802  C1[i] = FadType(ndot, val);
803  C2[i] = FadType(ndot, val);
804  C3[i] = FadType(ndot, val);
805  for (unsigned int k=0; k<ndot; k++) {
806  val = this->urand.number();
807  C1[i].fastAccessDx(k) = val;
808  C2[i].fastAccessDx(k) = val;
809  C3[i].fastAccessDx(k) = val;
810  }
811  }
812 
813  Teuchos::BLAS<int,FadType> teuchos_blas;
814  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
815  beta, &C1[0], 1);
816 
817  Teuchos::BLAS<int,FadType> sacado_blas(false);
818  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
819  beta, &C2[0], 1);
820 
821  COMPARE_FAD_VECTORS(C1, C2, m);
822 
823  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
824  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
825  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
826  beta, &C3[0], 1);
827 
828  COMPARE_FAD_VECTORS(C1, C3, m);
829 }
830 
831 // Tests non-unit inc and different lda
833  typedef decltype(this->fad) FadType;
834  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
836  auto m = this->m_;
837  auto n = this->n_;
838  auto ndot = this->ndot_;
839 
840  unsigned int lda = m+3;
841  unsigned int incb = 2;
842  unsigned int incc = 3;
843  VectorType A(lda*n,ndot), B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot),
844  C3(m*incc,ndot);
845  for (unsigned int j=0; j<n; j++) {
846  for (unsigned int i=0; i<lda; i++) {
847  A[i+j*lda] = FadType(ndot, this->urand.number());
848  for (unsigned int k=0; k<ndot; k++)
849  A[i+j*lda].fastAccessDx(k) = this->urand.number();
850  }
851  }
852  for (unsigned int j=0; j<n*incb; j++) {
853  B[j] = FadType(ndot, this->urand.number());
854  for (unsigned int k=0; k<ndot; k++)
855  B[j].fastAccessDx(k) = this->urand.number();
856  }
857  FadType alpha(ndot, this->urand.number());
858  FadType beta(ndot, this->urand.number());
859  for (unsigned int k=0; k<ndot; k++) {
860  alpha.fastAccessDx(k) = this->urand.number();
861  beta.fastAccessDx(k) = this->urand.number();
862  }
863 
864  for (unsigned int i=0; i<m*incc; i++) {
865  ScalarType val = this->urand.number();
866  C1[i] = FadType(ndot, val);
867  C2[i] = FadType(ndot, val);
868  C3[i] = FadType(ndot, val);
869  for (unsigned int k=0; k<ndot; k++) {
870  val = this->urand.number();
871  C1[i].fastAccessDx(k) = val;
872  C2[i].fastAccessDx(k) = val;
873  C3[i].fastAccessDx(k) = val;
874  }
875  }
876 
877  Teuchos::BLAS<int,FadType> teuchos_blas;
878  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
879  beta, &C1[0], incc);
880 
881  Teuchos::BLAS<int,FadType> sacado_blas(false);
882  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
883  beta, &C2[0], incc);
884 
885  COMPARE_FAD_VECTORS(C1, C2, m*incc);
886 
887  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
888  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
889  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
890  beta, &C3[0], incc);
891 
892  COMPARE_FAD_VECTORS(C1, C3, m*incc);
893 }
894 
895 // Tests transpose with all arguments
897  typedef decltype(this->fad) FadType;
898  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
900  auto m = this->m_;
901  auto n = this->n_;
902  auto ndot = this->ndot_;
903 
904  VectorType A(m*n,ndot), B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
905  for (unsigned int j=0; j<n; j++) {
906  for (unsigned int i=0; i<m; i++) {
907  A[i+j*m] = FadType(ndot, this->urand.number());
908  for (unsigned int k=0; k<ndot; k++)
909  A[i+j*m].fastAccessDx(k) = this->urand.number();
910  }
911  }
912  for (unsigned int j=0; j<m; j++) {
913  B[j] = FadType(ndot, this->urand.number());
914  for (unsigned int k=0; k<ndot; k++)
915  B[j].fastAccessDx(k) = this->urand.number();
916  }
917  FadType alpha(ndot, this->urand.number());
918  FadType beta(ndot, this->urand.number());
919  for (unsigned int k=0; k<ndot; k++) {
920  alpha.fastAccessDx(k) = this->urand.number();
921  beta.fastAccessDx(k) = this->urand.number();
922  }
923 
924  for (unsigned int i=0; i<n; i++) {
925  ScalarType val = this->urand.number();
926  C1[i] = FadType(ndot, val);
927  C2[i] = FadType(ndot, val);
928  C3[i] = FadType(ndot, val);
929  for (unsigned int k=0; k<ndot; k++) {
930  val = this->urand.number();
931  C1[i].fastAccessDx(k) = val;
932  C2[i].fastAccessDx(k) = val;
933  C3[i].fastAccessDx(k) = val;
934  }
935  }
936 
937  Teuchos::BLAS<int,FadType> teuchos_blas;
938  teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
939  beta, &C1[0], 1);
940 
941  Teuchos::BLAS<int,FadType> sacado_blas(false);
942  sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
943  beta, &C2[0], 1);
944 
945  COMPARE_FAD_VECTORS(C1, C2, n);
946 
947  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
948  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
949  sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
950  beta, &C3[0], 1);
951 
952  COMPARE_FAD_VECTORS(C1, C3, n);
953 }
954 
955 // Tests transpose with non-unit inc and different lda
957  typedef decltype(this->fad) FadType;
958  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
960  auto m = this->m_;
961  auto n = this->n_;
962  auto ndot = this->ndot_;
963 
964  unsigned int lda = m+3;
965  unsigned int incb = 2;
966  unsigned int incc = 3;
967  VectorType A(lda*n,ndot), B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot),
968  C3(n*incc,ndot);
969  for (unsigned int j=0; j<n; j++) {
970  for (unsigned int i=0; i<lda; i++) {
971  A[i+j*lda] = FadType(ndot, this->urand.number());
972  for (unsigned int k=0; k<ndot; k++)
973  A[i+j*lda].fastAccessDx(k) = this->urand.number();
974  }
975  }
976  for (unsigned int j=0; j<m*incb; j++) {
977  B[j] = FadType(ndot, this->urand.number());
978  for (unsigned int k=0; k<ndot; k++)
979  B[j].fastAccessDx(k) = this->urand.number();
980  }
981  FadType alpha(ndot, this->urand.number());
982  FadType beta(ndot, this->urand.number());
983  for (unsigned int k=0; k<ndot; k++) {
984  alpha.fastAccessDx(k) = this->urand.number();
985  beta.fastAccessDx(k) = this->urand.number();
986  }
987 
988  for (unsigned int i=0; i<n*incc; i++) {
989  ScalarType val = this->urand.number();
990  C1[i] = FadType(ndot, val);
991  C2[i] = FadType(ndot, val);
992  C3[i] = FadType(ndot, val);
993  for (unsigned int k=0; k<ndot; k++) {
994  val = this->urand.number();
995  C1[i].fastAccessDx(k) = val;
996  C2[i].fastAccessDx(k) = val;
997  C3[i].fastAccessDx(k) = val;
998  }
999  }
1000 
1001  Teuchos::BLAS<int,FadType> teuchos_blas;
1002  teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1003  beta, &C1[0], incc);
1004 
1005  Teuchos::BLAS<int,FadType> sacado_blas(false);
1006  sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1007  beta, &C2[0], incc);
1008 
1009  COMPARE_FAD_VECTORS(C1, C2, n*incc);
1010 
1011  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1012  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1013  sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1014  beta, &C3[0], incc);
1015 
1016  COMPARE_FAD_VECTORS(C1, C3, n*incc);
1017 }
1018 
1019 // Tests with constant C
1021  typedef decltype(this->fad) FadType;
1022  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1023  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1024  auto m = this->m_;
1025  auto n = this->n_;
1026  auto ndot = this->ndot_;
1027 
1028  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1029  for (unsigned int j=0; j<n; j++) {
1030  for (unsigned int i=0; i<m; i++) {
1031  A[i+j*m] = FadType(ndot, this->urand.number());
1032  for (unsigned int k=0; k<ndot; k++)
1033  A[i+j*m].fastAccessDx(k) = this->urand.number();
1034  }
1035  B[j] = FadType(ndot, this->urand.number());
1036  for (unsigned int k=0; k<ndot; k++)
1037  B[j].fastAccessDx(k) = this->urand.number();
1038  }
1039  FadType alpha(ndot, this->urand.number());
1040  FadType beta(ndot, this->urand.number());
1041  for (unsigned int k=0; k<ndot; k++) {
1042  alpha.fastAccessDx(k) = this->urand.number();
1043  beta.fastAccessDx(k) = this->urand.number();
1044  }
1045 
1046  for (unsigned int i=0; i<m; i++) {
1047  ScalarType val = this->urand.number();
1048  C1[i] = val;
1049  C2[i] = val;
1050  C3[i] = val;
1051  }
1052 
1053  Teuchos::BLAS<int,FadType> teuchos_blas;
1054  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1055  beta, &C1[0], 1);
1056 
1057  Teuchos::BLAS<int,FadType> sacado_blas(false);
1058  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1059  beta, &C2[0], 1);
1060 
1061  COMPARE_FAD_VECTORS(C1, C2, m);
1062 
1063  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1064  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1065  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1066  beta, &C3[0], 1);
1067 
1068  COMPARE_FAD_VECTORS(C1, C3, m);
1069 }
1070 
1071 // Tests with constant alpha, beta
1073  typedef decltype(this->fad) FadType;
1074  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1075  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1076  auto m = this->m_;
1077  auto n = this->n_;
1078  auto ndot = this->ndot_;
1079 
1080  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1081  for (unsigned int j=0; j<n; j++) {
1082  for (unsigned int i=0; i<m; i++) {
1083  A[i+j*m] = FadType(ndot, this->urand.number());
1084  for (unsigned int k=0; k<ndot; k++)
1085  A[i+j*m].fastAccessDx(k) = this->urand.number();
1086  }
1087  B[j] = FadType(ndot, this->urand.number());
1088  for (unsigned int k=0; k<ndot; k++)
1089  B[j].fastAccessDx(k) = this->urand.number();
1090  }
1091  ScalarType alpha = this->urand.number();
1092  ScalarType beta = this->urand.number();
1093 
1094  for (unsigned int i=0; i<m; i++) {
1095  ScalarType val = this->urand.number();
1096  C1[i] = FadType(ndot, val);
1097  C2[i] = FadType(ndot, val);
1098  C3[i] = FadType(ndot, val);
1099  for (unsigned int k=0; k<ndot; k++) {
1100  val = this->urand.number();
1101  C1[i].fastAccessDx(k) = val;
1102  C2[i].fastAccessDx(k) = val;
1103  C3[i].fastAccessDx(k) = val;
1104  }
1105  }
1106 
1107  Teuchos::BLAS<int,FadType> teuchos_blas;
1108  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1109  beta, &C1[0], 1);
1110 
1111  Teuchos::BLAS<int,FadType> sacado_blas(false);
1112  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1113  beta, &C2[0], 1);
1114 
1115  COMPARE_FAD_VECTORS(C1, C2, m);
1116 
1117  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1118  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1119  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1120  beta, &C3[0], 1);
1121 
1122  COMPARE_FAD_VECTORS(C1, C3, m);
1123 }
1124 
1125 // Tests wth constant B
1127  typedef decltype(this->fad) FadType;
1128  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1129  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1130  auto m = this->m_;
1131  auto n = this->n_;
1132  auto ndot = this->ndot_;
1133 
1134  VectorType A(m*n,ndot), B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1135  C4(m,ndot);
1136  std::vector<ScalarType> b(n);
1137  for (unsigned int j=0; j<n; j++) {
1138  for (unsigned int i=0; i<m; i++) {
1139  A[i+j*m] = FadType(ndot, this->urand.number());
1140  for (unsigned int k=0; k<ndot; k++)
1141  A[i+j*m].fastAccessDx(k) = this->urand.number();
1142  }
1143  b[j] = this->urand.number();
1144  B[j] = b[j];
1145  }
1146  FadType alpha(ndot, this->urand.number());
1147  FadType beta(ndot, this->urand.number());
1148  for (unsigned int k=0; k<ndot; k++) {
1149  alpha.fastAccessDx(k) = this->urand.number();
1150  beta.fastAccessDx(k) = this->urand.number();
1151  }
1152 
1153  for (unsigned int i=0; i<m; i++) {
1154  ScalarType val = this->urand.number();
1155  C1[i] = FadType(ndot, val);
1156  C2[i] = FadType(ndot, val);
1157  C3[i] = FadType(ndot, val);
1158  C4[i] = FadType(ndot, val);
1159  for (unsigned int k=0; k<ndot; k++) {
1160  val = this->urand.number();
1161  C1[i].fastAccessDx(k) = val;
1162  C2[i].fastAccessDx(k) = val;
1163  C3[i].fastAccessDx(k) = val;
1164  C4[i].fastAccessDx(k) = val;
1165  }
1166  }
1167 
1168  Teuchos::BLAS<int,FadType> teuchos_blas;
1169  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1170  beta, &C1[0], 1);
1171 
1172  Teuchos::BLAS<int,FadType> sacado_blas(false);
1173  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1174  beta, &C2[0], 1);
1175 
1176  COMPARE_FAD_VECTORS(C1, C2, m);
1177 
1178  unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
1179  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1180  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1181  beta, &C3[0], 1);
1182 
1183  COMPARE_FAD_VECTORS(C1, C3, m);
1184 
1185  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &b[0], 1,
1186  beta, &C4[0], 1);
1187 
1188  COMPARE_FAD_VECTORS(C1, C4, m);
1189 }
1190 
1191 // Tests with constant A
1193  typedef decltype(this->fad) FadType;
1194  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1195  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1196  auto m = this->m_;
1197  auto n = this->n_;
1198  auto ndot = this->ndot_;
1199 
1200  VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1201  C4(m,ndot);
1202  std::vector<ScalarType> a(m*n);
1203  for (unsigned int j=0; j<n; j++) {
1204  for (unsigned int i=0; i<m; i++) {
1205  a[i+j*m] = this->urand.number();
1206  A[i+j*m] = a[i+j*m];
1207  }
1208  B[j] = FadType(ndot, this->urand.number());
1209  for (unsigned int k=0; k<ndot; k++)
1210  B[j].fastAccessDx(k) = this->urand.number();
1211  }
1212  FadType alpha(ndot, this->urand.number());
1213  FadType beta(ndot, this->urand.number());
1214  for (unsigned int k=0; k<ndot; k++) {
1215  alpha.fastAccessDx(k) = this->urand.number();
1216  beta.fastAccessDx(k) = this->urand.number();
1217  }
1218 
1219  for (unsigned int i=0; i<m; i++) {
1220  ScalarType val = this->urand.number();
1221  C1[i] = FadType(ndot, val);
1222  C2[i] = FadType(ndot, val);
1223  C3[i] = FadType(ndot, val);
1224  C4[i] = FadType(ndot, val);
1225  for (unsigned int k=0; k<ndot; k++) {
1226  val = this->urand.number();
1227  C1[i].fastAccessDx(k) = val;
1228  C2[i].fastAccessDx(k) = val;
1229  C3[i].fastAccessDx(k) = val;
1230  C4[i].fastAccessDx(k) = val;
1231  }
1232  }
1233 
1234  Teuchos::BLAS<int,FadType> teuchos_blas;
1235  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1236  beta, &C1[0], 1);
1237 
1238  Teuchos::BLAS<int,FadType> sacado_blas(false);
1239  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1240  beta, &C2[0], 1);
1241 
1242  COMPARE_FAD_VECTORS(C1, C2, m);
1243 
1244  unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1245  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1246  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1247  beta, &C3[0], 1);
1248 
1249  COMPARE_FAD_VECTORS(C1, C3, m);
1250 
1251  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &B[0], 1,
1252  beta, &C4[0], 1);
1253 
1254  COMPARE_FAD_VECTORS(C1, C4, m);
1255 }
1256 
1257 // Tests with constant A, B
1259  typedef decltype(this->fad) FadType;
1260  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1261  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1262  auto m = this->m_;
1263  auto n = this->n_;
1264  auto ndot = this->ndot_;
1265 
1266  VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1267  C4(m,ndot);
1268  std::vector<ScalarType> a(m*n), b(n);
1269  for (unsigned int j=0; j<n; j++) {
1270  for (unsigned int i=0; i<m; i++) {
1271  a[i+j*m] = this->urand.number();
1272  A[i+j*m] = a[i+j*m];
1273  }
1274  b[j] = this->urand.number();
1275  B[j] = b[j];
1276  }
1277  FadType alpha(ndot, this->urand.number());
1278  FadType beta(ndot, this->urand.number());
1279  for (unsigned int k=0; k<ndot; k++) {
1280  alpha.fastAccessDx(k) = this->urand.number();
1281  beta.fastAccessDx(k) = this->urand.number();
1282  }
1283 
1284  for (unsigned int i=0; i<m; i++) {
1285  ScalarType val = this->urand.number();
1286  C1[i] = FadType(ndot, val);
1287  C2[i] = FadType(ndot, val);
1288  C3[i] = FadType(ndot, val);
1289  C4[i] = FadType(ndot, val);
1290  for (unsigned int k=0; k<ndot; k++) {
1291  val = this->urand.number();
1292  C1[i].fastAccessDx(k) = val;
1293  C2[i].fastAccessDx(k) = val;
1294  C3[i].fastAccessDx(k) = val;
1295  C4[i].fastAccessDx(k) = val;
1296  }
1297  }
1298 
1299  Teuchos::BLAS<int,FadType> teuchos_blas;
1300  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1301  beta, &C1[0], 1);
1302 
1303  Teuchos::BLAS<int,FadType> sacado_blas(false);
1304  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1305  beta, &C2[0], 1);
1306 
1307  COMPARE_FAD_VECTORS(C1, C2, m);
1308 
1309  unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1310  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1311  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1312  beta, &C3[0], 1);
1313 
1314  COMPARE_FAD_VECTORS(C1, C3, m);
1315 
1316  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &b[0], 1,
1317  beta, &C4[0], 1);
1318 
1319  COMPARE_FAD_VECTORS(C1, C4, m);
1320 }
1321 
1322 // Tests all arguments
1324  typedef decltype(this->fad) FadType;
1325  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1326  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1327  auto n = this->n_;
1328  auto ndot = this->ndot_;
1329 
1330  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1331  for (unsigned int j=0; j<n; j++) {
1332  for (unsigned int i=0; i<n; i++) {
1333  A[i+j*n] = FadType(ndot, this->urand.number());
1334  for (unsigned int k=0; k<ndot; k++)
1335  A[i+j*n].fastAccessDx(k) = this->urand.number();
1336  }
1337  ScalarType val = this->urand.number();
1338  x1[j] = FadType(ndot, val);
1339  x2[j] = FadType(ndot, val);
1340  x3[j] = FadType(ndot, val);
1341  for (unsigned int k=0; k<ndot; k++) {
1342  val = this->urand.number();
1343  x1[j].fastAccessDx(k) = val;
1344  x2[j].fastAccessDx(k) = val;
1345  x3[j].fastAccessDx(k) = val;
1346  }
1347  }
1348 
1349  Teuchos::BLAS<int,FadType> teuchos_blas;
1351  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1352 
1353  Teuchos::BLAS<int,FadType> sacado_blas(false);
1355  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1356 
1357  COMPARE_FAD_VECTORS(x1, x2, n);
1358 
1359  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1360  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1362  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1363 
1364  COMPARE_FAD_VECTORS(x1, x3, n);
1365 
1367  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1369  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1371  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1372  COMPARE_FAD_VECTORS(x1, x2, n);
1373  COMPARE_FAD_VECTORS(x1, x3, n);
1374 
1375  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1376  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1377  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1378  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1379  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1380  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1381  COMPARE_FAD_VECTORS(x1, x2, n);
1382  COMPARE_FAD_VECTORS(x1, x3, n);
1383 
1384  for (unsigned int i=0; i<n; i++) {
1385  A[i*n+i].val() = 1.0;
1386  for (unsigned int k=0; k<ndot; k++)
1387  A[i*n+i].fastAccessDx(k) = 0.0;
1388  }
1390  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1392  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1394  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1395  COMPARE_FAD_VECTORS(x1, x2, n);
1396  COMPARE_FAD_VECTORS(x1, x3, n);
1397 }
1398 
1399 // Tests non unit inc, different lda
1401  typedef decltype(this->fad) FadType;
1402  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1403  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1404  auto n = this->n_;
1405  auto ndot = this->ndot_;
1406 
1407  unsigned int lda = n+3;
1408  unsigned int incx = 2;
1409  VectorType A(lda*n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
1410  for (unsigned int j=0; j<n; j++) {
1411  for (unsigned int i=0; i<lda; i++) {
1412  A[i+j*lda] = FadType(ndot, this->urand.number());
1413  for (unsigned int k=0; k<ndot; k++)
1414  A[i+j*lda].fastAccessDx(k) = this->urand.number();
1415  }
1416  }
1417  for (unsigned int j=0; j<n*incx; j++) {
1418  ScalarType val = this->urand.number();
1419  x1[j] = FadType(ndot, val);
1420  x2[j] = FadType(ndot, val);
1421  x3[j] = FadType(ndot, val);
1422  for (unsigned int k=0; k<ndot; k++) {
1423  val = this->urand.number();
1424  x1[j].fastAccessDx(k) = val;
1425  x2[j].fastAccessDx(k) = val;
1426  x3[j].fastAccessDx(k) = val;
1427  }
1428  }
1429 
1430  Teuchos::BLAS<int,FadType> teuchos_blas;
1432  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1433 
1434  Teuchos::BLAS<int,FadType> sacado_blas(false);
1436  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1437 
1438  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1439 
1440  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1441  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1443  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1444 
1445  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1446 
1448  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1450  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1452  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1453  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1454  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1455 
1456  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1457  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1458  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1459  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1460  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1461  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1462  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1463  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1464 
1465  for (unsigned int i=0; i<n; i++) {
1466  A[i*lda+i].val() = 1.0;
1467  for (unsigned int k=0; k<ndot; k++)
1468  A[i*lda+i].fastAccessDx(k) = 0.0;
1469  }
1471  Teuchos::UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1473  Teuchos::UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1475  Teuchos::UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1476  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1477  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1478 }
1479 
1480 // Tests A constant
1482  typedef decltype(this->fad) FadType;
1483  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1484  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1485  auto n = this->n_;
1486  auto ndot = this->ndot_;
1487 
1488  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot),
1489  x5(n,ndot);
1490  std::vector<ScalarType> a(n*n);
1491  for (unsigned int j=0; j<n; j++) {
1492  for (unsigned int i=0; i<n; i++) {
1493  a[i+j*n] = this->urand.number();
1494  A[i+j*n] = a[i+j*n];
1495  }
1496  ScalarType val = this->urand.number();
1497  x1[j] = FadType(ndot, val);
1498  x2[j] = FadType(ndot, val);
1499  x3[j] = FadType(ndot, val);
1500  x4[j] = FadType(ndot, val);
1501  x5[j] = FadType(ndot, val);
1502  for (unsigned int k=0; k<ndot; k++) {
1503  val = this->urand.number();
1504  x1[j].fastAccessDx(k) = val;
1505  x2[j].fastAccessDx(k) = val;
1506  x3[j].fastAccessDx(k) = val;
1507  x4[j].fastAccessDx(k) = val;
1508  x5[j].fastAccessDx(k) = val;
1509  }
1510  }
1511 
1512  Teuchos::BLAS<int,FadType> teuchos_blas;
1514  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1515 
1516  Teuchos::BLAS<int,FadType> sacado_blas(false);
1518  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1519 
1520  COMPARE_FAD_VECTORS(x1, x2, n);
1521 
1522  unsigned int sz = n*n+n*(1+ndot);
1523  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1524  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1525  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1526 
1527  COMPARE_FAD_VECTORS(x1, x3, n);
1528 
1530  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1531 
1532  COMPARE_FAD_VECTORS(x1, x4, n);
1533 
1534  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1535  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x5[0], 1);
1536 
1537  COMPARE_FAD_VECTORS(x1, x5, n);
1538 
1540  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1542  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1543  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1544  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1546  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1547  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1548  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1549  COMPARE_FAD_VECTORS(x1, x2, n);
1550  COMPARE_FAD_VECTORS(x1, x3, n);
1551  COMPARE_FAD_VECTORS(x1, x4, n);
1552  COMPARE_FAD_VECTORS(x1, x5, n);
1553 
1554  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1555  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1556  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1557  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1558  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1559  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1560  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1561  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1562  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1563  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1564  COMPARE_FAD_VECTORS(x1, x2, n);
1565  COMPARE_FAD_VECTORS(x1, x3, n);
1566  COMPARE_FAD_VECTORS(x1, x4, n);
1567  COMPARE_FAD_VECTORS(x1, x5, n);
1568 
1569  for (unsigned int i=0; i<n; i++) {
1570  A[i*n+i].val() = 1.0;
1571  for (unsigned int k=0; k<ndot; k++)
1572  A[i*n+i].fastAccessDx(k) = 0.0;
1573  }
1575  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1577  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1578  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1579  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1581  Teuchos::UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1582  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1583  Teuchos::UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1584  COMPARE_FAD_VECTORS(x1, x2, n);
1585  COMPARE_FAD_VECTORS(x1, x3, n);
1586  COMPARE_FAD_VECTORS(x1, x4, n);
1587  COMPARE_FAD_VECTORS(x1, x5, n);
1588 }
1589 
1590 // Tests x constant
1592  typedef decltype(this->fad) FadType;
1593  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1594  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1595  auto n = this->n_;
1596  auto ndot = this->ndot_;
1597 
1598  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1599  for (unsigned int j=0; j<n; j++) {
1600  for (unsigned int i=0; i<n; i++) {
1601  A[i+j*n] = FadType(ndot, this->urand.number());
1602  for (unsigned int k=0; k<ndot; k++)
1603  A[i+j*n].fastAccessDx(k) = this->urand.number();
1604  }
1605  ScalarType val = this->urand.number();
1606  x1[j] = val;
1607  x2[j] = val;
1608  x3[j] = val;
1609  }
1610 
1611  Teuchos::BLAS<int,FadType> teuchos_blas;
1613  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1614 
1615  Teuchos::BLAS<int,FadType> sacado_blas(false);
1617  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1618 
1619  COMPARE_FAD_VECTORS(x1, x2, n);
1620 
1621  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1622  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1624  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1625 
1626  COMPARE_FAD_VECTORS(x1, x3, n);
1627 
1629  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1631  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1633  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1634  COMPARE_FAD_VECTORS(x1, x2, n);
1635  COMPARE_FAD_VECTORS(x1, x3, n);
1636 
1637  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1638  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1639  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1640  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1641  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1642  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1643  COMPARE_FAD_VECTORS(x1, x2, n);
1644  COMPARE_FAD_VECTORS(x1, x3, n);
1645 
1646  for (unsigned int i=0; i<n; i++) {
1647  A[i*n+i].val() = 1.0;
1648  for (unsigned int k=0; k<ndot; k++)
1649  A[i*n+i].fastAccessDx(k) = 0.0;
1650  }
1652  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1654  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1656  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1657  COMPARE_FAD_VECTORS(x1, x2, n);
1658  COMPARE_FAD_VECTORS(x1, x3, n);
1659 }
1660 
1661 // Tests all arguments
1663  typedef decltype(this->fad) FadType;
1664  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1665  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1666  auto m = this->m_;
1667  auto n = this->n_;
1668  auto ndot = this->ndot_;
1669 
1670 
1671  // GER is apparently not defined in the BLAS for complex types
1673  return;
1674 
1675  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1676  for (unsigned int j=0; j<n; j++) {
1677  for (unsigned int i=0; i<m; i++) {
1678  ScalarType val = this->urand.number();
1679  A1[i+j*m] = FadType(ndot, val);
1680  A2[i+j*m] = FadType(ndot, val);
1681  A3[i+j*m] = FadType(ndot, val);
1682  for (unsigned int k=0; k<ndot; k++) {
1683  val = this->urand.number();
1684  A1[i+j*m].fastAccessDx(k) = val;
1685  A2[i+j*m].fastAccessDx(k) = val;
1686  A3[i+j*m].fastAccessDx(k) = val;
1687  }
1688  }
1689  }
1690  for (unsigned int i=0; i<m; i++) {
1691  x[i] = FadType(ndot, this->urand.number());
1692  for (unsigned int k=0; k<ndot; k++)
1693  x[i].fastAccessDx(k) = this->urand.number();
1694  }
1695  for (unsigned int i=0; i<n; i++) {
1696  y[i] = FadType(ndot, this->urand.number());
1697  for (unsigned int k=0; k<ndot; k++)
1698  y[i].fastAccessDx(k) = this->urand.number();
1699  }
1700  FadType alpha(ndot, this->urand.number());
1701  for (unsigned int k=0; k<ndot; k++) {
1702  alpha.fastAccessDx(k) = this->urand.number();
1703  }
1704 
1705  Teuchos::BLAS<int,FadType> teuchos_blas;
1706  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1707 
1708  Teuchos::BLAS<int,FadType> sacado_blas(false);
1709  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1710 
1711  COMPARE_FAD_VECTORS(A1, A2, m*n);
1712 
1713  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1714  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1715  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1716 
1717  COMPARE_FAD_VECTORS(A1, A3, m*n);
1718 }
1719 
1720 // Tests non unit inc, different lda
1722  typedef decltype(this->fad) FadType;
1723  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1724  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1725  auto m = this->m_;
1726  auto n = this->n_;
1727  auto ndot = this->ndot_;
1728 
1729 
1730  // GER is apparently not defined in the BLAS for complex types
1732  return;
1733 
1734  unsigned int lda = m+3;
1735  unsigned int incx = 2;
1736  unsigned int incy = 3;
1737  VectorType A1(lda*n,ndot), A2(lda*n,ndot), A3(lda*n,ndot), x(m*incx,ndot),
1738  y(n*incy,ndot);
1739  for (unsigned int j=0; j<n; j++) {
1740  for (unsigned int i=0; i<lda; i++) {
1741  ScalarType val = this->urand.number();
1742  A1[i+j*lda] = FadType(ndot, val);
1743  A2[i+j*lda] = FadType(ndot, val);
1744  A3[i+j*lda] = FadType(ndot, val);
1745  for (unsigned int k=0; k<ndot; k++) {
1746  val = this->urand.number();
1747  A1[i+j*lda].fastAccessDx(k) = val;
1748  A2[i+j*lda].fastAccessDx(k) = val;
1749  A3[i+j*lda].fastAccessDx(k) = val;
1750  }
1751  }
1752  }
1753  for (unsigned int i=0; i<m*incx; i++) {
1754  x[i] = FadType(ndot, this->urand.number());
1755  for (unsigned int k=0; k<ndot; k++)
1756  x[i].fastAccessDx(k) = this->urand.number();
1757  }
1758  for (unsigned int i=0; i<n*incy; i++) {
1759  y[i] = FadType(ndot, this->urand.number());
1760  for (unsigned int k=0; k<ndot; k++)
1761  y[i].fastAccessDx(k) = this->urand.number();
1762  }
1763  FadType alpha(ndot, this->urand.number());
1764  for (unsigned int k=0; k<ndot; k++) {
1765  alpha.fastAccessDx(k) = this->urand.number();
1766  }
1767 
1768  Teuchos::BLAS<int,FadType> teuchos_blas;
1769  teuchos_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A1[0], lda);
1770 
1771  Teuchos::BLAS<int,FadType> sacado_blas(false);
1772  sacado_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A2[0], lda);
1773 
1774  COMPARE_FAD_VECTORS(A1, A2, lda*n);
1775 
1776  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1777  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1778  sacado_blas2.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A3[0], lda);
1779 
1780  COMPARE_FAD_VECTORS(A1, A3, lda*n);
1781 }
1782 
1783 // Tests constant alpha
1785  typedef decltype(this->fad) FadType;
1786  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1787  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1788  auto m = this->m_;
1789  auto n = this->n_;
1790  auto ndot = this->ndot_;
1791 
1792 
1793  // GER is apparently not defined in the BLAS for complex types
1795  return;
1796 
1797  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1798  for (unsigned int j=0; j<n; j++) {
1799  for (unsigned int i=0; i<m; i++) {
1800  ScalarType val = this->urand.number();
1801  A1[i+j*m] = FadType(ndot, val);
1802  A2[i+j*m] = FadType(ndot, val);
1803  A3[i+j*m] = FadType(ndot, val);
1804  for (unsigned int k=0; k<ndot; k++) {
1805  val = this->urand.number();
1806  A1[i+j*m].fastAccessDx(k) = val;
1807  A2[i+j*m].fastAccessDx(k) = val;
1808  A3[i+j*m].fastAccessDx(k) = val;
1809  }
1810  }
1811  }
1812  for (unsigned int i=0; i<m; i++) {
1813  x[i] = FadType(ndot, this->urand.number());
1814  for (unsigned int k=0; k<ndot; k++)
1815  x[i].fastAccessDx(k) = this->urand.number();
1816  }
1817  for (unsigned int i=0; i<n; i++) {
1818  y[i] = FadType(ndot, this->urand.number());
1819  for (unsigned int k=0; k<ndot; k++)
1820  y[i].fastAccessDx(k) = this->urand.number();
1821  }
1822  ScalarType alpha = this->urand.number();
1823 
1824  Teuchos::BLAS<int,FadType> teuchos_blas;
1825  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1826 
1827  Teuchos::BLAS<int,FadType> sacado_blas(false);
1828  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1829 
1830  COMPARE_FAD_VECTORS(A1, A2, m*n);
1831 
1832  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1833  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1834  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1835 
1836  COMPARE_FAD_VECTORS(A1, A3, m*n);
1837 }
1838 
1839 // Tests constant x
1841  typedef decltype(this->fad) FadType;
1842  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1843  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1844  auto m = this->m_;
1845  auto n = this->n_;
1846  auto ndot = this->ndot_;
1847 
1848 
1849  // GER is apparently not defined in the BLAS for complex types
1851  return;
1852 
1853  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1854  A5(m*n,ndot), x(m,ndot), y(n,ndot);
1855  std::vector<ScalarType> xx(m);
1856  for (unsigned int j=0; j<n; j++) {
1857  for (unsigned int i=0; i<m; i++) {
1858  ScalarType val = this->urand.number();
1859  A1[i+j*m] = FadType(ndot, val);
1860  A2[i+j*m] = FadType(ndot, val);
1861  A3[i+j*m] = FadType(ndot, val);
1862  A4[i+j*m] = FadType(ndot, val);
1863  A5[i+j*m] = FadType(ndot, val);
1864  for (unsigned int k=0; k<ndot; k++) {
1865  val = this->urand.number();
1866  A1[i+j*m].fastAccessDx(k) = val;
1867  A2[i+j*m].fastAccessDx(k) = val;
1868  A3[i+j*m].fastAccessDx(k) = val;
1869  A4[i+j*m].fastAccessDx(k) = val;
1870  A5[i+j*m].fastAccessDx(k) = val;
1871  }
1872  }
1873  }
1874  for (unsigned int i=0; i<m; i++) {
1875  xx[i] = this->urand.number();
1876  x[i] = xx[i];
1877  }
1878  for (unsigned int i=0; i<n; i++) {
1879  y[i] = FadType(ndot, this->urand.number());
1880  for (unsigned int k=0; k<ndot; k++)
1881  y[i].fastAccessDx(k) = this->urand.number();
1882  }
1883  FadType alpha(ndot, this->urand.number());
1884  for (unsigned int k=0; k<ndot; k++) {
1885  alpha.fastAccessDx(k) = this->urand.number();
1886  }
1887 
1888  Teuchos::BLAS<int,FadType> teuchos_blas;
1889  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1890 
1891  Teuchos::BLAS<int,FadType> sacado_blas(false);
1892  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1893 
1894  COMPARE_FAD_VECTORS(A1, A2, m*n);
1895 
1896  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
1897  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1898  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1899 
1900  COMPARE_FAD_VECTORS(A1, A3, m*n);
1901 
1902  sacado_blas.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A4[0], m);
1903 
1904  COMPARE_FAD_VECTORS(A1, A4, m*n);
1905 
1906  sacado_blas2.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A5[0], m);
1907 
1908  COMPARE_FAD_VECTORS(A1, A5, m*n);
1909 }
1910 
1911 // Tests constant y
1913  typedef decltype(this->fad) FadType;
1914  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1915  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1916  auto m = this->m_;
1917  auto n = this->n_;
1918  auto ndot = this->ndot_;
1919 
1920 
1921  // GER is apparently not defined in the BLAS for complex types
1923  return;
1924 
1925  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1926  A5(m*n,ndot), x(m,ndot), y(n,ndot);
1927  std::vector<ScalarType> yy(n);
1928  for (unsigned int j=0; j<n; j++) {
1929  for (unsigned int i=0; i<m; i++) {
1930  ScalarType val = this->urand.number();
1931  A1[i+j*m] = FadType(ndot, val);
1932  A2[i+j*m] = FadType(ndot, val);
1933  A3[i+j*m] = FadType(ndot, val);
1934  A4[i+j*m] = FadType(ndot, val);
1935  A5[i+j*m] = FadType(ndot, val);
1936  for (unsigned int k=0; k<ndot; k++) {
1937  val = this->urand.number();
1938  A1[i+j*m].fastAccessDx(k) = val;
1939  A2[i+j*m].fastAccessDx(k) = val;
1940  A3[i+j*m].fastAccessDx(k) = val;
1941  A4[i+j*m].fastAccessDx(k) = val;
1942  A5[i+j*m].fastAccessDx(k) = val;
1943  }
1944  }
1945  }
1946  for (unsigned int i=0; i<m; i++) {
1947  x[i] = FadType(ndot, this->urand.number());
1948  for (unsigned int k=0; k<ndot; k++)
1949  x[i].fastAccessDx(k) = this->urand.number();
1950  }
1951  for (unsigned int i=0; i<n; i++) {
1952  yy[i] = this->urand.number();
1953  y[i] = yy[i];
1954  }
1955  FadType alpha(ndot, this->urand.number());
1956  for (unsigned int k=0; k<ndot; k++) {
1957  alpha.fastAccessDx(k) = this->urand.number();
1958  }
1959 
1960  Teuchos::BLAS<int,FadType> teuchos_blas;
1961  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1962 
1963  Teuchos::BLAS<int,FadType> sacado_blas(false);
1964  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1965 
1966  COMPARE_FAD_VECTORS(A1, A2, m*n);
1967 
1968  unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
1969  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1970  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1971 
1972  COMPARE_FAD_VECTORS(A1, A3, m*n);
1973 
1974  sacado_blas.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A4[0], m);
1975 
1976  COMPARE_FAD_VECTORS(A1, A4, m*n);
1977 
1978  sacado_blas2.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A5[0], m);
1979 
1980  COMPARE_FAD_VECTORS(A1, A5, m*n);
1981 }
1982 
1983 // Tests constant x and y
1985  typedef decltype(this->fad) FadType;
1986  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1987  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1988  auto m = this->m_;
1989  auto n = this->n_;
1990  auto ndot = this->ndot_;
1991 
1992 
1993  // GER is apparently not defined in the BLAS for complex types
1995  return;
1996 
1997  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1998  A5(m*n,ndot), x(m,ndot), y(n,ndot);
1999  std::vector<ScalarType> xx(n), yy(n);
2000  for (unsigned int j=0; j<n; j++) {
2001  for (unsigned int i=0; i<m; i++) {
2002  ScalarType val = this->urand.number();
2003  A1[i+j*m] = FadType(ndot, val);
2004  A2[i+j*m] = FadType(ndot, val);
2005  A3[i+j*m] = FadType(ndot, val);
2006  A4[i+j*m] = FadType(ndot, val);
2007  A5[i+j*m] = FadType(ndot, val);
2008  for (unsigned int k=0; k<ndot; k++) {
2009  val = this->urand.number();
2010  A1[i+j*m].fastAccessDx(k) = val;
2011  A2[i+j*m].fastAccessDx(k) = val;
2012  A3[i+j*m].fastAccessDx(k) = val;
2013  A4[i+j*m].fastAccessDx(k) = val;
2014  A5[i+j*m].fastAccessDx(k) = val;
2015  }
2016  }
2017  }
2018  for (unsigned int i=0; i<m; i++) {
2019  xx[i] = this->urand.number();
2020  x[i] = xx[i];
2021  }
2022  for (unsigned int i=0; i<n; i++) {
2023  yy[i] = this->urand.number();
2024  y[i] = yy[i];
2025  }
2026  FadType alpha(ndot, this->urand.number());
2027  for (unsigned int k=0; k<ndot; k++) {
2028  alpha.fastAccessDx(k) = this->urand.number();
2029  }
2030 
2031  Teuchos::BLAS<int,FadType> teuchos_blas;
2032  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2033 
2034  Teuchos::BLAS<int,FadType> sacado_blas(false);
2035  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2036 
2037  COMPARE_FAD_VECTORS(A1, A2, m*n);
2038 
2039  unsigned int sz = m*n*(1+ndot) + m + n;
2040  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2041  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2042 
2043  COMPARE_FAD_VECTORS(A1, A3, m*n);
2044 
2045  sacado_blas.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
2046 
2047  COMPARE_FAD_VECTORS(A1, A4, m*n);
2048 
2049  sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
2050 
2051  COMPARE_FAD_VECTORS(A1, A5, m*n);
2052 }
2053 
2054 // Tests constant A
2056  typedef decltype(this->fad) FadType;
2057  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2058  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2059  auto m = this->m_;
2060  auto n = this->n_;
2061  auto ndot = this->ndot_;
2062 
2063 
2064  // GER is apparently not defined in the BLAS for complex types
2066  return;
2067 
2068  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
2069  for (unsigned int j=0; j<n; j++) {
2070  for (unsigned int i=0; i<m; i++) {
2071  ScalarType val = this->urand.number();
2072  A1[i+j*m] = val;
2073  A2[i+j*m] = val;
2074  A3[i+j*m] = val;
2075  }
2076  }
2077  for (unsigned int i=0; i<m; i++) {
2078  x[i] = FadType(ndot, this->urand.number());
2079  for (unsigned int k=0; k<ndot; k++)
2080  x[i].fastAccessDx(k) = this->urand.number();
2081  }
2082  for (unsigned int i=0; i<n; i++) {
2083  y[i] = FadType(ndot, this->urand.number());
2084  for (unsigned int k=0; k<ndot; k++)
2085  y[i].fastAccessDx(k) = this->urand.number();
2086  }
2087  FadType alpha(ndot, this->urand.number());
2088  for (unsigned int k=0; k<ndot; k++) {
2089  alpha.fastAccessDx(k) = this->urand.number();
2090  }
2091 
2092  Teuchos::BLAS<int,FadType> teuchos_blas;
2093  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2094 
2095  Teuchos::BLAS<int,FadType> sacado_blas(false);
2096  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2097 
2098  COMPARE_FAD_VECTORS(A1, A2, m*n);
2099 
2100  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
2101  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2102  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2103 
2104  COMPARE_FAD_VECTORS(A1, A3, m*n);
2105 }
2106 
2107 // Tests all arguments
2109  typedef decltype(this->fad) FadType;
2110  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2111  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2112  auto m = this->m_;
2113  auto n = this->n_;
2114  auto l = this->l_;
2115  auto ndot = this->ndot_;
2116 
2117  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2118  for (unsigned int j=0; j<l; j++) {
2119  for (unsigned int i=0; i<m; i++) {
2120  A[i+j*m] = FadType(ndot, this->urand.number());
2121  for (unsigned int k=0; k<ndot; k++)
2122  A[i+j*m].fastAccessDx(k) = this->urand.number();
2123  }
2124  }
2125  for (unsigned int j=0; j<n; j++) {
2126  for (unsigned int i=0; i<l; i++) {
2127  B[i+j*l] = FadType(ndot, this->urand.number());
2128  for (unsigned int k=0; k<ndot; k++)
2129  B[i+j*l].fastAccessDx(k) = this->urand.number();
2130  }
2131  }
2132  FadType alpha(ndot, this->urand.number());
2133  FadType beta(ndot, this->urand.number());
2134  for (unsigned int k=0; k<ndot; k++) {
2135  alpha.fastAccessDx(k) = this->urand.number();
2136  beta.fastAccessDx(k) = this->urand.number();
2137  }
2138 
2139  for (unsigned int j=0; j<n; j++) {
2140  for (unsigned int i=0; i<m; i++) {
2141  ScalarType val = this->urand.number();
2142  C1[i+j*m] = FadType(ndot, val);
2143  C2[i+j*m] = FadType(ndot, val);
2144  C3[i+j*m] = FadType(ndot, val);
2145  for (unsigned int k=0; k<ndot; k++) {
2146  val = this->urand.number();
2147  C1[i+j*m].fastAccessDx(k) = val;
2148  C2[i+j*m].fastAccessDx(k) = val;
2149  C3[i+j*m].fastAccessDx(k) = val;
2150  }
2151  }
2152  }
2153 
2154  Teuchos::BLAS<int,FadType> teuchos_blas;
2155  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2156  &A[0], m, &B[0], l, beta, &C1[0], m);
2157 
2158  Teuchos::BLAS<int,FadType> sacado_blas(false);
2159  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2160  &A[0], m, &B[0], l, beta, &C2[0], m);
2161 
2162  COMPARE_FAD_VECTORS(C1, C2, m*n);
2163 
2164  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2165  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2166  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2167  &A[0], m, &B[0], l, beta, &C3[0], m);
2168 
2169  COMPARE_FAD_VECTORS(C1, C3, m*n);
2170 
2171  // transa
2172  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2173  &A[0], l, &B[0], l, beta, &C1[0], m);
2174  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2175  &A[0], l, &B[0], l, beta, &C2[0], m);
2176  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2177  &A[0], l, &B[0], l, beta, &C3[0], m);
2178 
2179  COMPARE_FAD_VECTORS(C1, C2, m*n);
2180  COMPARE_FAD_VECTORS(C1, C3, m*n);
2181 
2182  // transb
2183  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2184  &A[0], m, &B[0], n, beta, &C1[0], m);
2185  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2186  &A[0], m, &B[0], n, beta, &C2[0], m);
2187  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2188  &A[0], m, &B[0], n, beta, &C3[0], m);
2189 
2190  COMPARE_FAD_VECTORS(C1, C2, m*n);
2191  COMPARE_FAD_VECTORS(C1, C3, m*n);
2192 
2193  // transa and transb
2194  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2195  &A[0], l, &B[0], n, beta, &C1[0], m);
2196  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2197  &A[0], l, &B[0], n, beta, &C2[0], m);
2198  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2199  &A[0], l, &B[0], n, beta, &C3[0], m);
2200 
2201  COMPARE_FAD_VECTORS(C1, C2, m*n);
2202  COMPARE_FAD_VECTORS(C1, C3, m*n);
2203 }
2204 
2205 // Tests different lda, ldb, ldc
2207  typedef decltype(this->fad) FadType;
2208  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2209  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2210  auto m = this->m_;
2211  auto n = this->n_;
2212  auto l = this->l_;
2213  auto ndot = this->ndot_;
2214 
2215  unsigned int lda = m+4;
2216  unsigned int ldb = l+4;
2217  unsigned int ldc = m+5;
2218  VectorType A(lda*l,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2219  C3(ldc*n,ndot);
2220  for (unsigned int j=0; j<l; j++) {
2221  for (unsigned int i=0; i<lda; i++) {
2222  A[i+j*lda] = FadType(ndot, this->urand.number());
2223  for (unsigned int k=0; k<ndot; k++)
2224  A[i+j*lda].fastAccessDx(k) = this->urand.number();
2225  }
2226  }
2227  for (unsigned int j=0; j<n; j++) {
2228  for (unsigned int i=0; i<ldb; i++) {
2229  B[i+j*ldb] = FadType(ndot, this->urand.number());
2230  for (unsigned int k=0; k<ndot; k++)
2231  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2232  }
2233  }
2234  FadType alpha(ndot, this->urand.number());
2235  FadType beta(ndot, this->urand.number());
2236  for (unsigned int k=0; k<ndot; k++) {
2237  alpha.fastAccessDx(k) = this->urand.number();
2238  beta.fastAccessDx(k) = this->urand.number();
2239  }
2240 
2241  for (unsigned int j=0; j<n; j++) {
2242  for (unsigned int i=0; i<ldc; i++) {
2243  ScalarType val = this->urand.number();
2244  C1[i+j*ldc] = FadType(ndot, val);
2245  C2[i+j*ldc] = FadType(ndot, val);
2246  C3[i+j*ldc] = FadType(ndot, val);
2247  for (unsigned int k=0; k<ndot; k++) {
2248  val = this->urand.number();
2249  C1[i+j*ldc].fastAccessDx(k) = val;
2250  C2[i+j*ldc].fastAccessDx(k) = val;
2251  C3[i+j*ldc].fastAccessDx(k) = val;
2252  }
2253  }
2254  }
2255 
2256  Teuchos::BLAS<int,FadType> teuchos_blas;
2257  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2258  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2259 
2260  Teuchos::BLAS<int,FadType> sacado_blas(false);
2261  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2262  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2263 
2264  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2265 
2266  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2267  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2268  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2269  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2270 
2271  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2272 }
2273 
2274 // Tests different lda, ldb, ldc with transa
2276  typedef decltype(this->fad) FadType;
2277  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2278  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2279  auto m = this->m_;
2280  auto n = this->n_;
2281  auto l = this->l_;
2282  auto ndot = this->ndot_;
2283 
2284  unsigned int lda = l+3;
2285  unsigned int ldb = l+4;
2286  unsigned int ldc = m+5;
2287  VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2288  C3(ldc*n,ndot);
2289  for (unsigned int j=0; j<m; j++) {
2290  for (unsigned int i=0; i<lda; i++) {
2291  A[i+j*lda] = FadType(ndot, this->urand.number());
2292  for (unsigned int k=0; k<ndot; k++)
2293  A[i+j*lda].fastAccessDx(k) = this->urand.number();
2294  }
2295  }
2296  for (unsigned int j=0; j<n; j++) {
2297  for (unsigned int i=0; i<ldb; i++) {
2298  B[i+j*ldb] = FadType(ndot, this->urand.number());
2299  for (unsigned int k=0; k<ndot; k++)
2300  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2301  }
2302  }
2303  FadType alpha(ndot, this->urand.number());
2304  FadType beta(ndot, this->urand.number());
2305  for (unsigned int k=0; k<ndot; k++) {
2306  alpha.fastAccessDx(k) = this->urand.number();
2307  beta.fastAccessDx(k) = this->urand.number();
2308  }
2309 
2310  for (unsigned int j=0; j<n; j++) {
2311  for (unsigned int i=0; i<ldc; i++) {
2312  ScalarType val = this->urand.number();
2313  C1[i+j*ldc] = FadType(ndot, val);
2314  C2[i+j*ldc] = FadType(ndot, val);
2315  C3[i+j*ldc] = FadType(ndot, val);
2316  for (unsigned int k=0; k<ndot; k++) {
2317  val = this->urand.number();
2318  C1[i+j*ldc].fastAccessDx(k) = val;
2319  C2[i+j*ldc].fastAccessDx(k) = val;
2320  C3[i+j*ldc].fastAccessDx(k) = val;
2321  }
2322  }
2323  }
2324 
2325  Teuchos::BLAS<int,FadType> teuchos_blas;
2326  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2327  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2328 
2329  Teuchos::BLAS<int,FadType> sacado_blas(false);
2330  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2331  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2332 
2333  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2334 
2335  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2336  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2337  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2338  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2339 
2340  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2341 }
2342 
2343 // Tests different lda, ldb, ldc with transb
2345  typedef decltype(this->fad) FadType;
2346  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2347  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2348  auto m = this->m_;
2349  auto n = this->n_;
2350  auto l = this->l_;
2351  auto ndot = this->ndot_;
2352 
2353  unsigned int lda = m+4;
2354  unsigned int ldb = n+4;
2355  unsigned int ldc = m+5;
2356  VectorType A(lda*l,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2357  C3(ldc*n,ndot);
2358  for (unsigned int j=0; j<l; j++) {
2359  for (unsigned int i=0; i<lda; i++) {
2360  A[i+j*lda] = FadType(ndot, this->urand.number());
2361  for (unsigned int k=0; k<ndot; k++)
2362  A[i+j*lda].fastAccessDx(k) = this->urand.number();
2363  }
2364  }
2365  for (unsigned int j=0; j<l; j++) {
2366  for (unsigned int i=0; i<ldb; i++) {
2367  B[i+j*ldb] = FadType(ndot, this->urand.number());
2368  for (unsigned int k=0; k<ndot; k++)
2369  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2370  }
2371  }
2372  FadType alpha(ndot, this->urand.number());
2373  FadType beta(ndot, this->urand.number());
2374  for (unsigned int k=0; k<ndot; k++) {
2375  alpha.fastAccessDx(k) = this->urand.number();
2376  beta.fastAccessDx(k) = this->urand.number();
2377  }
2378 
2379  for (unsigned int j=0; j<n; j++) {
2380  for (unsigned int i=0; i<ldc; i++) {
2381  ScalarType val = this->urand.number();
2382  C1[i+j*ldc] = FadType(ndot, val);
2383  C2[i+j*ldc] = FadType(ndot, val);
2384  C3[i+j*ldc] = FadType(ndot, val);
2385  for (unsigned int k=0; k<ndot; k++) {
2386  val = this->urand.number();
2387  C1[i+j*ldc].fastAccessDx(k) = val;
2388  C2[i+j*ldc].fastAccessDx(k) = val;
2389  C3[i+j*ldc].fastAccessDx(k) = val;
2390  }
2391  }
2392  }
2393 
2394  Teuchos::BLAS<int,FadType> teuchos_blas;
2395  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2396  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2397 
2398  Teuchos::BLAS<int,FadType> sacado_blas(false);
2399  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2400  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2401 
2402  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2403 
2404  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2405  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2406  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2407  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2408 
2409  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2410 }
2411 
2412 // Tests different lda, ldb, ldc with transa and transb
2414  typedef decltype(this->fad) FadType;
2415  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2416  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2417  auto m = this->m_;
2418  auto n = this->n_;
2419  auto l = this->l_;
2420  auto ndot = this->ndot_;
2421 
2422  unsigned int lda = l+3;
2423  unsigned int ldb = n+4;
2424  unsigned int ldc = m+5;
2425  VectorType A(lda*m,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2426  C3(ldc*n,ndot);
2427  for (unsigned int j=0; j<m; j++) {
2428  for (unsigned int i=0; i<lda; i++) {
2429  A[i+j*lda] = FadType(ndot, this->urand.number());
2430  for (unsigned int k=0; k<ndot; k++)
2431  A[i+j*lda].fastAccessDx(k) = this->urand.number();
2432  }
2433  }
2434  for (unsigned int j=0; j<l; j++) {
2435  for (unsigned int i=0; i<ldb; i++) {
2436  B[i+j*ldb] = FadType(ndot, this->urand.number());
2437  for (unsigned int k=0; k<ndot; k++)
2438  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2439  }
2440  }
2441  FadType alpha(ndot, this->urand.number());
2442  FadType beta(ndot, this->urand.number());
2443  for (unsigned int k=0; k<ndot; k++) {
2444  alpha.fastAccessDx(k) = this->urand.number();
2445  beta.fastAccessDx(k) = this->urand.number();
2446  }
2447 
2448  for (unsigned int j=0; j<n; j++) {
2449  for (unsigned int i=0; i<ldc; i++) {
2450  ScalarType val = this->urand.number();
2451  C1[i+j*ldc] = FadType(ndot, val);
2452  C2[i+j*ldc] = FadType(ndot, val);
2453  C3[i+j*ldc] = FadType(ndot, val);
2454  for (unsigned int k=0; k<ndot; k++) {
2455  val = this->urand.number();
2456  C1[i+j*ldc].fastAccessDx(k) = val;
2457  C2[i+j*ldc].fastAccessDx(k) = val;
2458  C3[i+j*ldc].fastAccessDx(k) = val;
2459  }
2460  }
2461  }
2462 
2463  Teuchos::BLAS<int,FadType> teuchos_blas;
2464  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2465  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2466 
2467  Teuchos::BLAS<int,FadType> sacado_blas(false);
2468  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2469  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2470 
2471  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2472 
2473  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2474  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2475  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2476  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2477 
2478  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2479 }
2480 
2481 // Tests with constant C
2483  typedef decltype(this->fad) FadType;
2484  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2485  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2486  auto m = this->m_;
2487  auto n = this->n_;
2488  auto l = this->l_;
2489  auto ndot = this->ndot_;
2490 
2491  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2492  for (unsigned int j=0; j<l; j++) {
2493  for (unsigned int i=0; i<m; i++) {
2494  A[i+j*m] = FadType(ndot, this->urand.number());
2495  for (unsigned int k=0; k<ndot; k++)
2496  A[i+j*m].fastAccessDx(k) = this->urand.number();
2497  }
2498  }
2499  for (unsigned int j=0; j<n; j++) {
2500  for (unsigned int i=0; i<l; i++) {
2501  B[i+j*l] = FadType(ndot, this->urand.number());
2502  for (unsigned int k=0; k<ndot; k++)
2503  B[i+j*l].fastAccessDx(k) = this->urand.number();
2504  }
2505  }
2506  FadType alpha(ndot, this->urand.number());
2507  FadType beta(ndot, this->urand.number());
2508  for (unsigned int k=0; k<ndot; k++) {
2509  alpha.fastAccessDx(k) = this->urand.number();
2510  beta.fastAccessDx(k) = this->urand.number();
2511  }
2512 
2513  for (unsigned int j=0; j<n; j++) {
2514  for (unsigned int i=0; i<m; i++) {
2515  ScalarType val = this->urand.number();
2516  C1[i+j*m] = val;
2517  C2[i+j*m] = val;
2518  C3[i+j*m] = val;
2519  }
2520  }
2521 
2522  Teuchos::BLAS<int,FadType> teuchos_blas;
2523  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2524  &A[0], m, &B[0], l, beta, &C1[0], m);
2525 
2526  Teuchos::BLAS<int,FadType> sacado_blas(false);
2527  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2528  &A[0], m, &B[0], l, beta, &C2[0], m);
2529 
2530  COMPARE_FAD_VECTORS(C1, C2, m*n);
2531 
2532  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2533  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2534  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2535  &A[0], m, &B[0], l, beta, &C3[0], m);
2536 
2537  COMPARE_FAD_VECTORS(C1, C3, m*n);
2538 
2539  // transa
2540  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2541  &A[0], l, &B[0], l, beta, &C1[0], m);
2542  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2543  &A[0], l, &B[0], l, beta, &C2[0], m);
2544  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2545  &A[0], l, &B[0], l, beta, &C3[0], m);
2546 
2547  COMPARE_FAD_VECTORS(C1, C2, m*n);
2548  COMPARE_FAD_VECTORS(C1, C3, m*n);
2549 
2550  // transb
2551  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2552  &A[0], m, &B[0], n, beta, &C1[0], m);
2553  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2554  &A[0], m, &B[0], n, beta, &C2[0], m);
2555  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2556  &A[0], m, &B[0], n, beta, &C3[0], m);
2557 
2558  COMPARE_FAD_VECTORS(C1, C2, m*n);
2559  COMPARE_FAD_VECTORS(C1, C3, m*n);
2560 
2561  // transa and transb
2562  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2563  &A[0], l, &B[0], n, beta, &C1[0], m);
2564  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2565  &A[0], l, &B[0], n, beta, &C2[0], m);
2566  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2567  &A[0], l, &B[0], n, beta, &C3[0], m);
2568 
2569  COMPARE_FAD_VECTORS(C1, C2, m*n);
2570  COMPARE_FAD_VECTORS(C1, C3, m*n);
2571 }
2572 
2573 // Tests with constant alpha, beta
2575  typedef decltype(this->fad) FadType;
2576  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2577  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2578  auto m = this->m_;
2579  auto n = this->n_;
2580  auto l = this->l_;
2581  auto ndot = this->ndot_;
2582 
2583  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2584  for (unsigned int j=0; j<l; j++) {
2585  for (unsigned int i=0; i<m; i++) {
2586  A[i+j*m] = FadType(ndot, this->urand.number());
2587  for (unsigned int k=0; k<ndot; k++)
2588  A[i+j*m].fastAccessDx(k) = this->urand.number();
2589  }
2590  }
2591  for (unsigned int j=0; j<n; j++) {
2592  for (unsigned int i=0; i<l; i++) {
2593  B[i+j*l] = FadType(ndot, this->urand.number());
2594  for (unsigned int k=0; k<ndot; k++)
2595  B[i+j*l].fastAccessDx(k) = this->urand.number();
2596  }
2597  }
2598  ScalarType alpha = this->urand.number();
2599  ScalarType beta = this->urand.number();
2600 
2601  for (unsigned int j=0; j<n; j++) {
2602  for (unsigned int i=0; i<m; i++) {
2603  ScalarType val = this->urand.number();
2604  C1[i+j*m] = FadType(ndot, val);
2605  C2[i+j*m] = FadType(ndot, val);
2606  C3[i+j*m] = FadType(ndot, val);
2607  for (unsigned int k=0; k<ndot; k++) {
2608  val = this->urand.number();
2609  C1[i+j*m].fastAccessDx(k) = val;
2610  C2[i+j*m].fastAccessDx(k) = val;
2611  C3[i+j*m].fastAccessDx(k) = val;
2612  }
2613  }
2614  }
2615 
2616  Teuchos::BLAS<int,FadType> teuchos_blas;
2617  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2618  &A[0], m, &B[0], l, beta, &C1[0], m);
2619 
2620  Teuchos::BLAS<int,FadType> sacado_blas(false);
2621  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2622  &A[0], m, &B[0], l, beta, &C2[0], m);
2623 
2624  COMPARE_FAD_VECTORS(C1, C2, m*n);
2625 
2626  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2627  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2628  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2629  &A[0], m, &B[0], l, beta, &C3[0], m);
2630 
2631  COMPARE_FAD_VECTORS(C1, C3, m*n);
2632 
2633  // transa
2634  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2635  &A[0], l, &B[0], l, beta, &C1[0], m);
2636  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2637  &A[0], l, &B[0], l, beta, &C2[0], m);
2638  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2639  &A[0], l, &B[0], l, beta, &C3[0], m);
2640 
2641  COMPARE_FAD_VECTORS(C1, C2, m*n);
2642  COMPARE_FAD_VECTORS(C1, C3, m*n);
2643 
2644  // transb
2645  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2646  &A[0], m, &B[0], n, beta, &C1[0], m);
2647  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2648  &A[0], m, &B[0], n, beta, &C2[0], m);
2649  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2650  &A[0], m, &B[0], n, beta, &C3[0], m);
2651 
2652  COMPARE_FAD_VECTORS(C1, C2, m*n);
2653  COMPARE_FAD_VECTORS(C1, C3, m*n);
2654 
2655  // transa and transb
2656  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2657  &A[0], l, &B[0], n, beta, &C1[0], m);
2658  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2659  &A[0], l, &B[0], n, beta, &C2[0], m);
2660  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2661  &A[0], l, &B[0], n, beta, &C3[0], m);
2662 
2663  COMPARE_FAD_VECTORS(C1, C2, m*n);
2664  COMPARE_FAD_VECTORS(C1, C3, m*n);
2665 }
2666 
2667 // Tests with constant A
2669  typedef decltype(this->fad) FadType;
2670  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2671  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2672  auto m = this->m_;
2673  auto n = this->n_;
2674  auto l = this->l_;
2675  auto ndot = this->ndot_;
2676 
2677  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2678  C4(m*n,ndot), C5(m*n,ndot);
2679  std::vector<ScalarType> a(m*l);
2680  for (unsigned int j=0; j<l; j++) {
2681  for (unsigned int i=0; i<m; i++) {
2682  a[i+j*m] = this->urand.number();
2683  A[i+j*m] = a[i+j*m];
2684  }
2685  }
2686  for (unsigned int j=0; j<n; j++) {
2687  for (unsigned int i=0; i<l; i++) {
2688  B[i+j*l] = FadType(ndot, this->urand.number());
2689  for (unsigned int k=0; k<ndot; k++)
2690  B[i+j*l].fastAccessDx(k) = this->urand.number();
2691  }
2692  }
2693  FadType alpha(ndot, this->urand.number());
2694  FadType beta(ndot, this->urand.number());
2695  for (unsigned int k=0; k<ndot; k++) {
2696  alpha.fastAccessDx(k) = this->urand.number();
2697  beta.fastAccessDx(k) = this->urand.number();
2698  }
2699 
2700  for (unsigned int j=0; j<n; j++) {
2701  for (unsigned int i=0; i<m; i++) {
2702  ScalarType val = this->urand.number();
2703  C1[i+j*m] = FadType(ndot, val);
2704  C2[i+j*m] = FadType(ndot, val);
2705  C3[i+j*m] = FadType(ndot, val);
2706  C4[i+j*m] = FadType(ndot, val);
2707  C5[i+j*m] = FadType(ndot, val);
2708  for (unsigned int k=0; k<ndot; k++) {
2709  val = this->urand.number();
2710  C1[i+j*m].fastAccessDx(k) = val;
2711  C2[i+j*m].fastAccessDx(k) = val;
2712  C3[i+j*m].fastAccessDx(k) = val;
2713  C4[i+j*m].fastAccessDx(k) = val;
2714  C5[i+j*m].fastAccessDx(k) = val;
2715  }
2716  }
2717  }
2718 
2719  Teuchos::BLAS<int,FadType> teuchos_blas;
2720  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2721  &A[0], m, &B[0], l, beta, &C1[0], m);
2722 
2723  Teuchos::BLAS<int,FadType> sacado_blas(false);
2724  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2725  &A[0], m, &B[0], l, beta, &C2[0], m);
2726 
2727  COMPARE_FAD_VECTORS(C1, C2, m*n);
2728 
2729  unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
2730  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2731  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2732  &A[0], m, &B[0], l, beta, &C3[0], m);
2733 
2734  COMPARE_FAD_VECTORS(C1, C3, m*n);
2735 
2736  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2737  &a[0], m, &B[0], l, beta, &C4[0], m);
2738 
2739  COMPARE_FAD_VECTORS(C1, C4, m*n);
2740 
2741  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2742  &a[0], m, &B[0], l, beta, &C5[0], m);
2743 
2744  COMPARE_FAD_VECTORS(C1, C5, m*n);
2745 
2746  // transa
2747  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2748  &A[0], l, &B[0], l, beta, &C1[0], m);
2749  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2750  &A[0], l, &B[0], l, beta, &C2[0], m);
2751  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2752  &A[0], l, &B[0], l, beta, &C3[0], m);
2753  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2754  &a[0], l, &B[0], l, beta, &C4[0], m);
2755  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2756  &a[0], l, &B[0], l, beta, &C5[0], m);
2757 
2758  COMPARE_FAD_VECTORS(C1, C2, m*n);
2759  COMPARE_FAD_VECTORS(C1, C3, m*n);
2760  COMPARE_FAD_VECTORS(C1, C4, m*n);
2761  COMPARE_FAD_VECTORS(C1, C5, m*n);
2762 
2763  // transb
2764  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2765  &A[0], m, &B[0], n, beta, &C1[0], m);
2766  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2767  &A[0], m, &B[0], n, beta, &C2[0], m);
2768  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2769  &A[0], m, &B[0], n, beta, &C3[0], m);
2770  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2771  &a[0], m, &B[0], n, beta, &C4[0], m);
2772  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2773  &a[0], m, &B[0], n, beta, &C5[0], m);
2774 
2775  COMPARE_FAD_VECTORS(C1, C2, m*n);
2776  COMPARE_FAD_VECTORS(C1, C3, m*n);
2777  COMPARE_FAD_VECTORS(C1, C4, m*n);
2778  COMPARE_FAD_VECTORS(C1, C5, m*n);
2779 
2780  // transa and transb
2781  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2782  &A[0], l, &B[0], n, beta, &C1[0], m);
2783  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2784  &A[0], l, &B[0], n, beta, &C2[0], m);
2785  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2786  &A[0], l, &B[0], n, beta, &C3[0], m);
2787  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2788  &a[0], l, &B[0], n, beta, &C4[0], m);
2789  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2790  &a[0], l, &B[0], n, beta, &C5[0], m);
2791 
2792  COMPARE_FAD_VECTORS(C1, C2, m*n);
2793  COMPARE_FAD_VECTORS(C1, C3, m*n);
2794  COMPARE_FAD_VECTORS(C1, C4, m*n);
2795  COMPARE_FAD_VECTORS(C1, C5, m*n);
2796 }
2797 
2798 // Tests with constant B
2800  typedef decltype(this->fad) FadType;
2801  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2802  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2803  auto m = this->m_;
2804  auto n = this->n_;
2805  auto l = this->l_;
2806  auto ndot = this->ndot_;
2807 
2808  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2809  C4(m*n,ndot), C5(m*n,ndot);
2810  std::vector<ScalarType> b(l*n);
2811  for (unsigned int j=0; j<l; j++) {
2812  for (unsigned int i=0; i<m; i++) {
2813  A[i+j*m] = FadType(ndot, this->urand.number());
2814  for (unsigned int k=0; k<ndot; k++)
2815  A[i+j*m].fastAccessDx(k) = this->urand.number();
2816  }
2817  }
2818  for (unsigned int j=0; j<n; j++) {
2819  for (unsigned int i=0; i<l; i++) {
2820  b[i+j*l] = this->urand.number();
2821  B[i+j*l] = b[i+j*l];
2822  }
2823  }
2824  FadType alpha(ndot, this->urand.number());
2825  FadType beta(ndot, this->urand.number());
2826  for (unsigned int k=0; k<ndot; k++) {
2827  alpha.fastAccessDx(k) = this->urand.number();
2828  beta.fastAccessDx(k) = this->urand.number();
2829  }
2830 
2831  for (unsigned int j=0; j<n; j++) {
2832  for (unsigned int i=0; i<m; i++) {
2833  ScalarType val = this->urand.number();
2834  C1[i+j*m] = FadType(ndot, val);
2835  C2[i+j*m] = FadType(ndot, val);
2836  C3[i+j*m] = FadType(ndot, val);
2837  C4[i+j*m] = FadType(ndot, val);
2838  C5[i+j*m] = FadType(ndot, val);
2839  for (unsigned int k=0; k<ndot; k++) {
2840  val = this->urand.number();
2841  C1[i+j*m].fastAccessDx(k) = val;
2842  C2[i+j*m].fastAccessDx(k) = val;
2843  C3[i+j*m].fastAccessDx(k) = val;
2844  C4[i+j*m].fastAccessDx(k) = val;
2845  C5[i+j*m].fastAccessDx(k) = val;
2846  }
2847  }
2848  }
2849 
2850  Teuchos::BLAS<int,FadType> teuchos_blas;
2851  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2852  &A[0], m, &B[0], l, beta, &C1[0], m);
2853 
2854  Teuchos::BLAS<int,FadType> sacado_blas(false);
2855  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2856  &A[0], m, &B[0], l, beta, &C2[0], m);
2857 
2858  COMPARE_FAD_VECTORS(C1, C2, m*n);
2859 
2860  unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
2861  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2862  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2863  &A[0], m, &B[0], l, beta, &C3[0], m);
2864 
2865  COMPARE_FAD_VECTORS(C1, C3, m*n);
2866 
2867  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2868  &A[0], m, &b[0], l, beta, &C4[0], m);
2869 
2870  COMPARE_FAD_VECTORS(C1, C4, m*n);
2871 
2872  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2873  &A[0], m, &b[0], l, beta, &C5[0], m);
2874 
2875  COMPARE_FAD_VECTORS(C1, C5, m*n);
2876 
2877  // transa
2878  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2879  &A[0], l, &B[0], l, beta, &C1[0], m);
2880  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2881  &A[0], l, &B[0], l, beta, &C2[0], m);
2882  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2883  &A[0], l, &B[0], l, beta, &C3[0], m);
2884  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2885  &A[0], l, &b[0], l, beta, &C4[0], m);
2886  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2887  &A[0], l, &b[0], l, beta, &C5[0], m);
2888 
2889  COMPARE_FAD_VECTORS(C1, C2, m*n);
2890  COMPARE_FAD_VECTORS(C1, C3, m*n);
2891  COMPARE_FAD_VECTORS(C1, C4, m*n);
2892  COMPARE_FAD_VECTORS(C1, C5, m*n);
2893 
2894  // transb
2895  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2896  &A[0], m, &B[0], n, beta, &C1[0], m);
2897  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2898  &A[0], m, &B[0], n, beta, &C2[0], m);
2899  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2900  &A[0], m, &B[0], n, beta, &C3[0], m);
2901  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2902  &A[0], m, &b[0], n, beta, &C4[0], m);
2903  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2904  &A[0], m, &b[0], n, beta, &C5[0], m);
2905 
2906  COMPARE_FAD_VECTORS(C1, C2, m*n);
2907  COMPARE_FAD_VECTORS(C1, C3, m*n);
2908  COMPARE_FAD_VECTORS(C1, C4, m*n);
2909  COMPARE_FAD_VECTORS(C1, C5, m*n);
2910 
2911  // transa and transb
2912  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2913  &A[0], l, &B[0], n, beta, &C1[0], m);
2914  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2915  &A[0], l, &B[0], n, beta, &C2[0], m);
2916  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2917  &A[0], l, &B[0], n, beta, &C3[0], m);
2918  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2919  &A[0], l, &b[0], n, beta, &C4[0], m);
2920  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2921  &A[0], l, &b[0], n, beta, &C5[0], m);
2922 
2923  COMPARE_FAD_VECTORS(C1, C2, m*n);
2924  COMPARE_FAD_VECTORS(C1, C3, m*n);
2925  COMPARE_FAD_VECTORS(C1, C4, m*n);
2926  COMPARE_FAD_VECTORS(C1, C5, m*n);
2927 }
2928 
2929 // Tests with constant A and B
2931  typedef decltype(this->fad) FadType;
2932  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2933  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2934  auto m = this->m_;
2935  auto n = this->n_;
2936  auto l = this->l_;
2937  auto ndot = this->ndot_;
2938 
2939  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2940  C4(m*n,ndot), C5(m*n,ndot);
2941  std::vector<ScalarType> a(m*l), b(l*n);
2942  for (unsigned int j=0; j<l; j++) {
2943  for (unsigned int i=0; i<m; i++) {
2944  a[i+j*m] = this->urand.number();
2945  A[i+j*m] = a[i+j*m];
2946  }
2947  }
2948  for (unsigned int j=0; j<n; j++) {
2949  for (unsigned int i=0; i<l; i++) {
2950  b[i+j*l] = this->urand.number();
2951  B[i+j*l] = b[i+j*l];
2952  }
2953  }
2954  FadType alpha(ndot, this->urand.number());
2955  FadType beta(ndot, this->urand.number());
2956  for (unsigned int k=0; k<ndot; k++) {
2957  alpha.fastAccessDx(k) = this->urand.number();
2958  beta.fastAccessDx(k) = this->urand.number();
2959  }
2960 
2961  for (unsigned int j=0; j<n; j++) {
2962  for (unsigned int i=0; i<m; i++) {
2963  ScalarType val = this->urand.number();
2964  C1[i+j*m] = FadType(ndot, val);
2965  C2[i+j*m] = FadType(ndot, val);
2966  C3[i+j*m] = FadType(ndot, val);
2967  C4[i+j*m] = FadType(ndot, val);
2968  C5[i+j*m] = FadType(ndot, val);
2969  for (unsigned int k=0; k<ndot; k++) {
2970  val = this->urand.number();
2971  C1[i+j*m].fastAccessDx(k) = val;
2972  C2[i+j*m].fastAccessDx(k) = val;
2973  C3[i+j*m].fastAccessDx(k) = val;
2974  C4[i+j*m].fastAccessDx(k) = val;
2975  C5[i+j*m].fastAccessDx(k) = val;
2976  }
2977  }
2978  }
2979 
2980  Teuchos::BLAS<int,FadType> teuchos_blas;
2981  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2982  &A[0], m, &B[0], l, beta, &C1[0], m);
2983 
2984  Teuchos::BLAS<int,FadType> sacado_blas(false);
2985  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2986  &A[0], m, &B[0], l, beta, &C2[0], m);
2987 
2988  COMPARE_FAD_VECTORS(C1, C2, m*n);
2989 
2990  unsigned int sz = m*l + l*n + m*n*(1+ndot);
2991  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2992  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2993  &A[0], m, &B[0], l, beta, &C3[0], m);
2994 
2995  COMPARE_FAD_VECTORS(C1, C3, m*n);
2996 
2997  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2998  &a[0], m, &b[0], l, beta, &C4[0], m);
2999 
3000  COMPARE_FAD_VECTORS(C1, C4, m*n);
3001 
3002  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3003  &a[0], m, &b[0], l, beta, &C5[0], m);
3004 
3005  COMPARE_FAD_VECTORS(C1, C5, m*n);
3006 
3007  // transa
3008  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3009  &A[0], l, &B[0], l, beta, &C1[0], m);
3010  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3011  &A[0], l, &B[0], l, beta, &C2[0], m);
3012  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3013  &A[0], l, &B[0], l, beta, &C3[0], m);
3014  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3015  &a[0], l, &b[0], l, beta, &C4[0], m);
3016  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3017  &a[0], l, &b[0], l, beta, &C5[0], m);
3018 
3019  COMPARE_FAD_VECTORS(C1, C2, m*n);
3020  COMPARE_FAD_VECTORS(C1, C3, m*n);
3021  COMPARE_FAD_VECTORS(C1, C4, m*n);
3022  COMPARE_FAD_VECTORS(C1, C5, m*n);
3023 
3024  // transb
3025  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3026  &A[0], m, &B[0], n, beta, &C1[0], m);
3027  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3028  &A[0], m, &B[0], n, beta, &C2[0], m);
3029  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3030  &A[0], m, &B[0], n, beta, &C3[0], m);
3031  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3032  &a[0], m, &b[0], n, beta, &C4[0], m);
3033  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3034  &a[0], m, &b[0], n, beta, &C5[0], m);
3035 
3036  COMPARE_FAD_VECTORS(C1, C2, m*n);
3037  COMPARE_FAD_VECTORS(C1, C3, m*n);
3038  COMPARE_FAD_VECTORS(C1, C4, m*n);
3039  COMPARE_FAD_VECTORS(C1, C5, m*n);
3040 
3041  // transa and transb
3042  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3043  &A[0], l, &B[0], n, beta, &C1[0], m);
3044  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3045  &A[0], l, &B[0], n, beta, &C2[0], m);
3046  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3047  &A[0], l, &B[0], n, beta, &C3[0], m);
3048  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3049  &a[0], l, &b[0], n, beta, &C4[0], m);
3050  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3051  &a[0], l, &b[0], n, beta, &C5[0], m);
3052 
3053  COMPARE_FAD_VECTORS(C1, C2, m*n);
3054  COMPARE_FAD_VECTORS(C1, C3, m*n);
3055  COMPARE_FAD_VECTORS(C1, C4, m*n);
3056  COMPARE_FAD_VECTORS(C1, C5, m*n);
3057 }
3058 
3059 // Tests all arguments, left side
3061  typedef decltype(this->fad) FadType;
3062  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3063  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3064  auto m = this->m_;
3065  auto n = this->n_;
3066  auto ndot = this->ndot_;
3067 
3068 
3069  // SYMM is apparently not defined in the BLAS for complex types
3071  return;
3072 
3073  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3074  for (unsigned int j=0; j<m; j++) {
3075  for (unsigned int i=0; i<m; i++) {
3076  A[i+j*m] = FadType(ndot, this->urand.number());
3077  for (unsigned int k=0; k<ndot; k++)
3078  A[i+j*m].fastAccessDx(k) = this->urand.number();
3079  }
3080  }
3081  for (unsigned int j=0; j<n; j++) {
3082  for (unsigned int i=0; i<m; i++) {
3083  B[i+j*m] = FadType(ndot, this->urand.number());
3084  for (unsigned int k=0; k<ndot; k++)
3085  B[i+j*m].fastAccessDx(k) = this->urand.number();
3086  }
3087  }
3088  FadType alpha(ndot, this->urand.number());
3089  FadType beta(ndot, this->urand.number());
3090  for (unsigned int k=0; k<ndot; k++) {
3091  alpha.fastAccessDx(k) = this->urand.number();
3092  beta.fastAccessDx(k) = this->urand.number();
3093  }
3094 
3095  for (unsigned int j=0; j<n; j++) {
3096  for (unsigned int i=0; i<m; i++) {
3097  ScalarType val = this->urand.number();
3098  C1[i+j*m] = FadType(ndot, val);
3099  C2[i+j*m] = FadType(ndot, val);
3100  C3[i+j*m] = FadType(ndot, val);
3101  for (unsigned int k=0; k<ndot; k++) {
3102  val = this->urand.number();
3103  C1[i+j*m].fastAccessDx(k) = val;
3104  C2[i+j*m].fastAccessDx(k) = val;
3105  C3[i+j*m].fastAccessDx(k) = val;
3106  }
3107  }
3108  }
3109 
3110  Teuchos::BLAS<int,FadType> teuchos_blas;
3111  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3112  &A[0], m, &B[0], m, beta, &C1[0], m);
3113 
3114  Teuchos::BLAS<int,FadType> sacado_blas(false);
3115  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3116  &A[0], m, &B[0], m, beta, &C2[0], m);
3117 
3118  COMPARE_FAD_VECTORS(C1, C2, m*n);
3119 
3120  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3121  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3122  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3123  &A[0], m, &B[0], m, beta, &C3[0], m);
3124 
3125  COMPARE_FAD_VECTORS(C1, C3, m*n);
3126 
3127  // lower tri
3128  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3129  &A[0], m, &B[0], m, beta, &C1[0], m);
3130  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3131  &A[0], m, &B[0], m, beta, &C2[0], m);
3132  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3133  &A[0], m, &B[0], m, beta, &C3[0], m);
3134 
3135  COMPARE_FAD_VECTORS(C1, C2, m*n);
3136  COMPARE_FAD_VECTORS(C1, C3, m*n);
3137 }
3138 
3139 // Tests all arguments, right side
3141  typedef decltype(this->fad) FadType;
3142  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3143  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3144  auto m = this->m_;
3145  auto n = this->n_;
3146  auto ndot = this->ndot_;
3147 
3148 
3149  // SYMM is apparently not defined in the BLAS for complex types
3151  return;
3152 
3153  VectorType A(n*n,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3154  for (unsigned int j=0; j<n; j++) {
3155  for (unsigned int i=0; i<n; i++) {
3156  A[i+j*n] = FadType(ndot, this->urand.number());
3157  for (unsigned int k=0; k<ndot; k++)
3158  A[i+j*n].fastAccessDx(k) = this->urand.number();
3159  }
3160  }
3161  for (unsigned int j=0; j<n; j++) {
3162  for (unsigned int i=0; i<m; i++) {
3163  B[i+j*m] = FadType(ndot, this->urand.number());
3164  for (unsigned int k=0; k<ndot; k++)
3165  B[i+j*m].fastAccessDx(k) = this->urand.number();
3166  }
3167  }
3168  FadType alpha(ndot, this->urand.number());
3169  FadType beta(ndot, this->urand.number());
3170  for (unsigned int k=0; k<ndot; k++) {
3171  alpha.fastAccessDx(k) = this->urand.number();
3172  beta.fastAccessDx(k) = this->urand.number();
3173  }
3174 
3175  for (unsigned int j=0; j<n; j++) {
3176  for (unsigned int i=0; i<m; i++) {
3177  ScalarType val = this->urand.number();
3178  C1[i+j*m] = FadType(ndot, val);
3179  C2[i+j*m] = FadType(ndot, val);
3180  C3[i+j*m] = FadType(ndot, val);
3181  for (unsigned int k=0; k<ndot; k++) {
3182  val = this->urand.number();
3183  C1[i+j*m].fastAccessDx(k) = val;
3184  C2[i+j*m].fastAccessDx(k) = val;
3185  C3[i+j*m].fastAccessDx(k) = val;
3186  }
3187  }
3188  }
3189 
3190  Teuchos::BLAS<int,FadType> teuchos_blas;
3191  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3192  &A[0], n, &B[0], m, beta, &C1[0], m);
3193 
3194  Teuchos::BLAS<int,FadType> sacado_blas(false);
3195  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3196  &A[0], n, &B[0], m, beta, &C2[0], m);
3197 
3198  COMPARE_FAD_VECTORS(C1, C2, m*n);
3199 
3200  unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3201  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3202  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3203  &A[0], n, &B[0], m, beta, &C3[0], m);
3204 
3205  COMPARE_FAD_VECTORS(C1, C3, m*n);
3206 
3207  // lower tri
3208  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3209  &A[0], n, &B[0], m, beta, &C1[0], m);
3210  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3211  &A[0], n, &B[0], m, beta, &C2[0], m);
3212  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3213  &A[0], n, &B[0], m, beta, &C3[0], m);
3214 
3215  COMPARE_FAD_VECTORS(C1, C2, m*n);
3216  COMPARE_FAD_VECTORS(C1, C3, m*n);
3217 }
3218 
3219 // Tests different lda, ldb, ldc, left side
3221  typedef decltype(this->fad) FadType;
3222  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3223  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3224  auto m = this->m_;
3225  auto n = this->n_;
3226  auto ndot = this->ndot_;
3227 
3228 
3229  // SYMM is apparently not defined in the BLAS for complex types
3231  return;
3232 
3233  unsigned int lda = m+4;
3234  unsigned int ldb = m+5;
3235  unsigned int ldc = m+6;
3236  VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3237  C3(ldc*n,ndot);
3238  for (unsigned int j=0; j<m; j++) {
3239  for (unsigned int i=0; i<lda; i++) {
3240  A[i+j*lda] = FadType(ndot, this->urand.number());
3241  for (unsigned int k=0; k<ndot; k++)
3242  A[i+j*lda].fastAccessDx(k) = this->urand.number();
3243  }
3244  }
3245  for (unsigned int j=0; j<n; j++) {
3246  for (unsigned int i=0; i<ldb; i++) {
3247  B[i+j*ldb] = FadType(ndot, this->urand.number());
3248  for (unsigned int k=0; k<ndot; k++)
3249  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
3250  }
3251  }
3252  FadType alpha(ndot, this->urand.number());
3253  FadType beta(ndot, this->urand.number());
3254  for (unsigned int k=0; k<ndot; k++) {
3255  alpha.fastAccessDx(k) = this->urand.number();
3256  beta.fastAccessDx(k) = this->urand.number();
3257  }
3258 
3259  for (unsigned int j=0; j<n; j++) {
3260  for (unsigned int i=0; i<ldc; i++) {
3261  ScalarType val = this->urand.number();
3262  C1[i+j*ldc] = FadType(ndot, val);
3263  C2[i+j*ldc] = FadType(ndot, val);
3264  C3[i+j*ldc] = FadType(ndot, val);
3265  for (unsigned int k=0; k<ndot; k++) {
3266  val = this->urand.number();
3267  C1[i+j*ldc].fastAccessDx(k) = val;
3268  C2[i+j*ldc].fastAccessDx(k) = val;
3269  C3[i+j*ldc].fastAccessDx(k) = val;
3270  }
3271  }
3272  }
3273 
3274  Teuchos::BLAS<int,FadType> teuchos_blas;
3275  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3276  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3277 
3278  Teuchos::BLAS<int,FadType> sacado_blas(false);
3279  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3280  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3281 
3282  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3283 
3284  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3285  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3286  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3287  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3288 
3289  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3290 
3291  // lower tri
3292  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3293  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3294  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3295  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3296  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3297  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3298 
3299  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3300  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3301 }
3302 
3303 // Tests different lda, ldb, ldc, right side
3305  typedef decltype(this->fad) FadType;
3306  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3307  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3308  auto m = this->m_;
3309  auto n = this->n_;
3310  auto ndot = this->ndot_;
3311 
3312 
3313  // SYMM is apparently not defined in the BLAS for complex types
3315  return;
3316 
3317  unsigned int lda = n+4;
3318  unsigned int ldb = m+5;
3319  unsigned int ldc = m+6;
3320  VectorType A(lda*n,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3321  C3(ldc*n,ndot);
3322  for (unsigned int j=0; j<n; j++) {
3323  for (unsigned int i=0; i<lda; i++) {
3324  A[i+j*lda] = FadType(ndot, this->urand.number());
3325  for (unsigned int k=0; k<ndot; k++)
3326  A[i+j*lda].fastAccessDx(k) = this->urand.number();
3327  }
3328  }
3329  for (unsigned int j=0; j<n; j++) {
3330  for (unsigned int i=0; i<ldb; i++) {
3331  B[i+j*ldb] = FadType(ndot, this->urand.number());
3332  for (unsigned int k=0; k<ndot; k++)
3333  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
3334  }
3335  }
3336  FadType alpha(ndot, this->urand.number());
3337  FadType beta(ndot, this->urand.number());
3338  for (unsigned int k=0; k<ndot; k++) {
3339  alpha.fastAccessDx(k) = this->urand.number();
3340  beta.fastAccessDx(k) = this->urand.number();
3341  }
3342 
3343  for (unsigned int j=0; j<n; j++) {
3344  for (unsigned int i=0; i<ldc; i++) {
3345  ScalarType val = this->urand.number();
3346  C1[i+j*ldc] = FadType(ndot, val);
3347  C2[i+j*ldc] = FadType(ndot, val);
3348  C3[i+j*ldc] = FadType(ndot, val);
3349  for (unsigned int k=0; k<ndot; k++) {
3350  val = this->urand.number();
3351  C1[i+j*ldc].fastAccessDx(k) = val;
3352  C2[i+j*ldc].fastAccessDx(k) = val;
3353  C3[i+j*ldc].fastAccessDx(k) = val;
3354  }
3355  }
3356  }
3357 
3358  Teuchos::BLAS<int,FadType> teuchos_blas;
3359  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3360  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3361 
3362  Teuchos::BLAS<int,FadType> sacado_blas(false);
3363  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3364  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3365 
3366  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3367 
3368  unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3369  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3370  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3371  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3372 
3373  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3374 
3375  // lower tri
3376  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3377  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3378  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3379  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3380  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3381  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3382 
3383  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3384  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3385 }
3386 
3387 // Tests with constant C
3389  typedef decltype(this->fad) FadType;
3390  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3391  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3392  auto m = this->m_;
3393  auto n = this->n_;
3394  auto ndot = this->ndot_;
3395 
3396 
3397  // SYMM is apparently not defined in the BLAS for complex types
3399  return;
3400 
3401  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3402  for (unsigned int j=0; j<m; j++) {
3403  for (unsigned int i=0; i<m; i++) {
3404  A[i+j*m] = FadType(ndot, this->urand.number());
3405  for (unsigned int k=0; k<ndot; k++)
3406  A[i+j*m].fastAccessDx(k) = this->urand.number();
3407  }
3408  }
3409  for (unsigned int j=0; j<n; j++) {
3410  for (unsigned int i=0; i<m; i++) {
3411  B[i+j*m] = FadType(ndot, this->urand.number());
3412  for (unsigned int k=0; k<ndot; k++)
3413  B[i+j*m].fastAccessDx(k) = this->urand.number();
3414  }
3415  }
3416  FadType alpha(ndot, this->urand.number());
3417  FadType beta(ndot, this->urand.number());
3418  for (unsigned int k=0; k<ndot; k++) {
3419  alpha.fastAccessDx(k) = this->urand.number();
3420  beta.fastAccessDx(k) = this->urand.number();
3421  }
3422 
3423  for (unsigned int j=0; j<n; j++) {
3424  for (unsigned int i=0; i<m; i++) {
3425  ScalarType val = this->urand.number();
3426  C1[i+j*m] = val;
3427  C2[i+j*m] = val;
3428  C3[i+j*m] = val;
3429  }
3430  }
3431 
3432  Teuchos::BLAS<int,FadType> teuchos_blas;
3433  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3434  &A[0], m, &B[0], m, beta, &C1[0], m);
3435 
3436  Teuchos::BLAS<int,FadType> sacado_blas(false);
3437  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3438  &A[0], m, &B[0], m, beta, &C2[0], m);
3439 
3440  COMPARE_FAD_VECTORS(C1, C2, m*n);
3441 
3442  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3443  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3444  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3445  &A[0], m, &B[0], m, beta, &C3[0], m);
3446 
3447  COMPARE_FAD_VECTORS(C1, C3, m*n);
3448 
3449  // lower tri
3450  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3451  &A[0], m, &B[0], m, beta, &C1[0], m);
3452  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3453  &A[0], m, &B[0], m, beta, &C2[0], m);
3454  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3455  &A[0], m, &B[0], m, beta, &C3[0], m);
3456 
3457  COMPARE_FAD_VECTORS(C1, C2, m*n);
3458  COMPARE_FAD_VECTORS(C1, C3, m*n);
3459 }
3460 
3461 // Tests with constant alpha, beta
3463  typedef decltype(this->fad) FadType;
3464  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3465  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3466  auto m = this->m_;
3467  auto n = this->n_;
3468  auto ndot = this->ndot_;
3469 
3470 
3471  // SYMM is apparently not defined in the BLAS for complex types
3473  return;
3474 
3475  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3476  for (unsigned int j=0; j<m; j++) {
3477  for (unsigned int i=0; i<m; i++) {
3478  A[i+j*m] = FadType(ndot, this->urand.number());
3479  for (unsigned int k=0; k<ndot; k++)
3480  A[i+j*m].fastAccessDx(k) = this->urand.number();
3481  }
3482  }
3483  for (unsigned int j=0; j<n; j++) {
3484  for (unsigned int i=0; i<m; i++) {
3485  B[i+j*m] = FadType(ndot, this->urand.number());
3486  for (unsigned int k=0; k<ndot; k++)
3487  B[i+j*m].fastAccessDx(k) = this->urand.number();
3488  }
3489  }
3490  ScalarType alpha = this->urand.number();
3491  ScalarType beta = this->urand.number();
3492 
3493  for (unsigned int j=0; j<n; j++) {
3494  for (unsigned int i=0; i<m; i++) {
3495  ScalarType val = this->urand.number();
3496  C1[i+j*m] = FadType(ndot, val);
3497  C2[i+j*m] = FadType(ndot, val);
3498  C3[i+j*m] = FadType(ndot, val);
3499  for (unsigned int k=0; k<ndot; k++) {
3500  val = this->urand.number();
3501  C1[i+j*m].fastAccessDx(k) = val;
3502  C2[i+j*m].fastAccessDx(k) = val;
3503  C3[i+j*m].fastAccessDx(k) = val;
3504  }
3505  }
3506  }
3507 
3508  Teuchos::BLAS<int,FadType> teuchos_blas;
3509  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3510  &A[0], m, &B[0], m, beta, &C1[0], m);
3511 
3512  Teuchos::BLAS<int,FadType> sacado_blas(false);
3513  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3514  &A[0], m, &B[0], m, beta, &C2[0], m);
3515 
3516  COMPARE_FAD_VECTORS(C1, C2, m*n);
3517 
3518  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3519  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3520  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3521  &A[0], m, &B[0], m, beta, &C3[0], m);
3522 
3523  COMPARE_FAD_VECTORS(C1, C3, m*n);
3524 
3525  // lower tri
3526  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3527  &A[0], m, &B[0], m, beta, &C1[0], m);
3528  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3529  &A[0], m, &B[0], m, beta, &C2[0], m);
3530  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3531  &A[0], m, &B[0], m, beta, &C3[0], m);
3532 
3533  COMPARE_FAD_VECTORS(C1, C2, m*n);
3534  COMPARE_FAD_VECTORS(C1, C3, m*n);
3535 }
3536 
3537 // Tests with constant A
3539  typedef decltype(this->fad) FadType;
3540  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3541  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3542  auto m = this->m_;
3543  auto n = this->n_;
3544  auto ndot = this->ndot_;
3545 
3546 
3547  // SYMM is apparently not defined in the BLAS for complex types
3549  return;
3550 
3551  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3552  C4(m*n,ndot), C5(m*n,ndot);
3553  std::vector<ScalarType> a(m*m);
3554  for (unsigned int j=0; j<m; j++) {
3555  for (unsigned int i=0; i<m; i++) {
3556  a[i+j*m] = this->urand.number();
3557  A[i+j*m] = a[i+j*m];
3558  }
3559  }
3560  for (unsigned int j=0; j<n; j++) {
3561  for (unsigned int i=0; i<m; i++) {
3562  B[i+j*m] = FadType(ndot, this->urand.number());
3563  for (unsigned int k=0; k<ndot; k++)
3564  B[i+j*m].fastAccessDx(k) = this->urand.number();
3565  }
3566  }
3567  FadType alpha(ndot, this->urand.number());
3568  FadType beta(ndot, this->urand.number());
3569  for (unsigned int k=0; k<ndot; k++) {
3570  alpha.fastAccessDx(k) = this->urand.number();
3571  beta.fastAccessDx(k) = this->urand.number();
3572  }
3573 
3574  for (unsigned int j=0; j<n; j++) {
3575  for (unsigned int i=0; i<m; i++) {
3576  ScalarType val = this->urand.number();
3577  C1[i+j*m] = FadType(ndot, val);
3578  C2[i+j*m] = FadType(ndot, val);
3579  C3[i+j*m] = FadType(ndot, val);
3580  C4[i+j*m] = FadType(ndot, val);
3581  C5[i+j*m] = FadType(ndot, val);
3582  for (unsigned int k=0; k<ndot; k++) {
3583  val = this->urand.number();
3584  C1[i+j*m].fastAccessDx(k) = val;
3585  C2[i+j*m].fastAccessDx(k) = val;
3586  C3[i+j*m].fastAccessDx(k) = val;
3587  C4[i+j*m].fastAccessDx(k) = val;
3588  C5[i+j*m].fastAccessDx(k) = val;
3589  }
3590  }
3591  }
3592 
3593  Teuchos::BLAS<int,FadType> teuchos_blas;
3594  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3595  &A[0], m, &B[0], m, beta, &C1[0], m);
3596 
3597  Teuchos::BLAS<int,FadType> sacado_blas(false);
3598  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3599  &A[0], m, &B[0], m, beta, &C2[0], m);
3600 
3601  COMPARE_FAD_VECTORS(C1, C2, m*n);
3602 
3603  unsigned int sz = m*m + 2*m*n*(1+ndot);
3604  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3605  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3606  &A[0], m, &B[0], m, beta, &C3[0], m);
3607 
3608  COMPARE_FAD_VECTORS(C1, C3, m*n);
3609 
3610  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3611  &a[0], m, &B[0], m, beta, &C4[0], m);
3612 
3613  COMPARE_FAD_VECTORS(C1, C4, m*n);
3614 
3615  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3616  &a[0], m, &B[0], m, beta, &C5[0], m);
3617 
3618  COMPARE_FAD_VECTORS(C1, C5, m*n);
3619 
3620  // lower tri
3621  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3622  &A[0], m, &B[0], m, beta, &C1[0], m);
3623  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3624  &A[0], m, &B[0], m, beta, &C2[0], m);
3625  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3626  &A[0], m, &B[0], m, beta, &C3[0], m);
3627  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3628  &a[0], m, &B[0], m, beta, &C4[0], m);
3629  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3630  &a[0], m, &B[0], m, beta, &C5[0], m);
3631 
3632  COMPARE_FAD_VECTORS(C1, C2, m*n);
3633  COMPARE_FAD_VECTORS(C1, C3, m*n);
3634  COMPARE_FAD_VECTORS(C1, C4, m*n);
3635  COMPARE_FAD_VECTORS(C1, C5, m*n);
3636 }
3637 
3638 // Tests with constant B
3640  typedef decltype(this->fad) FadType;
3641  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3642  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3643  auto m = this->m_;
3644  auto n = this->n_;
3645  auto ndot = this->ndot_;
3646 
3647 
3648  // SYMM is apparently not defined in the BLAS for complex types
3650  return;
3651 
3652  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3653  C4(m*n,ndot), C5(m*n,ndot);
3654  std::vector<ScalarType> b(m*n);
3655  for (unsigned int j=0; j<m; j++) {
3656  for (unsigned int i=0; i<m; i++) {
3657  A[i+j*m] = FadType(ndot, this->urand.number());
3658  for (unsigned int k=0; k<ndot; k++)
3659  A[i+j*m].fastAccessDx(k) = this->urand.number();
3660  }
3661  }
3662  for (unsigned int j=0; j<n; j++) {
3663  for (unsigned int i=0; i<m; i++) {
3664  b[i+j*m] = this->urand.number();
3665  B[i+j*m] = b[i+j*m];
3666  }
3667  }
3668  FadType alpha(ndot, this->urand.number());
3669  FadType beta(ndot, this->urand.number());
3670  for (unsigned int k=0; k<ndot; k++) {
3671  alpha.fastAccessDx(k) = this->urand.number();
3672  beta.fastAccessDx(k) = this->urand.number();
3673  }
3674 
3675  for (unsigned int j=0; j<n; j++) {
3676  for (unsigned int i=0; i<m; i++) {
3677  ScalarType val = this->urand.number();
3678  C1[i+j*m] = FadType(ndot, val);
3679  C2[i+j*m] = FadType(ndot, val);
3680  C3[i+j*m] = FadType(ndot, val);
3681  C4[i+j*m] = FadType(ndot, val);
3682  C5[i+j*m] = FadType(ndot, val);
3683  for (unsigned int k=0; k<ndot; k++) {
3684  val = this->urand.number();
3685  C1[i+j*m].fastAccessDx(k) = val;
3686  C2[i+j*m].fastAccessDx(k) = val;
3687  C3[i+j*m].fastAccessDx(k) = val;
3688  C4[i+j*m].fastAccessDx(k) = val;
3689  C5[i+j*m].fastAccessDx(k) = val;
3690  }
3691  }
3692  }
3693 
3694  Teuchos::BLAS<int,FadType> teuchos_blas;
3695  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3696  &A[0], m, &B[0], m, beta, &C1[0], m);
3697 
3698  Teuchos::BLAS<int,FadType> sacado_blas(false);
3699  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3700  &A[0], m, &B[0], m, beta, &C2[0], m);
3701 
3702  COMPARE_FAD_VECTORS(C1, C2, m*n);
3703 
3704  unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
3705  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3706  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3707  &A[0], m, &B[0], m, beta, &C3[0], m);
3708 
3709  COMPARE_FAD_VECTORS(C1, C3, m*n);
3710 
3711  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3712  &A[0], m, &b[0], m, beta, &C4[0], m);
3713 
3714  COMPARE_FAD_VECTORS(C1, C4, m*n);
3715 
3716  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3717  &A[0], m, &b[0], m, beta, &C5[0], m);
3718 
3719  COMPARE_FAD_VECTORS(C1, C5, m*n);
3720 
3721  // lower tri
3722  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3723  &A[0], m, &B[0], m, beta, &C1[0], m);
3724  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3725  &A[0], m, &B[0], m, beta, &C2[0], m);
3726  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3727  &A[0], m, &B[0], m, beta, &C3[0], m);
3728  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3729  &A[0], m, &b[0], m, beta, &C4[0], m);
3730  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3731  &A[0], m, &b[0], m, beta, &C5[0], m);
3732 
3733  COMPARE_FAD_VECTORS(C1, C2, m*n);
3734  COMPARE_FAD_VECTORS(C1, C3, m*n);
3735  COMPARE_FAD_VECTORS(C1, C4, m*n);
3736  COMPARE_FAD_VECTORS(C1, C5, m*n);
3737 }
3738 
3739 // Tests with constant A and B
3741  typedef decltype(this->fad) FadType;
3742  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3743  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3744  auto m = this->m_;
3745  auto n = this->n_;
3746  auto ndot = this->ndot_;
3747 
3748 
3749  // SYMM is apparently not defined in the BLAS for complex types
3751  return;
3752 
3753  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3754  C4(m*n,ndot), C5(m*n,ndot);
3755  std::vector<ScalarType> a(m*m), b(m*n);
3756  for (unsigned int j=0; j<m; j++) {
3757  for (unsigned int i=0; i<m; i++) {
3758  a[i+j*m] = this->urand.number();
3759  A[i+j*m] = a[i+j*m];
3760  }
3761  }
3762  for (unsigned int j=0; j<n; j++) {
3763  for (unsigned int i=0; i<m; i++) {
3764  b[i+j*m] = this->urand.number();
3765  B[i+j*m] = b[i+j*m];
3766  }
3767  }
3768  FadType alpha(ndot, this->urand.number());
3769  FadType beta(ndot, this->urand.number());
3770  for (unsigned int k=0; k<ndot; k++) {
3771  alpha.fastAccessDx(k) = this->urand.number();
3772  beta.fastAccessDx(k) = this->urand.number();
3773  }
3774 
3775  for (unsigned int j=0; j<n; j++) {
3776  for (unsigned int i=0; i<m; i++) {
3777  ScalarType val = this->urand.number();
3778  C1[i+j*m] = FadType(ndot, val);
3779  C2[i+j*m] = FadType(ndot, val);
3780  C3[i+j*m] = FadType(ndot, val);
3781  C4[i+j*m] = FadType(ndot, val);
3782  C5[i+j*m] = FadType(ndot, val);
3783  for (unsigned int k=0; k<ndot; k++) {
3784  val = this->urand.number();
3785  C1[i+j*m].fastAccessDx(k) = val;
3786  C2[i+j*m].fastAccessDx(k) = val;
3787  C3[i+j*m].fastAccessDx(k) = val;
3788  C4[i+j*m].fastAccessDx(k) = val;
3789  C5[i+j*m].fastAccessDx(k) = val;
3790  }
3791  }
3792  }
3793 
3794  Teuchos::BLAS<int,FadType> teuchos_blas;
3795  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3796  &A[0], m, &B[0], m, beta, &C1[0], m);
3797 
3798  Teuchos::BLAS<int,FadType> sacado_blas(false);
3799  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3800  &A[0], m, &B[0], m, beta, &C2[0], m);
3801 
3802  COMPARE_FAD_VECTORS(C1, C2, m*n);
3803 
3804  unsigned int sz = m*m + m*n*(2+ndot);
3805  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3806  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3807  &A[0], m, &B[0], m, beta, &C3[0], m);
3808 
3809  COMPARE_FAD_VECTORS(C1, C3, m*n);
3810 
3811  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3812  &a[0], m, &b[0], m, beta, &C4[0], m);
3813 
3814  COMPARE_FAD_VECTORS(C1, C4, m*n);
3815 
3816  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3817  &a[0], m, &b[0], m, beta, &C5[0], m);
3818 
3819  COMPARE_FAD_VECTORS(C1, C5, m*n);
3820 
3821  // lower tri
3822  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3823  &A[0], m, &B[0], m, beta, &C1[0], m);
3824  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3825  &A[0], m, &B[0], m, beta, &C2[0], m);
3826  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3827  &A[0], m, &B[0], m, beta, &C3[0], m);
3828  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3829  &a[0], m, &b[0], m, beta, &C4[0], m);
3830  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3831  &a[0], m, &b[0], m, beta, &C5[0], m);
3832 
3833  COMPARE_FAD_VECTORS(C1, C2, m*n);
3834  COMPARE_FAD_VECTORS(C1, C3, m*n);
3835  COMPARE_FAD_VECTORS(C1, C4, m*n);
3836  COMPARE_FAD_VECTORS(C1, C5, m*n);
3837 }
3838 
3839 // Tests all arguments, left side
3841  typedef decltype(this->fad) FadType;
3842  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3843  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3844  auto m = this->m_;
3845  auto n = this->n_;
3846  auto ndot = this->ndot_;
3847 
3848  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
3849  for (unsigned int j=0; j<m; j++) {
3850  for (unsigned int i=0; i<m; i++) {
3851  A[i+j*m] = FadType(ndot, this->urand.number());
3852  for (unsigned int k=0; k<ndot; k++)
3853  A[i+j*m].fastAccessDx(k) = this->urand.number();
3854  }
3855  }
3856  FadType alpha(ndot, this->urand.number());
3857  for (unsigned int k=0; k<ndot; k++) {
3858  alpha.fastAccessDx(k) = this->urand.number();
3859  }
3860 
3861  for (unsigned int j=0; j<n; j++) {
3862  for (unsigned int i=0; i<m; i++) {
3863  ScalarType val = this->urand.number();
3864  B1[i+j*m] = FadType(ndot, val);
3865  B2[i+j*m] = FadType(ndot, val);
3866  B3[i+j*m] = FadType(ndot, val);
3867  for (unsigned int k=0; k<ndot; k++) {
3868  val = this->urand.number();
3869  B1[i+j*m].fastAccessDx(k) = val;
3870  B2[i+j*m].fastAccessDx(k) = val;
3871  B3[i+j*m].fastAccessDx(k) = val;
3872  }
3873  }
3874  }
3875 
3876  Teuchos::BLAS<int,FadType> teuchos_blas;
3878  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3879 
3880  Teuchos::BLAS<int,FadType> sacado_blas(false);
3882  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3883 
3884  COMPARE_FAD_VECTORS(B1, B2, m*n);
3885 
3886  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
3887  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3889  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3890 
3891  COMPARE_FAD_VECTORS(B1, B3, m*n);
3892 
3894  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3896  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3898  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3899  COMPARE_FAD_VECTORS(B1, B2, m*n);
3900  COMPARE_FAD_VECTORS(B1, B3, m*n);
3901 
3903  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3905  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3907  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3908  COMPARE_FAD_VECTORS(B1, B2, m*n);
3909  COMPARE_FAD_VECTORS(B1, B3, m*n);
3910 
3911  for (unsigned int i=0; i<m; i++) {
3912  A[i*m+i].val() = 1.0;
3913  for (unsigned int k=0; k<ndot; k++)
3914  A[i*m+i].fastAccessDx(k) = 0.0;
3915  }
3917  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3919  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3921  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3922  COMPARE_FAD_VECTORS(B1, B2, m*n);
3923  COMPARE_FAD_VECTORS(B1, B3, m*n);
3924 }
3925 
3926 // Tests all arguments, right side
3928  typedef decltype(this->fad) FadType;
3929  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3930  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3931  auto m = this->m_;
3932  auto n = this->n_;
3933  auto ndot = this->ndot_;
3934 
3935  VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
3936  for (unsigned int j=0; j<n; j++) {
3937  for (unsigned int i=0; i<n; i++) {
3938  A[i+j*n] = FadType(ndot, this->urand.number());
3939  for (unsigned int k=0; k<ndot; k++)
3940  A[i+j*n].fastAccessDx(k) = this->urand.number();
3941  }
3942  }
3943  FadType alpha(ndot, this->urand.number());
3944  for (unsigned int k=0; k<ndot; k++) {
3945  alpha.fastAccessDx(k) = this->urand.number();
3946  }
3947 
3948  for (unsigned int j=0; j<n; j++) {
3949  for (unsigned int i=0; i<m; i++) {
3950  ScalarType val = this->urand.number();
3951  B1[i+j*m] = FadType(ndot, val);
3952  B2[i+j*m] = FadType(ndot, val);
3953  B3[i+j*m] = FadType(ndot, val);
3954  for (unsigned int k=0; k<ndot; k++) {
3955  val = this->urand.number();
3956  B1[i+j*m].fastAccessDx(k) = val;
3957  B2[i+j*m].fastAccessDx(k) = val;
3958  B3[i+j*m].fastAccessDx(k) = val;
3959  }
3960  }
3961  }
3962 
3963  Teuchos::BLAS<int,FadType> teuchos_blas;
3965  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
3966 
3967  Teuchos::BLAS<int,FadType> sacado_blas(false);
3969  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
3970 
3971  COMPARE_FAD_VECTORS(B1, B2, m*n);
3972 
3973  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
3974  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3976  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
3977 
3978  COMPARE_FAD_VECTORS(B1, B3, m*n);
3979 
3981  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
3983  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
3985  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
3986  COMPARE_FAD_VECTORS(B1, B2, m*n);
3987  COMPARE_FAD_VECTORS(B1, B3, m*n);
3988 
3990  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
3992  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
3994  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
3995  COMPARE_FAD_VECTORS(B1, B2, m*n);
3996  COMPARE_FAD_VECTORS(B1, B3, m*n);
3997 
3998  for (unsigned int i=0; i<n; i++) {
3999  A[i*n+i].val() = 1.0;
4000  for (unsigned int k=0; k<ndot; k++)
4001  A[i*n+i].fastAccessDx(k) = 0.0;
4002  }
4004  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4006  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4008  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4009  COMPARE_FAD_VECTORS(B1, B2, m*n);
4010  COMPARE_FAD_VECTORS(B1, B3, m*n);
4011 }
4012 
4013 // Tests all arguments, left side, different lda, ldb
4015  typedef decltype(this->fad) FadType;
4016  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4017  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4018  auto m = this->m_;
4019  auto n = this->n_;
4020  auto ndot = this->ndot_;
4021 
4022  unsigned int lda = m+4;
4023  unsigned int ldb = m+5;
4024  VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4025  for (unsigned int j=0; j<m; j++) {
4026  for (unsigned int i=0; i<lda; i++) {
4027  A[i+j*lda] = FadType(ndot, this->urand.number());
4028  for (unsigned int k=0; k<ndot; k++)
4029  A[i+j*lda].fastAccessDx(k) = this->urand.number();
4030  }
4031  }
4032  FadType alpha(ndot, this->urand.number());
4033  for (unsigned int k=0; k<ndot; k++) {
4034  alpha.fastAccessDx(k) = this->urand.number();
4035  }
4036 
4037  for (unsigned int j=0; j<n; j++) {
4038  for (unsigned int i=0; i<ldb; i++) {
4039  ScalarType val = this->urand.number();
4040  B1[i+j*ldb] = FadType(ndot, val);
4041  B2[i+j*ldb] = FadType(ndot, val);
4042  B3[i+j*ldb] = FadType(ndot, val);
4043  for (unsigned int k=0; k<ndot; k++) {
4044  val = this->urand.number();
4045  B1[i+j*ldb].fastAccessDx(k) = val;
4046  B2[i+j*ldb].fastAccessDx(k) = val;
4047  B3[i+j*ldb].fastAccessDx(k) = val;
4048  }
4049  }
4050  }
4051 
4052  Teuchos::BLAS<int,FadType> teuchos_blas;
4054  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4055 
4056  Teuchos::BLAS<int,FadType> sacado_blas(false);
4058  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4059 
4060  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4061 
4062  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4063  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4065  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4066 
4067  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4068 
4070  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4072  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4074  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4075  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4076  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4077 
4079  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4081  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4083  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4084  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4085  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4086 
4087  for (unsigned int i=0; i<m; i++) {
4088  A[i*lda+i].val() = 1.0;
4089  for (unsigned int k=0; k<ndot; k++)
4090  A[i*lda+i].fastAccessDx(k) = 0.0;
4091  }
4093  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4095  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4097  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4098  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4099  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4100 }
4101 
4102 // Tests all arguments, right side, different lda, ldb
4104  typedef decltype(this->fad) FadType;
4105  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4106  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4107  auto m = this->m_;
4108  auto n = this->n_;
4109  auto ndot = this->ndot_;
4110 
4111  unsigned int lda = n+4;
4112  unsigned int ldb = m+5;
4113  VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4114  for (unsigned int j=0; j<n; j++) {
4115  for (unsigned int i=0; i<lda; i++) {
4116  A[i+j*lda] = FadType(ndot, this->urand.number());
4117  for (unsigned int k=0; k<ndot; k++)
4118  A[i+j*lda].fastAccessDx(k) = this->urand.number();
4119  }
4120  }
4121  FadType alpha(ndot, this->urand.number());
4122  for (unsigned int k=0; k<ndot; k++) {
4123  alpha.fastAccessDx(k) = this->urand.number();
4124  }
4125 
4126  for (unsigned int j=0; j<n; j++) {
4127  for (unsigned int i=0; i<ldb; i++) {
4128  ScalarType val = this->urand.number();
4129  B1[i+j*ldb] = FadType(ndot, val);
4130  B2[i+j*ldb] = FadType(ndot, val);
4131  B3[i+j*ldb] = FadType(ndot, val);
4132  for (unsigned int k=0; k<ndot; k++) {
4133  val = this->urand.number();
4134  B1[i+j*ldb].fastAccessDx(k) = val;
4135  B2[i+j*ldb].fastAccessDx(k) = val;
4136  B3[i+j*ldb].fastAccessDx(k) = val;
4137  }
4138  }
4139  }
4140 
4141  Teuchos::BLAS<int,FadType> teuchos_blas;
4143  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4144 
4145  Teuchos::BLAS<int,FadType> sacado_blas(false);
4147  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4148 
4149  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4150 
4151  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4152  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4154  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4155 
4156  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4157 
4159  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4161  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4163  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4164  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4165  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4166 
4168  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4170  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4172  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4173  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4174  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4175 
4176  for (unsigned int i=0; i<n; i++) {
4177  A[i*lda+i].val() = 1.0;
4178  for (unsigned int k=0; k<ndot; k++)
4179  A[i*lda+i].fastAccessDx(k) = 0.0;
4180  }
4182  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4184  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4186  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4187  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4188  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4189 }
4190 
4191 // Tests constant alpha
4193  typedef decltype(this->fad) FadType;
4194  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4195  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4196  auto m = this->m_;
4197  auto n = this->n_;
4198  auto ndot = this->ndot_;
4199 
4200  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4201  for (unsigned int j=0; j<m; j++) {
4202  for (unsigned int i=0; i<m; i++) {
4203  A[i+j*m] = FadType(ndot, this->urand.number());
4204  for (unsigned int k=0; k<ndot; k++)
4205  A[i+j*m].fastAccessDx(k) = this->urand.number();
4206  }
4207  }
4208  ScalarType alpha = this->urand.number();
4209 
4210  for (unsigned int j=0; j<n; j++) {
4211  for (unsigned int i=0; i<m; i++) {
4212  ScalarType val = this->urand.number();
4213  B1[i+j*m] = FadType(ndot, val);
4214  B2[i+j*m] = FadType(ndot, val);
4215  B3[i+j*m] = FadType(ndot, val);
4216  for (unsigned int k=0; k<ndot; k++) {
4217  val = this->urand.number();
4218  B1[i+j*m].fastAccessDx(k) = val;
4219  B2[i+j*m].fastAccessDx(k) = val;
4220  B3[i+j*m].fastAccessDx(k) = val;
4221  }
4222  }
4223  }
4224 
4225  Teuchos::BLAS<int,FadType> teuchos_blas;
4227  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4228 
4229  Teuchos::BLAS<int,FadType> sacado_blas(false);
4231  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4232 
4233  COMPARE_FAD_VECTORS(B1, B2, m*n);
4234 
4235  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4236  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4238  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4239 
4240  COMPARE_FAD_VECTORS(B1, B3, m*n);
4241 
4243  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4245  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4247  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4248  COMPARE_FAD_VECTORS(B1, B2, m*n);
4249  COMPARE_FAD_VECTORS(B1, B3, m*n);
4250 
4252  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4254  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4256  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4257  COMPARE_FAD_VECTORS(B1, B2, m*n);
4258  COMPARE_FAD_VECTORS(B1, B3, m*n);
4259 
4260  for (unsigned int i=0; i<m; i++) {
4261  A[i*m+i].val() = 1.0;
4262  for (unsigned int k=0; k<ndot; k++)
4263  A[i*m+i].fastAccessDx(k) = 0.0;
4264  }
4266  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4268  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4270  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4271  COMPARE_FAD_VECTORS(B1, B2, m*n);
4272  COMPARE_FAD_VECTORS(B1, B3, m*n);
4273 }
4274 
4275 // Tests constant B
4277  typedef decltype(this->fad) FadType;
4278  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4279  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4280  auto m = this->m_;
4281  auto n = this->n_;
4282  auto ndot = this->ndot_;
4283 
4284  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4285  for (unsigned int j=0; j<m; j++) {
4286  for (unsigned int i=0; i<m; i++) {
4287  A[i+j*m] = FadType(ndot, this->urand.number());
4288  for (unsigned int k=0; k<ndot; k++)
4289  A[i+j*m].fastAccessDx(k) = this->urand.number();
4290  }
4291  }
4292  FadType alpha(ndot, this->urand.number());
4293  for (unsigned int k=0; k<ndot; k++) {
4294  alpha.fastAccessDx(k) = this->urand.number();
4295  }
4296 
4297  for (unsigned int j=0; j<n; j++) {
4298  for (unsigned int i=0; i<m; i++) {
4299  ScalarType val = this->urand.number();
4300  B1[i+j*m] = val;
4301  B2[i+j*m] = val;
4302  B3[i+j*m] = val;
4303  }
4304  }
4305 
4306  Teuchos::BLAS<int,FadType> teuchos_blas;
4308  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4309 
4310  Teuchos::BLAS<int,FadType> sacado_blas(false);
4312  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4313 
4314  COMPARE_FAD_VECTORS(B1, B2, m*n);
4315 
4316  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4317  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4319  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4320 
4321  COMPARE_FAD_VECTORS(B1, B3, m*n);
4322 
4324  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4326  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4328  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4329  COMPARE_FAD_VECTORS(B1, B2, m*n);
4330  COMPARE_FAD_VECTORS(B1, B3, m*n);
4331 
4333  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4335  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4337  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4338  COMPARE_FAD_VECTORS(B1, B2, m*n);
4339  COMPARE_FAD_VECTORS(B1, B3, m*n);
4340 
4341  for (unsigned int i=0; i<m; i++) {
4342  A[i*m+i].val() = 1.0;
4343  for (unsigned int k=0; k<ndot; k++)
4344  A[i*m+i].fastAccessDx(k) = 0.0;
4345  }
4347  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4349  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4351  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4352  COMPARE_FAD_VECTORS(B1, B2, m*n);
4353  COMPARE_FAD_VECTORS(B1, B3, m*n);
4354 }
4355 
4356 // Tests constant A
4358  typedef decltype(this->fad) FadType;
4359  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4360  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4361  auto m = this->m_;
4362  auto n = this->n_;
4363  auto ndot = this->ndot_;
4364 
4365  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
4366  B4(m*n,ndot), B5(m*n,ndot);
4367  std::vector<ScalarType> a(m*m);
4368  for (unsigned int j=0; j<m; j++) {
4369  for (unsigned int i=0; i<m; i++) {
4370  a[i+j*m] = this->urand.number();
4371  A[i+j*m] = a[i+j*m];
4372  }
4373  }
4374  FadType alpha(ndot, this->urand.number());
4375  for (unsigned int k=0; k<ndot; k++) {
4376  alpha.fastAccessDx(k) = this->urand.number();
4377  }
4378 
4379  for (unsigned int j=0; j<n; j++) {
4380  for (unsigned int i=0; i<m; i++) {
4381  ScalarType val = this->urand.number();
4382  B1[i+j*m] = FadType(ndot, val);
4383  B2[i+j*m] = FadType(ndot, val);
4384  B3[i+j*m] = FadType(ndot, val);
4385  B4[i+j*m] = FadType(ndot, val);
4386  B5[i+j*m] = FadType(ndot, val);
4387  for (unsigned int k=0; k<ndot; k++) {
4388  val = this->urand.number();
4389  B1[i+j*m].fastAccessDx(k) = val;
4390  B2[i+j*m].fastAccessDx(k) = val;
4391  B3[i+j*m].fastAccessDx(k) = val;
4392  B4[i+j*m].fastAccessDx(k) = val;
4393  B5[i+j*m].fastAccessDx(k) = val;
4394  }
4395  }
4396  }
4397 
4398  Teuchos::BLAS<int,FadType> teuchos_blas;
4400  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4401 
4402  Teuchos::BLAS<int,FadType> sacado_blas(false);
4404  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4405 
4406  COMPARE_FAD_VECTORS(B1, B2, m*n);
4407 
4408  unsigned int sz = m*m + m*n*(1+ndot);
4409  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4411  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4412 
4413  COMPARE_FAD_VECTORS(B1, B3, m*n);
4414 
4416  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4417 
4418  COMPARE_FAD_VECTORS(B1, B4, m*n);
4419 
4421  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4422 
4423  COMPARE_FAD_VECTORS(B1, B5, m*n);
4424 
4426  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4428  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4430  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4432  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4434  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4435  COMPARE_FAD_VECTORS(B1, B2, m*n);
4436  COMPARE_FAD_VECTORS(B1, B3, m*n);
4437  COMPARE_FAD_VECTORS(B1, B4, m*n);
4438  COMPARE_FAD_VECTORS(B1, B5, m*n);
4439 
4441  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4443  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4445  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4447  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4449  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4450  COMPARE_FAD_VECTORS(B1, B2, m*n);
4451  COMPARE_FAD_VECTORS(B1, B3, m*n);
4452  COMPARE_FAD_VECTORS(B1, B4, m*n);
4453  COMPARE_FAD_VECTORS(B1, B5, m*n);
4454 
4455  for (unsigned int i=0; i<m; i++) {
4456  A[i*m+i].val() = 1.0;
4457  for (unsigned int k=0; k<ndot; k++)
4458  A[i*m+i].fastAccessDx(k) = 0.0;
4459  }
4461  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4463  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4465  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4467  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4469  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4470  COMPARE_FAD_VECTORS(B1, B2, m*n);
4471  COMPARE_FAD_VECTORS(B1, B3, m*n);
4472  COMPARE_FAD_VECTORS(B1, B4, m*n);
4473  COMPARE_FAD_VECTORS(B1, B5, m*n);
4474 }
4475 
4476 // Tests all arguments, left side
4478  typedef decltype(this->fad) FadType;
4479  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4480  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4481  auto m = this->m_;
4482  auto n = this->n_;
4483  auto ndot = this->ndot_;
4484 
4485  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4486  for (unsigned int j=0; j<m; j++) {
4487  for (unsigned int i=0; i<m; i++) {
4488  //A[i+j*m] = this->urand.number();
4489  A[i+j*m] = FadType(ndot, this->urand.number());
4490  for (unsigned int k=0; k<ndot; k++)
4491  A[i+j*m].fastAccessDx(k) = this->urand.number();
4492  }
4493  }
4494  FadType alpha(ndot, this->urand.number());
4495  for (unsigned int k=0; k<ndot; k++) {
4496  alpha.fastAccessDx(k) = this->urand.number();
4497  }
4498  //ScalarType alpha = this->urand.number();
4499 
4500  for (unsigned int j=0; j<n; j++) {
4501  for (unsigned int i=0; i<m; i++) {
4502  ScalarType val = this->urand.number();
4503  // B1[i+j*m] = val;
4504  // B2[i+j*m] = val;
4505  // B3[i+j*m] = val;
4506  B1[i+j*m] = FadType(ndot, val);
4507  B2[i+j*m] = FadType(ndot, val);
4508  B3[i+j*m] = FadType(ndot, val);
4509  for (unsigned int k=0; k<ndot; k++) {
4510  val = this->urand.number();
4511  B1[i+j*m].fastAccessDx(k) = val;
4512  B2[i+j*m].fastAccessDx(k) = val;
4513  B3[i+j*m].fastAccessDx(k) = val;
4514  }
4515  }
4516  }
4517 
4518  Teuchos::BLAS<int,FadType> teuchos_blas;
4520  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4521 
4522  Teuchos::BLAS<int,FadType> sacado_blas(false);
4524  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4525 
4526  COMPARE_FAD_VECTORS(B1, B2, m*n);
4527 
4528  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4529  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4531  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4532 
4533  COMPARE_FAD_VECTORS(B1, B3, m*n);
4534 
4536  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4538  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4540  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4541  COMPARE_FAD_VECTORS(B1, B2, m*n);
4542  COMPARE_FAD_VECTORS(B1, B3, m*n);
4543 
4545  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4547  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4549  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4550  COMPARE_FAD_VECTORS(B1, B2, m*n);
4551  COMPARE_FAD_VECTORS(B1, B3, m*n);
4552 
4553  for (unsigned int i=0; i<m; i++) {
4554  A[i*m+i].val() = 1.0;
4555  for (unsigned int k=0; k<ndot; k++)
4556  A[i*m+i].fastAccessDx(k) = 0.0;
4557  }
4559  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4561  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4563  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4564  COMPARE_FAD_VECTORS(B1, B2, m*n);
4565  COMPARE_FAD_VECTORS(B1, B3, m*n);
4566 }
4567 
4568 // Tests all arguments, right side
4570  typedef decltype(this->fad) FadType;
4571  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4572  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4573  auto m = this->m_;
4574  auto n = this->n_;
4575  auto ndot = this->ndot_;
4576 
4577  VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4578  for (unsigned int j=0; j<n; j++) {
4579  for (unsigned int i=0; i<n; i++) {
4580  A[i+j*n] = FadType(ndot, this->urand.number());
4581  for (unsigned int k=0; k<ndot; k++)
4582  A[i+j*n].fastAccessDx(k) = this->urand.number();
4583  }
4584  }
4585  FadType alpha(ndot, this->urand.number());
4586  for (unsigned int k=0; k<ndot; k++) {
4587  alpha.fastAccessDx(k) = this->urand.number();
4588  }
4589 
4590  for (unsigned int j=0; j<n; j++) {
4591  for (unsigned int i=0; i<m; i++) {
4592  ScalarType val = this->urand.number();
4593  B1[i+j*m] = FadType(ndot, val);
4594  B2[i+j*m] = FadType(ndot, val);
4595  B3[i+j*m] = FadType(ndot, val);
4596  for (unsigned int k=0; k<ndot; k++) {
4597  val = this->urand.number();
4598  B1[i+j*m].fastAccessDx(k) = val;
4599  B2[i+j*m].fastAccessDx(k) = val;
4600  B3[i+j*m].fastAccessDx(k) = val;
4601  }
4602  }
4603  }
4604 
4605  Teuchos::BLAS<int,FadType> teuchos_blas;
4607  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4608 
4609  Teuchos::BLAS<int,FadType> sacado_blas(false);
4611  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4612 
4613  COMPARE_FAD_VECTORS(B1, B2, m*n);
4614 
4615  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4616  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4618  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4619 
4620  COMPARE_FAD_VECTORS(B1, B3, m*n);
4621 
4623  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4625  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4627  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4628  COMPARE_FAD_VECTORS(B1, B2, m*n);
4629  COMPARE_FAD_VECTORS(B1, B3, m*n);
4630 
4632  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4634  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4636  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4637  COMPARE_FAD_VECTORS(B1, B2, m*n);
4638  COMPARE_FAD_VECTORS(B1, B3, m*n);
4639 
4640  for (unsigned int i=0; i<n; i++) {
4641  A[i*n+i].val() = 1.0;
4642  for (unsigned int k=0; k<ndot; k++)
4643  A[i*n+i].fastAccessDx(k) = 0.0;
4644  }
4646  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4648  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4650  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4651  COMPARE_FAD_VECTORS(B1, B2, m*n);
4652  COMPARE_FAD_VECTORS(B1, B3, m*n);
4653 }
4654 
4655 // Tests all arguments, left side, different lda, ldb
4657  typedef decltype(this->fad) FadType;
4658  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4659  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4660  auto m = this->m_;
4661  auto n = this->n_;
4662  auto ndot = this->ndot_;
4663 
4664  unsigned int lda = m+4;
4665  unsigned int ldb = m+5;
4666  VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4667  for (unsigned int j=0; j<m; j++) {
4668  for (unsigned int i=0; i<lda; i++) {
4669  A[i+j*lda] = FadType(ndot, this->urand.number());
4670  for (unsigned int k=0; k<ndot; k++)
4671  A[i+j*lda].fastAccessDx(k) = this->urand.number();
4672  }
4673  }
4674  FadType alpha(ndot, this->urand.number());
4675  for (unsigned int k=0; k<ndot; k++) {
4676  alpha.fastAccessDx(k) = this->urand.number();
4677  }
4678 
4679  for (unsigned int j=0; j<n; j++) {
4680  for (unsigned int i=0; i<ldb; i++) {
4681  ScalarType val = this->urand.number();
4682  B1[i+j*ldb] = FadType(ndot, val);
4683  B2[i+j*ldb] = FadType(ndot, val);
4684  B3[i+j*ldb] = FadType(ndot, val);
4685  for (unsigned int k=0; k<ndot; k++) {
4686  val = this->urand.number();
4687  B1[i+j*ldb].fastAccessDx(k) = val;
4688  B2[i+j*ldb].fastAccessDx(k) = val;
4689  B3[i+j*ldb].fastAccessDx(k) = val;
4690  }
4691  }
4692  }
4693 
4694  Teuchos::BLAS<int,FadType> teuchos_blas;
4696  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4697 
4698  Teuchos::BLAS<int,FadType> sacado_blas(false);
4700  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4701 
4702  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4703 
4704  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4705  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4707  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4708 
4709  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4710 
4712  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4714  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4716  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4717  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4718  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4719 
4721  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4723  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4725  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4726  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4727  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4728 
4729  for (unsigned int i=0; i<m; i++) {
4730  A[i*lda+i].val() = 1.0;
4731  for (unsigned int k=0; k<ndot; k++)
4732  A[i*lda+i].fastAccessDx(k) = 0.0;
4733  }
4735  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4737  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4739  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4740  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4741  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4742 }
4743 
4744 // Tests all arguments, right side, different lda, ldb
4746  typedef decltype(this->fad) FadType;
4747  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4748  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4749  auto m = this->m_;
4750  auto n = this->n_;
4751  auto ndot = this->ndot_;
4752 
4753  unsigned int lda = n+4;
4754  unsigned int ldb = m+5;
4755  VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4756  for (unsigned int j=0; j<n; j++) {
4757  for (unsigned int i=0; i<lda; i++) {
4758  A[i+j*lda] = FadType(ndot, this->urand.number());
4759  for (unsigned int k=0; k<ndot; k++)
4760  A[i+j*lda].fastAccessDx(k) = this->urand.number();
4761  }
4762  }
4763  FadType alpha(ndot, this->urand.number());
4764  for (unsigned int k=0; k<ndot; k++) {
4765  alpha.fastAccessDx(k) = this->urand.number();
4766  }
4767 
4768  for (unsigned int j=0; j<n; j++) {
4769  for (unsigned int i=0; i<ldb; i++) {
4770  ScalarType val = this->urand.number();
4771  B1[i+j*ldb] = FadType(ndot, val);
4772  B2[i+j*ldb] = FadType(ndot, val);
4773  B3[i+j*ldb] = FadType(ndot, val);
4774  for (unsigned int k=0; k<ndot; k++) {
4775  val = this->urand.number();
4776  B1[i+j*ldb].fastAccessDx(k) = val;
4777  B2[i+j*ldb].fastAccessDx(k) = val;
4778  B3[i+j*ldb].fastAccessDx(k) = val;
4779  }
4780  }
4781  }
4782 
4783  Teuchos::BLAS<int,FadType> teuchos_blas;
4785  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4786 
4787  Teuchos::BLAS<int,FadType> sacado_blas(false);
4789  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4790 
4791  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4792 
4793  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4794  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4796  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4797 
4798  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4799 
4801  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4803  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4805  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4806  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4807  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4808 
4810  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4812  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4814  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4815  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4816  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4817 
4818  for (unsigned int i=0; i<n; i++) {
4819  A[i*lda+i].val() = 1.0;
4820  for (unsigned int k=0; k<ndot; k++)
4821  A[i*lda+i].fastAccessDx(k) = 0.0;
4822  }
4824  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4826  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4828  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4829  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4830  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4831 }
4832 
4833 // Tests constant alpha
4835  typedef decltype(this->fad) FadType;
4836  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4837  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4838  auto m = this->m_;
4839  auto n = this->n_;
4840  auto ndot = this->ndot_;
4841 
4842  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4843  for (unsigned int j=0; j<m; j++) {
4844  for (unsigned int i=0; i<m; i++) {
4845  A[i+j*m] = FadType(ndot, this->urand.number());
4846  for (unsigned int k=0; k<ndot; k++)
4847  A[i+j*m].fastAccessDx(k) = this->urand.number();
4848  }
4849  }
4850  ScalarType alpha = this->urand.number();
4851 
4852  for (unsigned int j=0; j<n; j++) {
4853  for (unsigned int i=0; i<m; i++) {
4854  ScalarType val = this->urand.number();
4855  B1[i+j*m] = FadType(ndot, val);
4856  B2[i+j*m] = FadType(ndot, val);
4857  B3[i+j*m] = FadType(ndot, val);
4858  for (unsigned int k=0; k<ndot; k++) {
4859  val = this->urand.number();
4860  B1[i+j*m].fastAccessDx(k) = val;
4861  B2[i+j*m].fastAccessDx(k) = val;
4862  B3[i+j*m].fastAccessDx(k) = val;
4863  }
4864  }
4865  }
4866 
4867  Teuchos::BLAS<int,FadType> teuchos_blas;
4869  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4870 
4871  Teuchos::BLAS<int,FadType> sacado_blas(false);
4873  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4874 
4875  COMPARE_FAD_VECTORS(B1, B2, m*n);
4876 
4877  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4878  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4880  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4881 
4882  COMPARE_FAD_VECTORS(B1, B3, m*n);
4883 
4885  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4887  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4889  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4890  COMPARE_FAD_VECTORS(B1, B2, m*n);
4891  COMPARE_FAD_VECTORS(B1, B3, m*n);
4892 
4894  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4896  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4898  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4899  COMPARE_FAD_VECTORS(B1, B2, m*n);
4900  COMPARE_FAD_VECTORS(B1, B3, m*n);
4901 
4902  for (unsigned int i=0; i<m; i++) {
4903  A[i*m+i].val() = 1.0;
4904  for (unsigned int k=0; k<ndot; k++)
4905  A[i*m+i].fastAccessDx(k) = 0.0;
4906  }
4908  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4910  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4912  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4913  COMPARE_FAD_VECTORS(B1, B2, m*n);
4914  COMPARE_FAD_VECTORS(B1, B3, m*n);
4915 }
4916 
4917 // Tests constant B
4919  typedef decltype(this->fad) FadType;
4920  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4921  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4922  auto m = this->m_;
4923  auto n = this->n_;
4924  auto ndot = this->ndot_;
4925 
4926  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4927  for (unsigned int j=0; j<m; j++) {
4928  for (unsigned int i=0; i<m; i++) {
4929  A[i+j*m] = FadType(ndot, this->urand.number());
4930  for (unsigned int k=0; k<ndot; k++)
4931  A[i+j*m].fastAccessDx(k) = this->urand.number();
4932  }
4933  }
4934  FadType alpha(ndot, this->urand.number());
4935  for (unsigned int k=0; k<ndot; k++) {
4936  alpha.fastAccessDx(k) = this->urand.number();
4937  }
4938 
4939  for (unsigned int j=0; j<n; j++) {
4940  for (unsigned int i=0; i<m; i++) {
4941  ScalarType val = this->urand.number();
4942  B1[i+j*m] = val;
4943  B2[i+j*m] = val;
4944  B3[i+j*m] = val;
4945  }
4946  }
4947 
4948  Teuchos::BLAS<int,FadType> teuchos_blas;
4950  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4951 
4952  Teuchos::BLAS<int,FadType> sacado_blas(false);
4954  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4955 
4956  COMPARE_FAD_VECTORS(B1, B2, m*n);
4957 
4958  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4959  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4961  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4962 
4963  COMPARE_FAD_VECTORS(B1, B3, m*n);
4964 
4966  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4968  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4970  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4971  COMPARE_FAD_VECTORS(B1, B2, m*n);
4972  COMPARE_FAD_VECTORS(B1, B3, m*n);
4973 
4975  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4977  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4979  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4980  COMPARE_FAD_VECTORS(B1, B2, m*n);
4981  COMPARE_FAD_VECTORS(B1, B3, m*n);
4982 
4983  for (unsigned int i=0; i<m; i++) {
4984  A[i*m+i].val() = 1.0;
4985  for (unsigned int k=0; k<ndot; k++)
4986  A[i*m+i].fastAccessDx(k) = 0.0;
4987  }
4989  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4991  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4993  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4994  COMPARE_FAD_VECTORS(B1, B2, m*n);
4995  COMPARE_FAD_VECTORS(B1, B3, m*n);
4996 }
4997 
4998 // Tests constant A
5000  typedef decltype(this->fad) FadType;
5001  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
5002  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
5003  auto m = this->m_;
5004  auto n = this->n_;
5005  auto ndot = this->ndot_;
5006 
5007  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
5008  B4(m*n,ndot), B5(m*n,ndot);
5009  std::vector<ScalarType> a(m*m);
5010  for (unsigned int j=0; j<m; j++) {
5011  for (unsigned int i=0; i<m; i++) {
5012  a[i+j*m] = this->urand.number();
5013  A[i+j*m] = a[i+j*m];
5014  }
5015  }
5016  FadType alpha(ndot, this->urand.number());
5017  for (unsigned int k=0; k<ndot; k++) {
5018  alpha.fastAccessDx(k) = this->urand.number();
5019  }
5020 
5021  for (unsigned int j=0; j<n; j++) {
5022  for (unsigned int i=0; i<m; i++) {
5023  ScalarType val = this->urand.number();
5024  B1[i+j*m] = FadType(ndot, val);
5025  B2[i+j*m] = FadType(ndot, val);
5026  B3[i+j*m] = FadType(ndot, val);
5027  B4[i+j*m] = FadType(ndot, val);
5028  B5[i+j*m] = FadType(ndot, val);
5029  for (unsigned int k=0; k<ndot; k++) {
5030  val = this->urand.number();
5031  B1[i+j*m].fastAccessDx(k) = val;
5032  B2[i+j*m].fastAccessDx(k) = val;
5033  B3[i+j*m].fastAccessDx(k) = val;
5034  B4[i+j*m].fastAccessDx(k) = val;
5035  B5[i+j*m].fastAccessDx(k) = val;
5036  }
5037  }
5038  }
5039 
5040  Teuchos::BLAS<int,FadType> teuchos_blas;
5042  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5043 
5044  Teuchos::BLAS<int,FadType> sacado_blas(false);
5046  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5047 
5048  COMPARE_FAD_VECTORS(B1, B2, m*n);
5049 
5050  unsigned int sz = m*m + m*n*(1+ndot);
5051  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
5053  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5054 
5055  COMPARE_FAD_VECTORS(B1, B3, m*n);
5056 
5058  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5059 
5060  COMPARE_FAD_VECTORS(B1, B4, m*n);
5061 
5063  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5064 
5065  COMPARE_FAD_VECTORS(B1, B5, m*n);
5066 
5068  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5070  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5072  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5074  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5076  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5077  COMPARE_FAD_VECTORS(B1, B2, m*n);
5078  COMPARE_FAD_VECTORS(B1, B3, m*n);
5079  COMPARE_FAD_VECTORS(B1, B4, m*n);
5080  COMPARE_FAD_VECTORS(B1, B5, m*n);
5081 
5083  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5085  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5087  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5089  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5091  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5092  COMPARE_FAD_VECTORS(B1, B2, m*n);
5093  COMPARE_FAD_VECTORS(B1, B3, m*n);
5094  COMPARE_FAD_VECTORS(B1, B4, m*n);
5095  COMPARE_FAD_VECTORS(B1, B5, m*n);
5096 
5097  for (unsigned int i=0; i<m; i++) {
5098  A[i*m+i].val() = 1.0;
5099  for (unsigned int k=0; k<ndot; k++)
5100  A[i*m+i].fastAccessDx(k) = 0.0;
5101  }
5103  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5105  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5107  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5109  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5111  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5112  COMPARE_FAD_VECTORS(B1, B2, m*n);
5113  COMPARE_FAD_VECTORS(B1, B3, m*n);
5114  COMPARE_FAD_VECTORS(B1, B4, m*n);
5115  COMPARE_FAD_VECTORS(B1, B5, m*n);
5116 }
5117 
5120  testSCAL1,
5121  testSCAL2,
5122  testSCAL3,
5123  testSCAL4,
5124 
5125  testCOPY1,
5126  testCOPY2,
5127  testCOPY3,
5128  testCOPY4,
5129 
5130  testAXPY1,
5131  testAXPY2,
5132  testAXPY3,
5133  testAXPY4,
5134 
5135  testDOT1,
5136  testDOT2,
5137  testDOT3,
5138  testDOT4,
5139 
5140  testNRM21,
5141  testNRM22,
5142 
5143  testGEMV1,
5144  testGEMV2,
5145  testGEMV3,
5146  testGEMV4,
5147  testGEMV5,
5148  testGEMV6,
5149  testGEMV7,
5150  testGEMV8,
5151  testGEMV9,
5152 
5153  testTRMV1,
5154  testTRMV2,
5155  testTRMV3,
5156  testTRMV4,
5157 
5158  testGER1,
5159  testGER2,
5160  testGER3,
5161  testGER4,
5162  testGER5,
5163  testGER6,
5164  testGER7,
5165 
5166  testGEMM1,
5167  testGEMM2,
5168  testGEMM3,
5169  testGEMM4,
5170  testGEMM5,
5171  testGEMM6,
5172  testGEMM7,
5173  testGEMM8,
5174  testGEMM9,
5175  testGEMM10,
5176 
5177  testSYMM1,
5178  testSYMM2,
5179  testSYMM3,
5180  testSYMM4,
5181  testSYMM5,
5182  testSYMM6,
5183  testSYMM7,
5184  testSYMM8,
5185  testSYMM9,
5186 
5187  testTRMM1,
5188  testTRMM2,
5189  testTRMM3,
5190  testTRMM4,
5191  testTRMM5,
5192  testTRMM6,
5193  testTRMM7,
5194 
5195  testTRSM1,
5196  testTRSM2,
5197  testTRSM3,
5198  testTRSM4,
5199  testTRSM5,
5200  testTRSM6,
5201  testTRSM7);
5202 
5203 
5204 #endif // FADBLASUNITTESTS_HPP
Sacado::Random< ScalarType > urand
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
REGISTER_TYPED_TEST_SUITE_P(FadBLASUnitTests, testSCAL1, testSCAL2, testSCAL3, testSCAL4, testCOPY1, testCOPY2, testCOPY3, testCOPY4, testAXPY1, testAXPY2, testAXPY3, testAXPY4, testDOT1, testDOT2, testDOT3, testDOT4, testNRM21, testNRM22, testGEMV1, testGEMV2, testGEMV3, testGEMV4, testGEMV5, testGEMV6, testGEMV7, testGEMV8, testGEMV9, testTRMV1, testTRMV2, testTRMV3, testTRMV4, testGER1, testGER2, testGER3, testGER4, testGER5, testGER6, testGER7, testGEMM1, testGEMM2, testGEMM3, testGEMM4, testGEMM5, testGEMM6, testGEMM7, testGEMM8, testGEMM9, testGEMM10, testSYMM1, testSYMM2, testSYMM3, testSYMM4, testSYMM5, testSYMM6, testSYMM7, testSYMM8, testSYMM9, testTRMM1, testTRMM2, testTRMM3, testTRMM4, testTRMM5, testTRMM6, testTRMM7, testTRSM1, testTRSM2, testTRSM3, testTRSM4, testTRSM5, testTRSM6, testTRSM7)
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
TYPED_TEST_P(FadBLASUnitTests, testSCAL1)
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Sacado::Random< double > real_urand
#define COMPARE_FAD_VECTORS(X1, X2, n)
Sacado::Fad::DFad< double > FadType
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sacado::ScalarType< FadType >::type ScalarType
Sacado::Fad::Vector< unsigned int, FadType > VectorType
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
expr val()
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
#define A
Definition: Sacado_rad.hpp:552
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
TYPED_TEST_SUITE_P(FadBLASUnitTests)
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
A random number generator that generates random numbers uniformly distributed in the interval (a...
#define COMPARE_FADS(a, b)
Definition: GTestUtils.hpp:92
int n
const double y