Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FadBLASUnitTests.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Sacado Package
5 // Copyright (2006) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25 // (etphipp@sandia.gov).
26 //
27 // ***********************************************************************
28 // @HEADER
29 
30 #ifndef FADBLASUNITTESTS_HPP
31 #define FADBLASUNITTESTS_HPP
32 
33 // Sacado includes
34 #include "Sacado_No_Kokkos.hpp"
35 #include "Sacado_Fad_BLAS.hpp"
36 #include "Sacado_Random.hpp"
37 
38 // gtest includes
39 #include <gtest/gtest.h>
40 
41 #include "GTestUtils.hpp"
42 
43 #define COMPARE_FAD_VECTORS(X1, X2, n) \
44  ASSERT_TRUE(X1.size() == std::size_t(n)); \
45  ASSERT_TRUE(X2.size() == std::size_t(n)); \
46  for (unsigned int i=0; i<n; i++) { \
47  COMPARE_FADS(X1[i], X2[i]); \
48  } \
49  ;
50 
51 // A class for testing differentiated BLAS operations for general Fad types
52 template <class FadType>
54 protected:
57 
58  // Random number generator
60 
61  // Real random number generator for derivative components
63 
64  // Number of matrix rows
65  unsigned int m_;
66 
67  // Number of matrix columns
68  unsigned int n_;
69 
70  // Number of matrix columns for level 3 blas
71  unsigned int l_;
72 
73  // Number of derivative components
74  unsigned int ndot_;
75 
76  // Tolerances to which fad objects should be the same
77  double tol_a, tol_r;
78 
80 
82  urand(), real_urand(), m_(5), n_(6), l_(4), ndot_(7),
83  tol_a(1.0e-11), tol_r(1.0e-11) {}
84 
85 }; // class FadBLASUnitTests
86 
88 
89 // Tests all arguments
91  typedef decltype(this->fad) FadType;
92  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
94  auto m = this->m_;
95  auto ndot = this->ndot_;
96 
97  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
98  for (unsigned int i=0; i<m; i++) {
99  ScalarType val = this->urand.number();
100  x1[i] = FadType(ndot, val);
101  x2[i] = FadType(ndot, val);
102  x3[i] = FadType(ndot, val);
103  for (unsigned int k=0; k<ndot; k++) {
104  val = this->urand.number();
105  x1[i].fastAccessDx(k) = val;
106  x2[i].fastAccessDx(k) = val;
107  x3[i].fastAccessDx(k) = val;
108  }
109  }
110  FadType alpha(ndot, this->urand.number());
111  for (unsigned int k=0; k<ndot; k++) {
112  alpha.fastAccessDx(k) = this->urand.number();
113  }
114 
115  Teuchos::BLAS<int,FadType> teuchos_blas;
116  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
117 
118  Teuchos::BLAS<int,FadType> sacado_blas(false);
119  sacado_blas.SCAL(m, alpha, &x2[0], 1);
120 
121  COMPARE_FAD_VECTORS(x1, x2, m);
122 
123  unsigned int sz = m*(1+ndot);
124  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
125  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
126 
127  COMPARE_FAD_VECTORS(x1, x3, m);
128 }
129 
130 // Tests non-unit inc
132  typedef decltype(this->fad) FadType;
133  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
135  auto m = this->m_;
136  auto ndot = this->ndot_;
137 
138  unsigned int incx = 2;
139  VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
140  for (unsigned int i=0; i<m*incx; i++) {
141  ScalarType val = this->urand.number();
142  x1[i] = FadType(ndot, val);
143  x2[i] = FadType(ndot, val);
144  x3[i] = FadType(ndot, val);
145  for (unsigned int k=0; k<ndot; k++) {
146  val = this->urand.number();
147  x1[i].fastAccessDx(k) = val;
148  x2[i].fastAccessDx(k) = val;
149  x3[i].fastAccessDx(k) = val;
150  }
151  }
152  FadType alpha(ndot, this->urand.number());
153  for (unsigned int k=0; k<ndot; k++) {
154  alpha.fastAccessDx(k) = this->urand.number();
155  }
156 
157  Teuchos::BLAS<int,FadType> teuchos_blas;
158  teuchos_blas.SCAL(m, alpha, &x1[0], incx);
159 
160  Teuchos::BLAS<int,FadType> sacado_blas(false);
161  sacado_blas.SCAL(m, alpha, &x2[0], incx);
162 
163  COMPARE_FAD_VECTORS(x1, x2, m*incx);
164 
165  unsigned int sz = m*(1+ndot);
166  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
167  sacado_blas2.SCAL(m, alpha, &x3[0], incx);
168 
169  COMPARE_FAD_VECTORS(x1, x3, m*incx);
170 }
171 
172 // Tests constant alpha
174  typedef decltype(this->fad) FadType;
175  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
177  auto m = this->m_;
178  auto ndot = this->ndot_;
179 
180  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
181  for (unsigned int i=0; i<m; i++) {
182  ScalarType val = this->urand.number();
183  x1[i] = FadType(ndot, val);
184  x2[i] = FadType(ndot, val);
185  x3[i] = FadType(ndot, val);
186  for (unsigned int k=0; k<ndot; k++) {
187  val = this->urand.number();
188  x1[i].fastAccessDx(k) = val;
189  x2[i].fastAccessDx(k) = val;
190  x3[i].fastAccessDx(k) = val;
191  }
192  }
193  ScalarType alpha = this->urand.number();
194 
195  Teuchos::BLAS<int,FadType> teuchos_blas;
196  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
197 
198  Teuchos::BLAS<int,FadType> sacado_blas(false);
199  sacado_blas.SCAL(m, alpha, &x2[0], 1);
200 
201  COMPARE_FAD_VECTORS(x1, x2, m);
202 
203  unsigned int sz = m*(1+ndot);
204  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
205  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
206 
207  COMPARE_FAD_VECTORS(x1, x3, m);
208 }
209 
210 // Tests constant x
212  typedef decltype(this->fad) FadType;
213  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
215  auto m = this->m_;
216  auto ndot = this->ndot_;
217 
218  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
219  for (unsigned int i=0; i<m; i++) {
220  ScalarType val = this->urand.number();
221  x1[i] = val;
222  x2[i] = val;
223  x3[i] = val;
224  }
225  FadType alpha = FadType(ndot, this->urand.number());
226  for (unsigned int k=0; k<ndot; k++)
227  alpha.fastAccessDx(k) = this->urand.number();
228 
229  Teuchos::BLAS<int,FadType> teuchos_blas;
230  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
231 
232  Teuchos::BLAS<int,FadType> sacado_blas(false);
233  sacado_blas.SCAL(m, alpha, &x2[0], 1);
234 
235  COMPARE_FAD_VECTORS(x1, x2, m);
236 
237  unsigned int sz = m*(1+ndot);
238  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
239  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
240 
241  COMPARE_FAD_VECTORS(x1, x3, m);
242 }
243 
244 // Tests all arguments
246  typedef decltype(this->fad) FadType;
247  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
249  auto m = this->m_;
250  auto ndot = this->ndot_;
251 
252  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
253  for (unsigned int i=0; i<m; i++) {
254  x[i] = FadType(ndot, this->urand.number());
255  ScalarType val = this->urand.number();
256  y1[i] = FadType(ndot, val);
257  y2[i] = FadType(ndot, val);
258  y3[i] = FadType(ndot, val);
259  for (unsigned int k=0; k<ndot; k++) {
260  x[i].fastAccessDx(k) = this->urand.number();
261  val = this->urand.number();
262  y1[i].fastAccessDx(k) = val;
263  y2[i].fastAccessDx(k) = val;
264  y3[i].fastAccessDx(k) = val;
265  }
266  }
267 
268  Teuchos::BLAS<int,FadType> teuchos_blas;
269  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
270 
271  Teuchos::BLAS<int,FadType> sacado_blas(false);
272  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
273 
274  COMPARE_FAD_VECTORS(y1, y2, m);
275 
276  unsigned int sz = 2*m*(1+ndot);
277  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
278  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
279 
280  COMPARE_FAD_VECTORS(y1, y3, m);
281 }
282 
283 // Tests non unit inc
285  typedef decltype(this->fad) FadType;
286  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
288  auto m = this->m_;
289  auto ndot = this->ndot_;
290 
291  unsigned int incx = 2;
292  unsigned int incy = 3;
293  VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
294  for (unsigned int i=0; i<m*incx; i++) {
295  x[i] = FadType(ndot, this->urand.number());
296  for (unsigned int k=0; k<ndot; k++) {
297  x[i].fastAccessDx(k) = this->urand.number();
298  }
299  }
300  for (unsigned int i=0; i<m*incy; i++) {
301  ScalarType val = this->urand.number();
302  y1[i] = FadType(ndot, val);
303  y2[i] = FadType(ndot, val);
304  y3[i] = FadType(ndot, val);
305  for (unsigned int k=0; k<ndot; k++) {
306  val = this->urand.number();
307  y1[i].fastAccessDx(k) = val;
308  y2[i].fastAccessDx(k) = val;
309  y3[i].fastAccessDx(k) = val;
310  }
311  }
312 
313  Teuchos::BLAS<int,FadType> teuchos_blas;
314  teuchos_blas.COPY(m, &x[0], incx, &y1[0], incy);
315 
316  Teuchos::BLAS<int,FadType> sacado_blas(false);
317  sacado_blas.COPY(m, &x[0], incx, &y2[0], incy);
318 
319  COMPARE_FAD_VECTORS(y1, y2, m*incy);
320 
321  unsigned int sz = 2*m*(1+ndot);
322  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
323  sacado_blas2.COPY(m, &x[0], incx, &y3[0], incy);
324 
325  COMPARE_FAD_VECTORS(y1, y3, m*incy);
326 }
327 
328 // Tests x constant
330  typedef decltype(this->fad) FadType;
331  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
333  auto m = this->m_;
334  auto ndot = this->ndot_;
335 
336  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
337  for (unsigned int i=0; i<m; i++) {
338  x[i] = this->urand.number();
339  }
340  for (unsigned int i=0; i<m; i++) {
341  ScalarType val = this->urand.number();
342  y1[i] = FadType(ndot, val);
343  y2[i] = FadType(ndot, val);
344  y3[i] = FadType(ndot, val);
345  for (unsigned int k=0; k<ndot; k++) {
346  val = this->urand.number();
347  y1[i].fastAccessDx(k) = val;
348  y2[i].fastAccessDx(k) = val;
349  y3[i].fastAccessDx(k) = val;
350  }
351  }
352 
353  Teuchos::BLAS<int,FadType> teuchos_blas;
354  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
355 
356  Teuchos::BLAS<int,FadType> sacado_blas(false);
357  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
358 
359  COMPARE_FAD_VECTORS(y1, y2, m);
360 
361  unsigned int sz = 2*m*(1+ndot);
362  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
363  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
364 
365  COMPARE_FAD_VECTORS(y1, y3, m);
366 }
367 
368 // Tests y constant
370  typedef decltype(this->fad) FadType;
371  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
373  auto m = this->m_;
374  auto ndot = this->ndot_;
375 
376  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
377  for (unsigned int i=0; i<m; i++) {
378  x[i] = FadType(ndot, this->urand.number());
379  ScalarType val = this->urand.number();
380  y1[i] = val;
381  y2[i] = val;
382  y3[i] = val;
383  for (unsigned int k=0; k<ndot; k++) {
384  x[i].fastAccessDx(k) = this->urand.number();
385  }
386  }
387 
388  Teuchos::BLAS<int,FadType> teuchos_blas;
389  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
390 
391  Teuchos::BLAS<int,FadType> sacado_blas(false);
392  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
393 
394  COMPARE_FAD_VECTORS(y1, y2, m);
395 
396  unsigned int sz = 2*m*(1+ndot);
397  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
398  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
399 
400  COMPARE_FAD_VECTORS(y1, y3, m);
401 }
402 
403 // Tests all arguments
405  typedef decltype(this->fad) FadType;
406  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
408  auto m = this->m_;
409  auto ndot = this->ndot_;
410 
411  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
412  for (unsigned int i=0; i<m; i++) {
413  x[i] = FadType(ndot, this->urand.number());
414  ScalarType val = this->urand.number();
415  y1[i] = FadType(ndot, val);
416  y2[i] = FadType(ndot, val);
417  y3[i] = FadType(ndot, val);
418  for (unsigned int k=0; k<ndot; k++) {
419  x[i].fastAccessDx(k) = this->urand.number();
420  val = this->urand.number();
421  y1[i].fastAccessDx(k) = val;
422  y2[i].fastAccessDx(k) = val;
423  y3[i].fastAccessDx(k) = val;
424  }
425  }
426  FadType alpha(ndot, this->urand.number());
427  for (unsigned int k=0; k<ndot; k++)
428  alpha.fastAccessDx(k) = this->urand.number();
429 
430  Teuchos::BLAS<int,FadType> teuchos_blas;
431  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
432 
433  Teuchos::BLAS<int,FadType> sacado_blas(false);
434  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
435 
436  COMPARE_FAD_VECTORS(y1, y2, m);
437 
438  unsigned int sz = 2*m*(1+ndot);
439  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
440  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
441 
442  COMPARE_FAD_VECTORS(y1, y3, m);
443 }
444 
445 // Tests non unit inc
447  typedef decltype(this->fad) FadType;
448  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
450  auto m = this->m_;
451  auto ndot = this->ndot_;
452 
453  unsigned int incx = 2;
454  unsigned int incy = 3;
455  VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
456  for (unsigned int i=0; i<m*incx; i++) {
457  x[i] = FadType(ndot, this->urand.number());
458  for (unsigned int k=0; k<ndot; k++) {
459  x[i].fastAccessDx(k) = this->urand.number();
460  }
461  }
462  for (unsigned int i=0; i<m*incy; i++) {
463  ScalarType val = this->urand.number();
464  y1[i] = FadType(ndot, val);
465  y2[i] = FadType(ndot, val);
466  y3[i] = FadType(ndot, val);
467  for (unsigned int k=0; k<ndot; k++) {
468  val = this->urand.number();
469  y1[i].fastAccessDx(k) = val;
470  y2[i].fastAccessDx(k) = val;
471  y3[i].fastAccessDx(k) = val;
472  }
473  }
474  FadType alpha(ndot, this->urand.number());
475  for (unsigned int k=0; k<ndot; k++)
476  alpha.fastAccessDx(k) = this->urand.number();
477 
478  Teuchos::BLAS<int,FadType> teuchos_blas;
479  teuchos_blas.AXPY(m, alpha, &x[0], incx, &y1[0], incy);
480 
481  Teuchos::BLAS<int,FadType> sacado_blas(false);
482  sacado_blas.AXPY(m, alpha, &x[0], incx, &y2[0], incy);
483 
484  COMPARE_FAD_VECTORS(y1, y2, m*incy);
485 
486  unsigned int sz = 2*m*(1+ndot);
487  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
488  sacado_blas2.AXPY(m, alpha, &x[0], incx, &y3[0], incy);
489 
490  COMPARE_FAD_VECTORS(y1, y3, m*incy);
491 }
492 
493 // Tests x constant
495  typedef decltype(this->fad) FadType;
496  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
498  auto m = this->m_;
499  auto ndot = this->ndot_;
500 
501  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
502  std::vector<ScalarType> xx(m);
503  for (unsigned int i=0; i<m; i++) {
504  xx[i] = this->urand.number();
505  x[i] = xx[i];
506  ScalarType val = this->urand.number();
507  y1[i] = FadType(ndot, val);
508  y2[i] = FadType(ndot, val);
509  y3[i] = FadType(ndot, val);
510  y4[i] = FadType(ndot, val);
511  for (unsigned int k=0; k<ndot; k++) {
512  val = this->urand.number();
513  y1[i].fastAccessDx(k) = val;
514  y2[i].fastAccessDx(k) = val;
515  y3[i].fastAccessDx(k) = val;
516  y4[i].fastAccessDx(k) = val;
517  }
518  }
519  FadType alpha(ndot, this->urand.number());
520  for (unsigned int k=0; k<ndot; k++)
521  alpha.fastAccessDx(k) = this->urand.number();
522 
523  Teuchos::BLAS<int,FadType> teuchos_blas;
524  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
525 
526  Teuchos::BLAS<int,FadType> sacado_blas(false);
527  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
528 
529  COMPARE_FAD_VECTORS(y1, y2, m);
530 
531  unsigned int sz = m*(1+ndot)+m;
532  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
533  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
534 
535  COMPARE_FAD_VECTORS(y1, y3, m);
536 
537  sacado_blas.AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
538 
539  COMPARE_FAD_VECTORS(y1, y4, m);
540 }
541 
542 // Tests y constant
544  typedef decltype(this->fad) FadType;
545  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
547  auto m = this->m_;
548  auto ndot = this->ndot_;
549 
550  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
551  for (unsigned int i=0; i<m; i++) {
552  x[i] = FadType(ndot, this->urand.number());
553  ScalarType val = this->urand.number();
554  y1[i] = val;
555  y2[i] = val;
556  y3[i] = val;
557  for (unsigned int k=0; k<ndot; k++) {
558  x[i].fastAccessDx(k) = this->urand.number();
559  }
560  }
561  FadType alpha(ndot, this->urand.number());
562  for (unsigned int k=0; k<ndot; k++)
563  alpha.fastAccessDx(k) = this->urand.number();
564 
565  Teuchos::BLAS<int,FadType> teuchos_blas;
566  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
567 
568  Teuchos::BLAS<int,FadType> sacado_blas(false);
569  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
570 
571  COMPARE_FAD_VECTORS(y1, y2, m);
572 
573  unsigned int sz = 2*m*(1+ndot);
574  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
575  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
576 
577  COMPARE_FAD_VECTORS(y1, y3, m);
578 }
579 
580 // Tests all arguments
582  typedef decltype(this->fad) FadType;
584  auto m = this->m_;
585  auto ndot = this->ndot_;
586 
587  VectorType X(m,ndot), Y(m,ndot);
588  for (unsigned int i=0; i<m; i++) {
589  X[i] = FadType(ndot, this->real_urand.number());
590  Y[i] = FadType(ndot, this->real_urand.number());
591  for (unsigned int k=0; k<ndot; k++) {
592  X[i].fastAccessDx(k) = this->real_urand.number();
593  Y[i].fastAccessDx(k) = this->real_urand.number();
594  }
595  }
596 
597  Teuchos::BLAS<int,FadType> teuchos_blas;
598  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
599 
600  Teuchos::BLAS<int,FadType> sacado_blas(false);
601  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
602 
603  COMPARE_FADS(z1, z2);
604 
605  unsigned int sz = 2*m*(1+ndot);
606  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
607  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
608 
609  COMPARE_FADS(z1, z3);
610 }
611 
612 // Tests non-unit inc
614  typedef decltype(this->fad) FadType;
616  auto m = this->m_;
617  auto ndot = this->ndot_;
618 
619  unsigned int incx = 2;
620  unsigned int incy = 3;
621  VectorType X(m*incx,ndot), Y(m*incy,ndot);
622  for (unsigned int i=0; i<m*incx; i++) {
623  X[i] = FadType(ndot, this->real_urand.number());
624  for (unsigned int k=0; k<ndot; k++) {
625  X[i].fastAccessDx(k) = this->real_urand.number();
626  }
627  }
628  for (unsigned int i=0; i<m*incy; i++) {
629  Y[i] = FadType(ndot, this->real_urand.number());
630  for (unsigned int k=0; k<ndot; k++) {
631  Y[i].fastAccessDx(k) = this->real_urand.number();
632  }
633  }
634 
635  Teuchos::BLAS<int,FadType> teuchos_blas;
636  FadType z1 = teuchos_blas.DOT(m, &X[0], incx, &Y[0], incy);
637 
638  Teuchos::BLAS<int,FadType> sacado_blas(false);
639  FadType z2 = sacado_blas.DOT(m, &X[0], incx, &Y[0], incy);
640 
641  COMPARE_FADS(z1, z2);
642 
643  unsigned int sz = 2*m*(1+ndot);
644  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
645  FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
646 
647  COMPARE_FADS(z1, z3);
648 }
649 
650 // Tests X constant
652  typedef decltype(this->fad) FadType;
653  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
655  auto m = this->m_;
656  auto ndot = this->ndot_;
657 
658  VectorType X(m,0), Y(m,ndot);
659  std::vector<ScalarType> x(m);
660  for (unsigned int i=0; i<m; i++) {
661  x[i] = this->urand.number();
662  X[i] = x[i];
663  Y[i] = FadType(ndot, this->real_urand.number());
664  for (unsigned int k=0; k<ndot; k++) {
665  Y[i].fastAccessDx(k) = this->real_urand.number();
666  }
667  }
668 
669  Teuchos::BLAS<int,FadType> teuchos_blas;
670  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
671 
672  Teuchos::BLAS<int,FadType> sacado_blas(false);
673  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
674 
675  COMPARE_FADS(z1, z2);
676 
677  unsigned int sz = 2*m*(1+ndot);
678  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
679  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
680 
681  COMPARE_FADS(z1, z3);
682 
683  FadType z4 = sacado_blas.DOT(m, &x[0], 1, &Y[0], 1);
684 
685  COMPARE_FADS(z1, z4);
686 }
687 
688 // Tests Y constant
690  typedef decltype(this->fad) FadType;
691  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
693  auto m = this->m_;
694  auto ndot = this->ndot_;
695 
696  VectorType X(m,ndot), Y(m,0);
697  std::vector<ScalarType> y(m);
698  for (unsigned int i=0; i<m; i++) {
699  X[i] = FadType(ndot, this->real_urand.number());
700  y[i] = this->urand.number();
701  Y[i] = y[i];
702  for (unsigned int k=0; k<ndot; k++) {
703  X[i].fastAccessDx(k) = this->real_urand.number();
704  }
705  }
706 
707  Teuchos::BLAS<int,FadType> teuchos_blas;
708  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
709 
710  Teuchos::BLAS<int,FadType> sacado_blas(false);
711  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
712 
713  COMPARE_FADS(z1, z2);
714 
715  unsigned int sz = 2*m*(1+ndot);
716  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
717  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
718 
719  COMPARE_FADS(z1, z3);
720 
721  FadType z4 = sacado_blas.DOT(m, &X[0], 1, &y[0], 1);
722 
723  COMPARE_FADS(z1, z4);
724 }
725 
726 // Tests all arguments
728  typedef decltype(this->fad) FadType;
730  auto m = this->m_;
731  auto ndot = this->ndot_;
732 
733  VectorType X(m,ndot);
734  for (unsigned int i=0; i<m; i++) {
735  X[i] = FadType(ndot, this->real_urand.number());
736  for (unsigned int k=0; k<ndot; k++) {
737  X[i].fastAccessDx(k) = this->real_urand.number();
738  }
739  }
740 
741  Teuchos::BLAS<int,FadType> teuchos_blas;
743  teuchos_blas.NRM2(m, &X[0], 1);
744 
745  Teuchos::BLAS<int,FadType> sacado_blas(false);
747  sacado_blas.NRM2(m, &X[0], 1);
748 
749  COMPARE_FADS(z1, z2);
750 
751  unsigned int sz = m*(1+ndot);
752  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
754  sacado_blas2.NRM2(m, &X[0], 1);
755 
756  COMPARE_FADS(z1, z3);
757 }
758 
759 // Tests non-unit inc
761  typedef decltype(this->fad) FadType;
763  auto m = this->m_;
764  auto ndot = this->ndot_;
765 
766  unsigned int incx = 2;
767  VectorType X(m*incx,ndot);
768  for (unsigned int i=0; i<m*incx; i++) {
769  X[i] = FadType(ndot, this->real_urand.number());
770  for (unsigned int k=0; k<ndot; k++) {
771  X[i].fastAccessDx(k) = this->real_urand.number();
772  }
773  }
774 
775  Teuchos::BLAS<int,FadType> teuchos_blas;
777  teuchos_blas.NRM2(m, &X[0], incx);
778 
779  Teuchos::BLAS<int,FadType> sacado_blas(false);
781  sacado_blas.NRM2(m, &X[0], incx);
782 
783  COMPARE_FADS(z1, z2);
784 
785  unsigned int sz = m*(1+ndot);
786  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
788  sacado_blas2.NRM2(m, &X[0], incx);
789 
790  COMPARE_FADS(z1, z3);
791 }
792 
793 // Tests all arguments
795  typedef decltype(this->fad) FadType;
796  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
798  auto m = this->m_;
799  auto n = this->n_;
800  auto ndot = this->ndot_;
801 
802  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
803  for (unsigned int j=0; j<n; j++) {
804  for (unsigned int i=0; i<m; i++) {
805  A[i+j*m] = FadType(ndot, this->urand.number());
806  for (unsigned int k=0; k<ndot; k++)
807  A[i+j*m].fastAccessDx(k) = this->urand.number();
808  }
809  B[j] = FadType(ndot, this->urand.number());
810  for (unsigned int k=0; k<ndot; k++)
811  B[j].fastAccessDx(k) = this->urand.number();
812  }
813  FadType alpha(ndot, this->urand.number());
814  FadType beta(ndot, this->urand.number());
815  for (unsigned int k=0; k<ndot; k++) {
816  alpha.fastAccessDx(k) = this->urand.number();
817  beta.fastAccessDx(k) = this->urand.number();
818  }
819 
820  for (unsigned int i=0; i<m; i++) {
821  ScalarType val = this->urand.number();
822  C1[i] = FadType(ndot, val);
823  C2[i] = FadType(ndot, val);
824  C3[i] = FadType(ndot, val);
825  for (unsigned int k=0; k<ndot; k++) {
826  val = this->urand.number();
827  C1[i].fastAccessDx(k) = val;
828  C2[i].fastAccessDx(k) = val;
829  C3[i].fastAccessDx(k) = val;
830  }
831  }
832 
833  Teuchos::BLAS<int,FadType> teuchos_blas;
834  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
835  beta, &C1[0], 1);
836 
837  Teuchos::BLAS<int,FadType> sacado_blas(false);
838  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
839  beta, &C2[0], 1);
840 
841  COMPARE_FAD_VECTORS(C1, C2, m);
842 
843  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
844  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
845  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
846  beta, &C3[0], 1);
847 
848  COMPARE_FAD_VECTORS(C1, C3, m);
849 }
850 
851 // Tests non-unit inc and different lda
853  typedef decltype(this->fad) FadType;
854  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
856  auto m = this->m_;
857  auto n = this->n_;
858  auto ndot = this->ndot_;
859 
860  unsigned int lda = m+3;
861  unsigned int incb = 2;
862  unsigned int incc = 3;
863  VectorType A(lda*n,ndot), B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot),
864  C3(m*incc,ndot);
865  for (unsigned int j=0; j<n; j++) {
866  for (unsigned int i=0; i<lda; i++) {
867  A[i+j*lda] = FadType(ndot, this->urand.number());
868  for (unsigned int k=0; k<ndot; k++)
869  A[i+j*lda].fastAccessDx(k) = this->urand.number();
870  }
871  }
872  for (unsigned int j=0; j<n*incb; j++) {
873  B[j] = FadType(ndot, this->urand.number());
874  for (unsigned int k=0; k<ndot; k++)
875  B[j].fastAccessDx(k) = this->urand.number();
876  }
877  FadType alpha(ndot, this->urand.number());
878  FadType beta(ndot, this->urand.number());
879  for (unsigned int k=0; k<ndot; k++) {
880  alpha.fastAccessDx(k) = this->urand.number();
881  beta.fastAccessDx(k) = this->urand.number();
882  }
883 
884  for (unsigned int i=0; i<m*incc; i++) {
885  ScalarType val = this->urand.number();
886  C1[i] = FadType(ndot, val);
887  C2[i] = FadType(ndot, val);
888  C3[i] = FadType(ndot, val);
889  for (unsigned int k=0; k<ndot; k++) {
890  val = this->urand.number();
891  C1[i].fastAccessDx(k) = val;
892  C2[i].fastAccessDx(k) = val;
893  C3[i].fastAccessDx(k) = val;
894  }
895  }
896 
897  Teuchos::BLAS<int,FadType> teuchos_blas;
898  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
899  beta, &C1[0], incc);
900 
901  Teuchos::BLAS<int,FadType> sacado_blas(false);
902  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
903  beta, &C2[0], incc);
904 
905  COMPARE_FAD_VECTORS(C1, C2, m*incc);
906 
907  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
908  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
909  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
910  beta, &C3[0], incc);
911 
912  COMPARE_FAD_VECTORS(C1, C3, m*incc);
913 }
914 
915 // Tests transpose with all arguments
917  typedef decltype(this->fad) FadType;
918  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
920  auto m = this->m_;
921  auto n = this->n_;
922  auto ndot = this->ndot_;
923 
924  VectorType A(m*n,ndot), B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
925  for (unsigned int j=0; j<n; j++) {
926  for (unsigned int i=0; i<m; i++) {
927  A[i+j*m] = FadType(ndot, this->urand.number());
928  for (unsigned int k=0; k<ndot; k++)
929  A[i+j*m].fastAccessDx(k) = this->urand.number();
930  }
931  }
932  for (unsigned int j=0; j<m; j++) {
933  B[j] = FadType(ndot, this->urand.number());
934  for (unsigned int k=0; k<ndot; k++)
935  B[j].fastAccessDx(k) = this->urand.number();
936  }
937  FadType alpha(ndot, this->urand.number());
938  FadType beta(ndot, this->urand.number());
939  for (unsigned int k=0; k<ndot; k++) {
940  alpha.fastAccessDx(k) = this->urand.number();
941  beta.fastAccessDx(k) = this->urand.number();
942  }
943 
944  for (unsigned int i=0; i<n; i++) {
945  ScalarType val = this->urand.number();
946  C1[i] = FadType(ndot, val);
947  C2[i] = FadType(ndot, val);
948  C3[i] = FadType(ndot, val);
949  for (unsigned int k=0; k<ndot; k++) {
950  val = this->urand.number();
951  C1[i].fastAccessDx(k) = val;
952  C2[i].fastAccessDx(k) = val;
953  C3[i].fastAccessDx(k) = val;
954  }
955  }
956 
957  Teuchos::BLAS<int,FadType> teuchos_blas;
958  teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
959  beta, &C1[0], 1);
960 
961  Teuchos::BLAS<int,FadType> sacado_blas(false);
962  sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
963  beta, &C2[0], 1);
964 
965  COMPARE_FAD_VECTORS(C1, C2, n);
966 
967  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
968  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
969  sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
970  beta, &C3[0], 1);
971 
972  COMPARE_FAD_VECTORS(C1, C3, n);
973 }
974 
975 // Tests transpose with non-unit inc and different lda
977  typedef decltype(this->fad) FadType;
978  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
980  auto m = this->m_;
981  auto n = this->n_;
982  auto ndot = this->ndot_;
983 
984  unsigned int lda = m+3;
985  unsigned int incb = 2;
986  unsigned int incc = 3;
987  VectorType A(lda*n,ndot), B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot),
988  C3(n*incc,ndot);
989  for (unsigned int j=0; j<n; j++) {
990  for (unsigned int i=0; i<lda; i++) {
991  A[i+j*lda] = FadType(ndot, this->urand.number());
992  for (unsigned int k=0; k<ndot; k++)
993  A[i+j*lda].fastAccessDx(k) = this->urand.number();
994  }
995  }
996  for (unsigned int j=0; j<m*incb; j++) {
997  B[j] = FadType(ndot, this->urand.number());
998  for (unsigned int k=0; k<ndot; k++)
999  B[j].fastAccessDx(k) = this->urand.number();
1000  }
1001  FadType alpha(ndot, this->urand.number());
1002  FadType beta(ndot, this->urand.number());
1003  for (unsigned int k=0; k<ndot; k++) {
1004  alpha.fastAccessDx(k) = this->urand.number();
1005  beta.fastAccessDx(k) = this->urand.number();
1006  }
1007 
1008  for (unsigned int i=0; i<n*incc; i++) {
1009  ScalarType val = this->urand.number();
1010  C1[i] = FadType(ndot, val);
1011  C2[i] = FadType(ndot, val);
1012  C3[i] = FadType(ndot, val);
1013  for (unsigned int k=0; k<ndot; k++) {
1014  val = this->urand.number();
1015  C1[i].fastAccessDx(k) = val;
1016  C2[i].fastAccessDx(k) = val;
1017  C3[i].fastAccessDx(k) = val;
1018  }
1019  }
1020 
1021  Teuchos::BLAS<int,FadType> teuchos_blas;
1022  teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1023  beta, &C1[0], incc);
1024 
1025  Teuchos::BLAS<int,FadType> sacado_blas(false);
1026  sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1027  beta, &C2[0], incc);
1028 
1029  COMPARE_FAD_VECTORS(C1, C2, n*incc);
1030 
1031  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1032  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1033  sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1034  beta, &C3[0], incc);
1035 
1036  COMPARE_FAD_VECTORS(C1, C3, n*incc);
1037 }
1038 
1039 // Tests with constant C
1041  typedef decltype(this->fad) FadType;
1042  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1043  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1044  auto m = this->m_;
1045  auto n = this->n_;
1046  auto ndot = this->ndot_;
1047 
1048  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1049  for (unsigned int j=0; j<n; j++) {
1050  for (unsigned int i=0; i<m; i++) {
1051  A[i+j*m] = FadType(ndot, this->urand.number());
1052  for (unsigned int k=0; k<ndot; k++)
1053  A[i+j*m].fastAccessDx(k) = this->urand.number();
1054  }
1055  B[j] = FadType(ndot, this->urand.number());
1056  for (unsigned int k=0; k<ndot; k++)
1057  B[j].fastAccessDx(k) = this->urand.number();
1058  }
1059  FadType alpha(ndot, this->urand.number());
1060  FadType beta(ndot, this->urand.number());
1061  for (unsigned int k=0; k<ndot; k++) {
1062  alpha.fastAccessDx(k) = this->urand.number();
1063  beta.fastAccessDx(k) = this->urand.number();
1064  }
1065 
1066  for (unsigned int i=0; i<m; i++) {
1067  ScalarType val = this->urand.number();
1068  C1[i] = val;
1069  C2[i] = val;
1070  C3[i] = val;
1071  }
1072 
1073  Teuchos::BLAS<int,FadType> teuchos_blas;
1074  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1075  beta, &C1[0], 1);
1076 
1077  Teuchos::BLAS<int,FadType> sacado_blas(false);
1078  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1079  beta, &C2[0], 1);
1080 
1081  COMPARE_FAD_VECTORS(C1, C2, m);
1082 
1083  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1084  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1085  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1086  beta, &C3[0], 1);
1087 
1088  COMPARE_FAD_VECTORS(C1, C3, m);
1089 }
1090 
1091 // Tests with constant alpha, beta
1093  typedef decltype(this->fad) FadType;
1094  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1095  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1096  auto m = this->m_;
1097  auto n = this->n_;
1098  auto ndot = this->ndot_;
1099 
1100  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1101  for (unsigned int j=0; j<n; j++) {
1102  for (unsigned int i=0; i<m; i++) {
1103  A[i+j*m] = FadType(ndot, this->urand.number());
1104  for (unsigned int k=0; k<ndot; k++)
1105  A[i+j*m].fastAccessDx(k) = this->urand.number();
1106  }
1107  B[j] = FadType(ndot, this->urand.number());
1108  for (unsigned int k=0; k<ndot; k++)
1109  B[j].fastAccessDx(k) = this->urand.number();
1110  }
1111  ScalarType alpha = this->urand.number();
1112  ScalarType beta = this->urand.number();
1113 
1114  for (unsigned int i=0; i<m; i++) {
1115  ScalarType val = this->urand.number();
1116  C1[i] = FadType(ndot, val);
1117  C2[i] = FadType(ndot, val);
1118  C3[i] = FadType(ndot, val);
1119  for (unsigned int k=0; k<ndot; k++) {
1120  val = this->urand.number();
1121  C1[i].fastAccessDx(k) = val;
1122  C2[i].fastAccessDx(k) = val;
1123  C3[i].fastAccessDx(k) = val;
1124  }
1125  }
1126 
1127  Teuchos::BLAS<int,FadType> teuchos_blas;
1128  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1129  beta, &C1[0], 1);
1130 
1131  Teuchos::BLAS<int,FadType> sacado_blas(false);
1132  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1133  beta, &C2[0], 1);
1134 
1135  COMPARE_FAD_VECTORS(C1, C2, m);
1136 
1137  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1138  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1139  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1140  beta, &C3[0], 1);
1141 
1142  COMPARE_FAD_VECTORS(C1, C3, m);
1143 }
1144 
1145 // Tests wth constant B
1147  typedef decltype(this->fad) FadType;
1148  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1149  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1150  auto m = this->m_;
1151  auto n = this->n_;
1152  auto ndot = this->ndot_;
1153 
1154  VectorType A(m*n,ndot), B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1155  C4(m,ndot);
1156  std::vector<ScalarType> b(n);
1157  for (unsigned int j=0; j<n; j++) {
1158  for (unsigned int i=0; i<m; i++) {
1159  A[i+j*m] = FadType(ndot, this->urand.number());
1160  for (unsigned int k=0; k<ndot; k++)
1161  A[i+j*m].fastAccessDx(k) = this->urand.number();
1162  }
1163  b[j] = this->urand.number();
1164  B[j] = b[j];
1165  }
1166  FadType alpha(ndot, this->urand.number());
1167  FadType beta(ndot, this->urand.number());
1168  for (unsigned int k=0; k<ndot; k++) {
1169  alpha.fastAccessDx(k) = this->urand.number();
1170  beta.fastAccessDx(k) = this->urand.number();
1171  }
1172 
1173  for (unsigned int i=0; i<m; i++) {
1174  ScalarType val = this->urand.number();
1175  C1[i] = FadType(ndot, val);
1176  C2[i] = FadType(ndot, val);
1177  C3[i] = FadType(ndot, val);
1178  C4[i] = FadType(ndot, val);
1179  for (unsigned int k=0; k<ndot; k++) {
1180  val = this->urand.number();
1181  C1[i].fastAccessDx(k) = val;
1182  C2[i].fastAccessDx(k) = val;
1183  C3[i].fastAccessDx(k) = val;
1184  C4[i].fastAccessDx(k) = val;
1185  }
1186  }
1187 
1188  Teuchos::BLAS<int,FadType> teuchos_blas;
1189  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1190  beta, &C1[0], 1);
1191 
1192  Teuchos::BLAS<int,FadType> sacado_blas(false);
1193  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1194  beta, &C2[0], 1);
1195 
1196  COMPARE_FAD_VECTORS(C1, C2, m);
1197 
1198  unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
1199  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1200  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1201  beta, &C3[0], 1);
1202 
1203  COMPARE_FAD_VECTORS(C1, C3, m);
1204 
1205  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &b[0], 1,
1206  beta, &C4[0], 1);
1207 
1208  COMPARE_FAD_VECTORS(C1, C4, m);
1209 }
1210 
1211 // Tests with constant A
1213  typedef decltype(this->fad) FadType;
1214  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1215  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1216  auto m = this->m_;
1217  auto n = this->n_;
1218  auto ndot = this->ndot_;
1219 
1220  VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1221  C4(m,ndot);
1222  std::vector<ScalarType> a(m*n);
1223  for (unsigned int j=0; j<n; j++) {
1224  for (unsigned int i=0; i<m; i++) {
1225  a[i+j*m] = this->urand.number();
1226  A[i+j*m] = a[i+j*m];
1227  }
1228  B[j] = FadType(ndot, this->urand.number());
1229  for (unsigned int k=0; k<ndot; k++)
1230  B[j].fastAccessDx(k) = this->urand.number();
1231  }
1232  FadType alpha(ndot, this->urand.number());
1233  FadType beta(ndot, this->urand.number());
1234  for (unsigned int k=0; k<ndot; k++) {
1235  alpha.fastAccessDx(k) = this->urand.number();
1236  beta.fastAccessDx(k) = this->urand.number();
1237  }
1238 
1239  for (unsigned int i=0; i<m; i++) {
1240  ScalarType val = this->urand.number();
1241  C1[i] = FadType(ndot, val);
1242  C2[i] = FadType(ndot, val);
1243  C3[i] = FadType(ndot, val);
1244  C4[i] = FadType(ndot, val);
1245  for (unsigned int k=0; k<ndot; k++) {
1246  val = this->urand.number();
1247  C1[i].fastAccessDx(k) = val;
1248  C2[i].fastAccessDx(k) = val;
1249  C3[i].fastAccessDx(k) = val;
1250  C4[i].fastAccessDx(k) = val;
1251  }
1252  }
1253 
1254  Teuchos::BLAS<int,FadType> teuchos_blas;
1255  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1256  beta, &C1[0], 1);
1257 
1258  Teuchos::BLAS<int,FadType> sacado_blas(false);
1259  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1260  beta, &C2[0], 1);
1261 
1262  COMPARE_FAD_VECTORS(C1, C2, m);
1263 
1264  unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1265  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1266  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1267  beta, &C3[0], 1);
1268 
1269  COMPARE_FAD_VECTORS(C1, C3, m);
1270 
1271  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &B[0], 1,
1272  beta, &C4[0], 1);
1273 
1274  COMPARE_FAD_VECTORS(C1, C4, m);
1275 }
1276 
1277 // Tests with constant A, B
1279  typedef decltype(this->fad) FadType;
1280  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1281  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1282  auto m = this->m_;
1283  auto n = this->n_;
1284  auto ndot = this->ndot_;
1285 
1286  VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1287  C4(m,ndot);
1288  std::vector<ScalarType> a(m*n), b(n);
1289  for (unsigned int j=0; j<n; j++) {
1290  for (unsigned int i=0; i<m; i++) {
1291  a[i+j*m] = this->urand.number();
1292  A[i+j*m] = a[i+j*m];
1293  }
1294  b[j] = this->urand.number();
1295  B[j] = b[j];
1296  }
1297  FadType alpha(ndot, this->urand.number());
1298  FadType beta(ndot, this->urand.number());
1299  for (unsigned int k=0; k<ndot; k++) {
1300  alpha.fastAccessDx(k) = this->urand.number();
1301  beta.fastAccessDx(k) = this->urand.number();
1302  }
1303 
1304  for (unsigned int i=0; i<m; i++) {
1305  ScalarType val = this->urand.number();
1306  C1[i] = FadType(ndot, val);
1307  C2[i] = FadType(ndot, val);
1308  C3[i] = FadType(ndot, val);
1309  C4[i] = FadType(ndot, val);
1310  for (unsigned int k=0; k<ndot; k++) {
1311  val = this->urand.number();
1312  C1[i].fastAccessDx(k) = val;
1313  C2[i].fastAccessDx(k) = val;
1314  C3[i].fastAccessDx(k) = val;
1315  C4[i].fastAccessDx(k) = val;
1316  }
1317  }
1318 
1319  Teuchos::BLAS<int,FadType> teuchos_blas;
1320  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1321  beta, &C1[0], 1);
1322 
1323  Teuchos::BLAS<int,FadType> sacado_blas(false);
1324  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1325  beta, &C2[0], 1);
1326 
1327  COMPARE_FAD_VECTORS(C1, C2, m);
1328 
1329  unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1330  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1331  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1332  beta, &C3[0], 1);
1333 
1334  COMPARE_FAD_VECTORS(C1, C3, m);
1335 
1336  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &b[0], 1,
1337  beta, &C4[0], 1);
1338 
1339  COMPARE_FAD_VECTORS(C1, C4, m);
1340 }
1341 
1342 // Tests all arguments
1344  typedef decltype(this->fad) FadType;
1345  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1346  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1347  auto n = this->n_;
1348  auto ndot = this->ndot_;
1349 
1350  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1351  for (unsigned int j=0; j<n; j++) {
1352  for (unsigned int i=0; i<n; i++) {
1353  A[i+j*n] = FadType(ndot, this->urand.number());
1354  for (unsigned int k=0; k<ndot; k++)
1355  A[i+j*n].fastAccessDx(k) = this->urand.number();
1356  }
1357  ScalarType val = this->urand.number();
1358  x1[j] = FadType(ndot, val);
1359  x2[j] = FadType(ndot, val);
1360  x3[j] = FadType(ndot, val);
1361  for (unsigned int k=0; k<ndot; k++) {
1362  val = this->urand.number();
1363  x1[j].fastAccessDx(k) = val;
1364  x2[j].fastAccessDx(k) = val;
1365  x3[j].fastAccessDx(k) = val;
1366  }
1367  }
1368 
1369  Teuchos::BLAS<int,FadType> teuchos_blas;
1371  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1372 
1373  Teuchos::BLAS<int,FadType> sacado_blas(false);
1375  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1376 
1377  COMPARE_FAD_VECTORS(x1, x2, n);
1378 
1379  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1380  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1382  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1383 
1384  COMPARE_FAD_VECTORS(x1, x3, n);
1385 
1387  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1389  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1391  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1392  COMPARE_FAD_VECTORS(x1, x2, n);
1393  COMPARE_FAD_VECTORS(x1, x3, n);
1394 
1395  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1396  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1397  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1398  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1399  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1400  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1401  COMPARE_FAD_VECTORS(x1, x2, n);
1402  COMPARE_FAD_VECTORS(x1, x3, n);
1403 
1404  for (unsigned int i=0; i<n; i++) {
1405  A[i*n+i].val() = 1.0;
1406  for (unsigned int k=0; k<ndot; k++)
1407  A[i*n+i].fastAccessDx(k) = 0.0;
1408  }
1410  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1412  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1414  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1415  COMPARE_FAD_VECTORS(x1, x2, n);
1416  COMPARE_FAD_VECTORS(x1, x3, n);
1417 }
1418 
1419 // Tests non unit inc, different lda
1421  typedef decltype(this->fad) FadType;
1422  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1423  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1424  auto n = this->n_;
1425  auto ndot = this->ndot_;
1426 
1427  unsigned int lda = n+3;
1428  unsigned int incx = 2;
1429  VectorType A(lda*n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
1430  for (unsigned int j=0; j<n; j++) {
1431  for (unsigned int i=0; i<lda; i++) {
1432  A[i+j*lda] = FadType(ndot, this->urand.number());
1433  for (unsigned int k=0; k<ndot; k++)
1434  A[i+j*lda].fastAccessDx(k) = this->urand.number();
1435  }
1436  }
1437  for (unsigned int j=0; j<n*incx; j++) {
1438  ScalarType val = this->urand.number();
1439  x1[j] = FadType(ndot, val);
1440  x2[j] = FadType(ndot, val);
1441  x3[j] = FadType(ndot, val);
1442  for (unsigned int k=0; k<ndot; k++) {
1443  val = this->urand.number();
1444  x1[j].fastAccessDx(k) = val;
1445  x2[j].fastAccessDx(k) = val;
1446  x3[j].fastAccessDx(k) = val;
1447  }
1448  }
1449 
1450  Teuchos::BLAS<int,FadType> teuchos_blas;
1452  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1453 
1454  Teuchos::BLAS<int,FadType> sacado_blas(false);
1456  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1457 
1458  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1459 
1460  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1461  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1463  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1464 
1465  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1466 
1468  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1470  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1472  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1473  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1474  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1475 
1476  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1477  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1478  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1479  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1480  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1481  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1482  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1483  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1484 
1485  for (unsigned int i=0; i<n; i++) {
1486  A[i*lda+i].val() = 1.0;
1487  for (unsigned int k=0; k<ndot; k++)
1488  A[i*lda+i].fastAccessDx(k) = 0.0;
1489  }
1491  Teuchos::UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1493  Teuchos::UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1495  Teuchos::UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1496  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1497  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1498 }
1499 
1500 // Tests A constant
1502  typedef decltype(this->fad) FadType;
1503  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1504  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1505  auto n = this->n_;
1506  auto ndot = this->ndot_;
1507 
1508  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot),
1509  x5(n,ndot);
1510  std::vector<ScalarType> a(n*n);
1511  for (unsigned int j=0; j<n; j++) {
1512  for (unsigned int i=0; i<n; i++) {
1513  a[i+j*n] = this->urand.number();
1514  A[i+j*n] = a[i+j*n];
1515  }
1516  ScalarType val = this->urand.number();
1517  x1[j] = FadType(ndot, val);
1518  x2[j] = FadType(ndot, val);
1519  x3[j] = FadType(ndot, val);
1520  x4[j] = FadType(ndot, val);
1521  x5[j] = FadType(ndot, val);
1522  for (unsigned int k=0; k<ndot; k++) {
1523  val = this->urand.number();
1524  x1[j].fastAccessDx(k) = val;
1525  x2[j].fastAccessDx(k) = val;
1526  x3[j].fastAccessDx(k) = val;
1527  x4[j].fastAccessDx(k) = val;
1528  x5[j].fastAccessDx(k) = val;
1529  }
1530  }
1531 
1532  Teuchos::BLAS<int,FadType> teuchos_blas;
1534  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1535 
1536  Teuchos::BLAS<int,FadType> sacado_blas(false);
1538  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1539 
1540  COMPARE_FAD_VECTORS(x1, x2, n);
1541 
1542  unsigned int sz = n*n+n*(1+ndot);
1543  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1544  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1545  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1546 
1547  COMPARE_FAD_VECTORS(x1, x3, n);
1548 
1550  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1551 
1552  COMPARE_FAD_VECTORS(x1, x4, n);
1553 
1554  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1555  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x5[0], 1);
1556 
1557  COMPARE_FAD_VECTORS(x1, x5, n);
1558 
1560  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1562  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1563  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1564  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1566  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1567  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1568  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1569  COMPARE_FAD_VECTORS(x1, x2, n);
1570  COMPARE_FAD_VECTORS(x1, x3, n);
1571  COMPARE_FAD_VECTORS(x1, x4, n);
1572  COMPARE_FAD_VECTORS(x1, x5, n);
1573 
1574  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1575  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1576  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1577  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1578  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1579  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1580  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1581  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1582  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1583  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1584  COMPARE_FAD_VECTORS(x1, x2, n);
1585  COMPARE_FAD_VECTORS(x1, x3, n);
1586  COMPARE_FAD_VECTORS(x1, x4, n);
1587  COMPARE_FAD_VECTORS(x1, x5, n);
1588 
1589  for (unsigned int i=0; i<n; i++) {
1590  A[i*n+i].val() = 1.0;
1591  for (unsigned int k=0; k<ndot; k++)
1592  A[i*n+i].fastAccessDx(k) = 0.0;
1593  }
1595  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1597  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1598  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1599  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1601  Teuchos::UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1602  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1603  Teuchos::UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1604  COMPARE_FAD_VECTORS(x1, x2, n);
1605  COMPARE_FAD_VECTORS(x1, x3, n);
1606  COMPARE_FAD_VECTORS(x1, x4, n);
1607  COMPARE_FAD_VECTORS(x1, x5, n);
1608 }
1609 
1610 // Tests x constant
1612  typedef decltype(this->fad) FadType;
1613  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1614  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1615  auto n = this->n_;
1616  auto ndot = this->ndot_;
1617 
1618  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1619  for (unsigned int j=0; j<n; j++) {
1620  for (unsigned int i=0; i<n; i++) {
1621  A[i+j*n] = FadType(ndot, this->urand.number());
1622  for (unsigned int k=0; k<ndot; k++)
1623  A[i+j*n].fastAccessDx(k) = this->urand.number();
1624  }
1625  ScalarType val = this->urand.number();
1626  x1[j] = val;
1627  x2[j] = val;
1628  x3[j] = val;
1629  }
1630 
1631  Teuchos::BLAS<int,FadType> teuchos_blas;
1633  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1634 
1635  Teuchos::BLAS<int,FadType> sacado_blas(false);
1637  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1638 
1639  COMPARE_FAD_VECTORS(x1, x2, n);
1640 
1641  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1642  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1644  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1645 
1646  COMPARE_FAD_VECTORS(x1, x3, n);
1647 
1649  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1651  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1653  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1654  COMPARE_FAD_VECTORS(x1, x2, n);
1655  COMPARE_FAD_VECTORS(x1, x3, n);
1656 
1657  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1658  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1659  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1660  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1661  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1662  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1663  COMPARE_FAD_VECTORS(x1, x2, n);
1664  COMPARE_FAD_VECTORS(x1, x3, n);
1665 
1666  for (unsigned int i=0; i<n; i++) {
1667  A[i*n+i].val() = 1.0;
1668  for (unsigned int k=0; k<ndot; k++)
1669  A[i*n+i].fastAccessDx(k) = 0.0;
1670  }
1672  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1674  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1676  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1677  COMPARE_FAD_VECTORS(x1, x2, n);
1678  COMPARE_FAD_VECTORS(x1, x3, n);
1679 }
1680 
1681 // Tests all arguments
1683  typedef decltype(this->fad) FadType;
1684  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1685  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1686  auto m = this->m_;
1687  auto n = this->n_;
1688  auto ndot = this->ndot_;
1689 
1690 
1691  // GER is apparently not defined in the BLAS for complex types
1693  return;
1694 
1695  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1696  for (unsigned int j=0; j<n; j++) {
1697  for (unsigned int i=0; i<m; i++) {
1698  ScalarType val = this->urand.number();
1699  A1[i+j*m] = FadType(ndot, val);
1700  A2[i+j*m] = FadType(ndot, val);
1701  A3[i+j*m] = FadType(ndot, val);
1702  for (unsigned int k=0; k<ndot; k++) {
1703  val = this->urand.number();
1704  A1[i+j*m].fastAccessDx(k) = val;
1705  A2[i+j*m].fastAccessDx(k) = val;
1706  A3[i+j*m].fastAccessDx(k) = val;
1707  }
1708  }
1709  }
1710  for (unsigned int i=0; i<m; i++) {
1711  x[i] = FadType(ndot, this->urand.number());
1712  for (unsigned int k=0; k<ndot; k++)
1713  x[i].fastAccessDx(k) = this->urand.number();
1714  }
1715  for (unsigned int i=0; i<n; i++) {
1716  y[i] = FadType(ndot, this->urand.number());
1717  for (unsigned int k=0; k<ndot; k++)
1718  y[i].fastAccessDx(k) = this->urand.number();
1719  }
1720  FadType alpha(ndot, this->urand.number());
1721  for (unsigned int k=0; k<ndot; k++) {
1722  alpha.fastAccessDx(k) = this->urand.number();
1723  }
1724 
1725  Teuchos::BLAS<int,FadType> teuchos_blas;
1726  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1727 
1728  Teuchos::BLAS<int,FadType> sacado_blas(false);
1729  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1730 
1731  COMPARE_FAD_VECTORS(A1, A2, m*n);
1732 
1733  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1734  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1735  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1736 
1737  COMPARE_FAD_VECTORS(A1, A3, m*n);
1738 }
1739 
1740 // Tests non unit inc, different lda
1742  typedef decltype(this->fad) FadType;
1743  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1744  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1745  auto m = this->m_;
1746  auto n = this->n_;
1747  auto ndot = this->ndot_;
1748 
1749 
1750  // GER is apparently not defined in the BLAS for complex types
1752  return;
1753 
1754  unsigned int lda = m+3;
1755  unsigned int incx = 2;
1756  unsigned int incy = 3;
1757  VectorType A1(lda*n,ndot), A2(lda*n,ndot), A3(lda*n,ndot), x(m*incx,ndot),
1758  y(n*incy,ndot);
1759  for (unsigned int j=0; j<n; j++) {
1760  for (unsigned int i=0; i<lda; i++) {
1761  ScalarType val = this->urand.number();
1762  A1[i+j*lda] = FadType(ndot, val);
1763  A2[i+j*lda] = FadType(ndot, val);
1764  A3[i+j*lda] = FadType(ndot, val);
1765  for (unsigned int k=0; k<ndot; k++) {
1766  val = this->urand.number();
1767  A1[i+j*lda].fastAccessDx(k) = val;
1768  A2[i+j*lda].fastAccessDx(k) = val;
1769  A3[i+j*lda].fastAccessDx(k) = val;
1770  }
1771  }
1772  }
1773  for (unsigned int i=0; i<m*incx; i++) {
1774  x[i] = FadType(ndot, this->urand.number());
1775  for (unsigned int k=0; k<ndot; k++)
1776  x[i].fastAccessDx(k) = this->urand.number();
1777  }
1778  for (unsigned int i=0; i<n*incy; i++) {
1779  y[i] = FadType(ndot, this->urand.number());
1780  for (unsigned int k=0; k<ndot; k++)
1781  y[i].fastAccessDx(k) = this->urand.number();
1782  }
1783  FadType alpha(ndot, this->urand.number());
1784  for (unsigned int k=0; k<ndot; k++) {
1785  alpha.fastAccessDx(k) = this->urand.number();
1786  }
1787 
1788  Teuchos::BLAS<int,FadType> teuchos_blas;
1789  teuchos_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A1[0], lda);
1790 
1791  Teuchos::BLAS<int,FadType> sacado_blas(false);
1792  sacado_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A2[0], lda);
1793 
1794  COMPARE_FAD_VECTORS(A1, A2, lda*n);
1795 
1796  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1797  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1798  sacado_blas2.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A3[0], lda);
1799 
1800  COMPARE_FAD_VECTORS(A1, A3, lda*n);
1801 }
1802 
1803 // Tests constant alpha
1805  typedef decltype(this->fad) FadType;
1806  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1807  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1808  auto m = this->m_;
1809  auto n = this->n_;
1810  auto ndot = this->ndot_;
1811 
1812 
1813  // GER is apparently not defined in the BLAS for complex types
1815  return;
1816 
1817  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1818  for (unsigned int j=0; j<n; j++) {
1819  for (unsigned int i=0; i<m; i++) {
1820  ScalarType val = this->urand.number();
1821  A1[i+j*m] = FadType(ndot, val);
1822  A2[i+j*m] = FadType(ndot, val);
1823  A3[i+j*m] = FadType(ndot, val);
1824  for (unsigned int k=0; k<ndot; k++) {
1825  val = this->urand.number();
1826  A1[i+j*m].fastAccessDx(k) = val;
1827  A2[i+j*m].fastAccessDx(k) = val;
1828  A3[i+j*m].fastAccessDx(k) = val;
1829  }
1830  }
1831  }
1832  for (unsigned int i=0; i<m; i++) {
1833  x[i] = FadType(ndot, this->urand.number());
1834  for (unsigned int k=0; k<ndot; k++)
1835  x[i].fastAccessDx(k) = this->urand.number();
1836  }
1837  for (unsigned int i=0; i<n; i++) {
1838  y[i] = FadType(ndot, this->urand.number());
1839  for (unsigned int k=0; k<ndot; k++)
1840  y[i].fastAccessDx(k) = this->urand.number();
1841  }
1842  ScalarType alpha = this->urand.number();
1843 
1844  Teuchos::BLAS<int,FadType> teuchos_blas;
1845  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1846 
1847  Teuchos::BLAS<int,FadType> sacado_blas(false);
1848  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1849 
1850  COMPARE_FAD_VECTORS(A1, A2, m*n);
1851 
1852  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1853  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1854  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1855 
1856  COMPARE_FAD_VECTORS(A1, A3, m*n);
1857 }
1858 
1859 // Tests constant x
1861  typedef decltype(this->fad) FadType;
1862  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1863  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1864  auto m = this->m_;
1865  auto n = this->n_;
1866  auto ndot = this->ndot_;
1867 
1868 
1869  // GER is apparently not defined in the BLAS for complex types
1871  return;
1872 
1873  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1874  A5(m*n,ndot), x(m,ndot), y(n,ndot);
1875  std::vector<ScalarType> xx(m);
1876  for (unsigned int j=0; j<n; j++) {
1877  for (unsigned int i=0; i<m; i++) {
1878  ScalarType val = this->urand.number();
1879  A1[i+j*m] = FadType(ndot, val);
1880  A2[i+j*m] = FadType(ndot, val);
1881  A3[i+j*m] = FadType(ndot, val);
1882  A4[i+j*m] = FadType(ndot, val);
1883  A5[i+j*m] = FadType(ndot, val);
1884  for (unsigned int k=0; k<ndot; k++) {
1885  val = this->urand.number();
1886  A1[i+j*m].fastAccessDx(k) = val;
1887  A2[i+j*m].fastAccessDx(k) = val;
1888  A3[i+j*m].fastAccessDx(k) = val;
1889  A4[i+j*m].fastAccessDx(k) = val;
1890  A5[i+j*m].fastAccessDx(k) = val;
1891  }
1892  }
1893  }
1894  for (unsigned int i=0; i<m; i++) {
1895  xx[i] = this->urand.number();
1896  x[i] = xx[i];
1897  }
1898  for (unsigned int i=0; i<n; i++) {
1899  y[i] = FadType(ndot, this->urand.number());
1900  for (unsigned int k=0; k<ndot; k++)
1901  y[i].fastAccessDx(k) = this->urand.number();
1902  }
1903  FadType alpha(ndot, this->urand.number());
1904  for (unsigned int k=0; k<ndot; k++) {
1905  alpha.fastAccessDx(k) = this->urand.number();
1906  }
1907 
1908  Teuchos::BLAS<int,FadType> teuchos_blas;
1909  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1910 
1911  Teuchos::BLAS<int,FadType> sacado_blas(false);
1912  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1913 
1914  COMPARE_FAD_VECTORS(A1, A2, m*n);
1915 
1916  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
1917  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1918  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1919 
1920  COMPARE_FAD_VECTORS(A1, A3, m*n);
1921 
1922  sacado_blas.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A4[0], m);
1923 
1924  COMPARE_FAD_VECTORS(A1, A4, m*n);
1925 
1926  sacado_blas2.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A5[0], m);
1927 
1928  COMPARE_FAD_VECTORS(A1, A5, m*n);
1929 }
1930 
1931 // Tests constant y
1933  typedef decltype(this->fad) FadType;
1934  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1935  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
1936  auto m = this->m_;
1937  auto n = this->n_;
1938  auto ndot = this->ndot_;
1939 
1940 
1941  // GER is apparently not defined in the BLAS for complex types
1943  return;
1944 
1945  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1946  A5(m*n,ndot), x(m,ndot), y(n,ndot);
1947  std::vector<ScalarType> yy(n);
1948  for (unsigned int j=0; j<n; j++) {
1949  for (unsigned int i=0; i<m; i++) {
1950  ScalarType val = this->urand.number();
1951  A1[i+j*m] = FadType(ndot, val);
1952  A2[i+j*m] = FadType(ndot, val);
1953  A3[i+j*m] = FadType(ndot, val);
1954  A4[i+j*m] = FadType(ndot, val);
1955  A5[i+j*m] = FadType(ndot, val);
1956  for (unsigned int k=0; k<ndot; k++) {
1957  val = this->urand.number();
1958  A1[i+j*m].fastAccessDx(k) = val;
1959  A2[i+j*m].fastAccessDx(k) = val;
1960  A3[i+j*m].fastAccessDx(k) = val;
1961  A4[i+j*m].fastAccessDx(k) = val;
1962  A5[i+j*m].fastAccessDx(k) = val;
1963  }
1964  }
1965  }
1966  for (unsigned int i=0; i<m; i++) {
1967  x[i] = FadType(ndot, this->urand.number());
1968  for (unsigned int k=0; k<ndot; k++)
1969  x[i].fastAccessDx(k) = this->urand.number();
1970  }
1971  for (unsigned int i=0; i<n; i++) {
1972  yy[i] = this->urand.number();
1973  y[i] = yy[i];
1974  }
1975  FadType alpha(ndot, this->urand.number());
1976  for (unsigned int k=0; k<ndot; k++) {
1977  alpha.fastAccessDx(k) = this->urand.number();
1978  }
1979 
1980  Teuchos::BLAS<int,FadType> teuchos_blas;
1981  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1982 
1983  Teuchos::BLAS<int,FadType> sacado_blas(false);
1984  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1985 
1986  COMPARE_FAD_VECTORS(A1, A2, m*n);
1987 
1988  unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
1989  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1990  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1991 
1992  COMPARE_FAD_VECTORS(A1, A3, m*n);
1993 
1994  sacado_blas.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A4[0], m);
1995 
1996  COMPARE_FAD_VECTORS(A1, A4, m*n);
1997 
1998  sacado_blas2.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A5[0], m);
1999 
2000  COMPARE_FAD_VECTORS(A1, A5, m*n);
2001 }
2002 
2003 // Tests constant x and y
2005  typedef decltype(this->fad) FadType;
2006  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2007  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2008  auto m = this->m_;
2009  auto n = this->n_;
2010  auto ndot = this->ndot_;
2011 
2012 
2013  // GER is apparently not defined in the BLAS for complex types
2015  return;
2016 
2017  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2018  A5(m*n,ndot), x(m,ndot), y(n,ndot);
2019  std::vector<ScalarType> xx(n), yy(n);
2020  for (unsigned int j=0; j<n; j++) {
2021  for (unsigned int i=0; i<m; i++) {
2022  ScalarType val = this->urand.number();
2023  A1[i+j*m] = FadType(ndot, val);
2024  A2[i+j*m] = FadType(ndot, val);
2025  A3[i+j*m] = FadType(ndot, val);
2026  A4[i+j*m] = FadType(ndot, val);
2027  A5[i+j*m] = FadType(ndot, val);
2028  for (unsigned int k=0; k<ndot; k++) {
2029  val = this->urand.number();
2030  A1[i+j*m].fastAccessDx(k) = val;
2031  A2[i+j*m].fastAccessDx(k) = val;
2032  A3[i+j*m].fastAccessDx(k) = val;
2033  A4[i+j*m].fastAccessDx(k) = val;
2034  A5[i+j*m].fastAccessDx(k) = val;
2035  }
2036  }
2037  }
2038  for (unsigned int i=0; i<m; i++) {
2039  xx[i] = this->urand.number();
2040  x[i] = xx[i];
2041  }
2042  for (unsigned int i=0; i<n; i++) {
2043  yy[i] = this->urand.number();
2044  y[i] = yy[i];
2045  }
2046  FadType alpha(ndot, this->urand.number());
2047  for (unsigned int k=0; k<ndot; k++) {
2048  alpha.fastAccessDx(k) = this->urand.number();
2049  }
2050 
2051  Teuchos::BLAS<int,FadType> teuchos_blas;
2052  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2053 
2054  Teuchos::BLAS<int,FadType> sacado_blas(false);
2055  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2056 
2057  COMPARE_FAD_VECTORS(A1, A2, m*n);
2058 
2059  unsigned int sz = m*n*(1+ndot) + m + n;
2060  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2061  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2062 
2063  COMPARE_FAD_VECTORS(A1, A3, m*n);
2064 
2065  sacado_blas.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
2066 
2067  COMPARE_FAD_VECTORS(A1, A4, m*n);
2068 
2069  sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
2070 
2071  COMPARE_FAD_VECTORS(A1, A5, m*n);
2072 }
2073 
2074 // Tests constant A
2076  typedef decltype(this->fad) FadType;
2077  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2078  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2079  auto m = this->m_;
2080  auto n = this->n_;
2081  auto ndot = this->ndot_;
2082 
2083 
2084  // GER is apparently not defined in the BLAS for complex types
2086  return;
2087 
2088  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
2089  for (unsigned int j=0; j<n; j++) {
2090  for (unsigned int i=0; i<m; i++) {
2091  ScalarType val = this->urand.number();
2092  A1[i+j*m] = val;
2093  A2[i+j*m] = val;
2094  A3[i+j*m] = val;
2095  }
2096  }
2097  for (unsigned int i=0; i<m; i++) {
2098  x[i] = FadType(ndot, this->urand.number());
2099  for (unsigned int k=0; k<ndot; k++)
2100  x[i].fastAccessDx(k) = this->urand.number();
2101  }
2102  for (unsigned int i=0; i<n; i++) {
2103  y[i] = FadType(ndot, this->urand.number());
2104  for (unsigned int k=0; k<ndot; k++)
2105  y[i].fastAccessDx(k) = this->urand.number();
2106  }
2107  FadType alpha(ndot, this->urand.number());
2108  for (unsigned int k=0; k<ndot; k++) {
2109  alpha.fastAccessDx(k) = this->urand.number();
2110  }
2111 
2112  Teuchos::BLAS<int,FadType> teuchos_blas;
2113  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2114 
2115  Teuchos::BLAS<int,FadType> sacado_blas(false);
2116  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2117 
2118  COMPARE_FAD_VECTORS(A1, A2, m*n);
2119 
2120  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
2121  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2122  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2123 
2124  COMPARE_FAD_VECTORS(A1, A3, m*n);
2125 }
2126 
2127 // Tests all arguments
2129  typedef decltype(this->fad) FadType;
2130  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2131  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2132  auto m = this->m_;
2133  auto n = this->n_;
2134  auto l = this->l_;
2135  auto ndot = this->ndot_;
2136 
2137  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2138  for (unsigned int j=0; j<l; j++) {
2139  for (unsigned int i=0; i<m; i++) {
2140  A[i+j*m] = FadType(ndot, this->urand.number());
2141  for (unsigned int k=0; k<ndot; k++)
2142  A[i+j*m].fastAccessDx(k) = this->urand.number();
2143  }
2144  }
2145  for (unsigned int j=0; j<n; j++) {
2146  for (unsigned int i=0; i<l; i++) {
2147  B[i+j*l] = FadType(ndot, this->urand.number());
2148  for (unsigned int k=0; k<ndot; k++)
2149  B[i+j*l].fastAccessDx(k) = this->urand.number();
2150  }
2151  }
2152  FadType alpha(ndot, this->urand.number());
2153  FadType beta(ndot, this->urand.number());
2154  for (unsigned int k=0; k<ndot; k++) {
2155  alpha.fastAccessDx(k) = this->urand.number();
2156  beta.fastAccessDx(k) = this->urand.number();
2157  }
2158 
2159  for (unsigned int j=0; j<n; j++) {
2160  for (unsigned int i=0; i<m; i++) {
2161  ScalarType val = this->urand.number();
2162  C1[i+j*m] = FadType(ndot, val);
2163  C2[i+j*m] = FadType(ndot, val);
2164  C3[i+j*m] = FadType(ndot, val);
2165  for (unsigned int k=0; k<ndot; k++) {
2166  val = this->urand.number();
2167  C1[i+j*m].fastAccessDx(k) = val;
2168  C2[i+j*m].fastAccessDx(k) = val;
2169  C3[i+j*m].fastAccessDx(k) = val;
2170  }
2171  }
2172  }
2173 
2174  Teuchos::BLAS<int,FadType> teuchos_blas;
2175  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2176  &A[0], m, &B[0], l, beta, &C1[0], m);
2177 
2178  Teuchos::BLAS<int,FadType> sacado_blas(false);
2179  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2180  &A[0], m, &B[0], l, beta, &C2[0], m);
2181 
2182  COMPARE_FAD_VECTORS(C1, C2, m*n);
2183 
2184  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2185  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2186  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2187  &A[0], m, &B[0], l, beta, &C3[0], m);
2188 
2189  COMPARE_FAD_VECTORS(C1, C3, m*n);
2190 
2191  // transa
2192  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2193  &A[0], l, &B[0], l, beta, &C1[0], m);
2194  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2195  &A[0], l, &B[0], l, beta, &C2[0], m);
2196  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2197  &A[0], l, &B[0], l, beta, &C3[0], m);
2198 
2199  COMPARE_FAD_VECTORS(C1, C2, m*n);
2200  COMPARE_FAD_VECTORS(C1, C3, m*n);
2201 
2202  // transb
2203  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2204  &A[0], m, &B[0], n, beta, &C1[0], m);
2205  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2206  &A[0], m, &B[0], n, beta, &C2[0], m);
2207  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2208  &A[0], m, &B[0], n, beta, &C3[0], m);
2209 
2210  COMPARE_FAD_VECTORS(C1, C2, m*n);
2211  COMPARE_FAD_VECTORS(C1, C3, m*n);
2212 
2213  // transa and transb
2214  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2215  &A[0], l, &B[0], n, beta, &C1[0], m);
2216  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2217  &A[0], l, &B[0], n, beta, &C2[0], m);
2218  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2219  &A[0], l, &B[0], n, beta, &C3[0], m);
2220 
2221  COMPARE_FAD_VECTORS(C1, C2, m*n);
2222  COMPARE_FAD_VECTORS(C1, C3, m*n);
2223 }
2224 
2225 // Tests different lda, ldb, ldc
2227  typedef decltype(this->fad) FadType;
2228  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2229  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2230  auto m = this->m_;
2231  auto n = this->n_;
2232  auto l = this->l_;
2233  auto ndot = this->ndot_;
2234 
2235  unsigned int lda = m+4;
2236  unsigned int ldb = l+4;
2237  unsigned int ldc = m+5;
2238  VectorType A(lda*l,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2239  C3(ldc*n,ndot);
2240  for (unsigned int j=0; j<l; j++) {
2241  for (unsigned int i=0; i<lda; i++) {
2242  A[i+j*lda] = FadType(ndot, this->urand.number());
2243  for (unsigned int k=0; k<ndot; k++)
2244  A[i+j*lda].fastAccessDx(k) = this->urand.number();
2245  }
2246  }
2247  for (unsigned int j=0; j<n; j++) {
2248  for (unsigned int i=0; i<ldb; i++) {
2249  B[i+j*ldb] = FadType(ndot, this->urand.number());
2250  for (unsigned int k=0; k<ndot; k++)
2251  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2252  }
2253  }
2254  FadType alpha(ndot, this->urand.number());
2255  FadType beta(ndot, this->urand.number());
2256  for (unsigned int k=0; k<ndot; k++) {
2257  alpha.fastAccessDx(k) = this->urand.number();
2258  beta.fastAccessDx(k) = this->urand.number();
2259  }
2260 
2261  for (unsigned int j=0; j<n; j++) {
2262  for (unsigned int i=0; i<ldc; i++) {
2263  ScalarType val = this->urand.number();
2264  C1[i+j*ldc] = FadType(ndot, val);
2265  C2[i+j*ldc] = FadType(ndot, val);
2266  C3[i+j*ldc] = FadType(ndot, val);
2267  for (unsigned int k=0; k<ndot; k++) {
2268  val = this->urand.number();
2269  C1[i+j*ldc].fastAccessDx(k) = val;
2270  C2[i+j*ldc].fastAccessDx(k) = val;
2271  C3[i+j*ldc].fastAccessDx(k) = val;
2272  }
2273  }
2274  }
2275 
2276  Teuchos::BLAS<int,FadType> teuchos_blas;
2277  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2278  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2279 
2280  Teuchos::BLAS<int,FadType> sacado_blas(false);
2281  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2282  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2283 
2284  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2285 
2286  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2287  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2288  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2289  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2290 
2291  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2292 }
2293 
2294 // Tests different lda, ldb, ldc with transa
2296  typedef decltype(this->fad) FadType;
2297  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2298  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2299  auto m = this->m_;
2300  auto n = this->n_;
2301  auto l = this->l_;
2302  auto ndot = this->ndot_;
2303 
2304  unsigned int lda = l+3;
2305  unsigned int ldb = l+4;
2306  unsigned int ldc = m+5;
2307  VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2308  C3(ldc*n,ndot);
2309  for (unsigned int j=0; j<m; j++) {
2310  for (unsigned int i=0; i<lda; i++) {
2311  A[i+j*lda] = FadType(ndot, this->urand.number());
2312  for (unsigned int k=0; k<ndot; k++)
2313  A[i+j*lda].fastAccessDx(k) = this->urand.number();
2314  }
2315  }
2316  for (unsigned int j=0; j<n; j++) {
2317  for (unsigned int i=0; i<ldb; i++) {
2318  B[i+j*ldb] = FadType(ndot, this->urand.number());
2319  for (unsigned int k=0; k<ndot; k++)
2320  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2321  }
2322  }
2323  FadType alpha(ndot, this->urand.number());
2324  FadType beta(ndot, this->urand.number());
2325  for (unsigned int k=0; k<ndot; k++) {
2326  alpha.fastAccessDx(k) = this->urand.number();
2327  beta.fastAccessDx(k) = this->urand.number();
2328  }
2329 
2330  for (unsigned int j=0; j<n; j++) {
2331  for (unsigned int i=0; i<ldc; i++) {
2332  ScalarType val = this->urand.number();
2333  C1[i+j*ldc] = FadType(ndot, val);
2334  C2[i+j*ldc] = FadType(ndot, val);
2335  C3[i+j*ldc] = FadType(ndot, val);
2336  for (unsigned int k=0; k<ndot; k++) {
2337  val = this->urand.number();
2338  C1[i+j*ldc].fastAccessDx(k) = val;
2339  C2[i+j*ldc].fastAccessDx(k) = val;
2340  C3[i+j*ldc].fastAccessDx(k) = val;
2341  }
2342  }
2343  }
2344 
2345  Teuchos::BLAS<int,FadType> teuchos_blas;
2346  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2347  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2348 
2349  Teuchos::BLAS<int,FadType> sacado_blas(false);
2350  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2351  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2352 
2353  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2354 
2355  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2356  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2357  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2358  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2359 
2360  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2361 }
2362 
2363 // Tests different lda, ldb, ldc with transb
2365  typedef decltype(this->fad) FadType;
2366  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2367  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2368  auto m = this->m_;
2369  auto n = this->n_;
2370  auto l = this->l_;
2371  auto ndot = this->ndot_;
2372 
2373  unsigned int lda = m+4;
2374  unsigned int ldb = n+4;
2375  unsigned int ldc = m+5;
2376  VectorType A(lda*l,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2377  C3(ldc*n,ndot);
2378  for (unsigned int j=0; j<l; j++) {
2379  for (unsigned int i=0; i<lda; i++) {
2380  A[i+j*lda] = FadType(ndot, this->urand.number());
2381  for (unsigned int k=0; k<ndot; k++)
2382  A[i+j*lda].fastAccessDx(k) = this->urand.number();
2383  }
2384  }
2385  for (unsigned int j=0; j<l; j++) {
2386  for (unsigned int i=0; i<ldb; i++) {
2387  B[i+j*ldb] = FadType(ndot, this->urand.number());
2388  for (unsigned int k=0; k<ndot; k++)
2389  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2390  }
2391  }
2392  FadType alpha(ndot, this->urand.number());
2393  FadType beta(ndot, this->urand.number());
2394  for (unsigned int k=0; k<ndot; k++) {
2395  alpha.fastAccessDx(k) = this->urand.number();
2396  beta.fastAccessDx(k) = this->urand.number();
2397  }
2398 
2399  for (unsigned int j=0; j<n; j++) {
2400  for (unsigned int i=0; i<ldc; i++) {
2401  ScalarType val = this->urand.number();
2402  C1[i+j*ldc] = FadType(ndot, val);
2403  C2[i+j*ldc] = FadType(ndot, val);
2404  C3[i+j*ldc] = FadType(ndot, val);
2405  for (unsigned int k=0; k<ndot; k++) {
2406  val = this->urand.number();
2407  C1[i+j*ldc].fastAccessDx(k) = val;
2408  C2[i+j*ldc].fastAccessDx(k) = val;
2409  C3[i+j*ldc].fastAccessDx(k) = val;
2410  }
2411  }
2412  }
2413 
2414  Teuchos::BLAS<int,FadType> teuchos_blas;
2415  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2416  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2417 
2418  Teuchos::BLAS<int,FadType> sacado_blas(false);
2419  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2420  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2421 
2422  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2423 
2424  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2425  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2426  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2427  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2428 
2429  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2430 }
2431 
2432 // Tests different lda, ldb, ldc with transa and transb
2434  typedef decltype(this->fad) FadType;
2435  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2436  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2437  auto m = this->m_;
2438  auto n = this->n_;
2439  auto l = this->l_;
2440  auto ndot = this->ndot_;
2441 
2442  unsigned int lda = l+3;
2443  unsigned int ldb = n+4;
2444  unsigned int ldc = m+5;
2445  VectorType A(lda*m,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2446  C3(ldc*n,ndot);
2447  for (unsigned int j=0; j<m; j++) {
2448  for (unsigned int i=0; i<lda; i++) {
2449  A[i+j*lda] = FadType(ndot, this->urand.number());
2450  for (unsigned int k=0; k<ndot; k++)
2451  A[i+j*lda].fastAccessDx(k) = this->urand.number();
2452  }
2453  }
2454  for (unsigned int j=0; j<l; j++) {
2455  for (unsigned int i=0; i<ldb; i++) {
2456  B[i+j*ldb] = FadType(ndot, this->urand.number());
2457  for (unsigned int k=0; k<ndot; k++)
2458  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2459  }
2460  }
2461  FadType alpha(ndot, this->urand.number());
2462  FadType beta(ndot, this->urand.number());
2463  for (unsigned int k=0; k<ndot; k++) {
2464  alpha.fastAccessDx(k) = this->urand.number();
2465  beta.fastAccessDx(k) = this->urand.number();
2466  }
2467 
2468  for (unsigned int j=0; j<n; j++) {
2469  for (unsigned int i=0; i<ldc; i++) {
2470  ScalarType val = this->urand.number();
2471  C1[i+j*ldc] = FadType(ndot, val);
2472  C2[i+j*ldc] = FadType(ndot, val);
2473  C3[i+j*ldc] = FadType(ndot, val);
2474  for (unsigned int k=0; k<ndot; k++) {
2475  val = this->urand.number();
2476  C1[i+j*ldc].fastAccessDx(k) = val;
2477  C2[i+j*ldc].fastAccessDx(k) = val;
2478  C3[i+j*ldc].fastAccessDx(k) = val;
2479  }
2480  }
2481  }
2482 
2483  Teuchos::BLAS<int,FadType> teuchos_blas;
2484  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2485  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2486 
2487  Teuchos::BLAS<int,FadType> sacado_blas(false);
2488  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2489  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2490 
2491  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2492 
2493  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2494  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2495  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2496  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2497 
2498  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2499 }
2500 
2501 // Tests with constant C
2503  typedef decltype(this->fad) FadType;
2504  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2505  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2506  auto m = this->m_;
2507  auto n = this->n_;
2508  auto l = this->l_;
2509  auto ndot = this->ndot_;
2510 
2511  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2512  for (unsigned int j=0; j<l; j++) {
2513  for (unsigned int i=0; i<m; i++) {
2514  A[i+j*m] = FadType(ndot, this->urand.number());
2515  for (unsigned int k=0; k<ndot; k++)
2516  A[i+j*m].fastAccessDx(k) = this->urand.number();
2517  }
2518  }
2519  for (unsigned int j=0; j<n; j++) {
2520  for (unsigned int i=0; i<l; i++) {
2521  B[i+j*l] = FadType(ndot, this->urand.number());
2522  for (unsigned int k=0; k<ndot; k++)
2523  B[i+j*l].fastAccessDx(k) = this->urand.number();
2524  }
2525  }
2526  FadType alpha(ndot, this->urand.number());
2527  FadType beta(ndot, this->urand.number());
2528  for (unsigned int k=0; k<ndot; k++) {
2529  alpha.fastAccessDx(k) = this->urand.number();
2530  beta.fastAccessDx(k) = this->urand.number();
2531  }
2532 
2533  for (unsigned int j=0; j<n; j++) {
2534  for (unsigned int i=0; i<m; i++) {
2535  ScalarType val = this->urand.number();
2536  C1[i+j*m] = val;
2537  C2[i+j*m] = val;
2538  C3[i+j*m] = val;
2539  }
2540  }
2541 
2542  Teuchos::BLAS<int,FadType> teuchos_blas;
2543  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2544  &A[0], m, &B[0], l, beta, &C1[0], m);
2545 
2546  Teuchos::BLAS<int,FadType> sacado_blas(false);
2547  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2548  &A[0], m, &B[0], l, beta, &C2[0], m);
2549 
2550  COMPARE_FAD_VECTORS(C1, C2, m*n);
2551 
2552  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2553  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2554  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2555  &A[0], m, &B[0], l, beta, &C3[0], m);
2556 
2557  COMPARE_FAD_VECTORS(C1, C3, m*n);
2558 
2559  // transa
2560  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2561  &A[0], l, &B[0], l, beta, &C1[0], m);
2562  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2563  &A[0], l, &B[0], l, beta, &C2[0], m);
2564  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2565  &A[0], l, &B[0], l, beta, &C3[0], m);
2566 
2567  COMPARE_FAD_VECTORS(C1, C2, m*n);
2568  COMPARE_FAD_VECTORS(C1, C3, m*n);
2569 
2570  // transb
2571  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2572  &A[0], m, &B[0], n, beta, &C1[0], m);
2573  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2574  &A[0], m, &B[0], n, beta, &C2[0], m);
2575  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2576  &A[0], m, &B[0], n, beta, &C3[0], m);
2577 
2578  COMPARE_FAD_VECTORS(C1, C2, m*n);
2579  COMPARE_FAD_VECTORS(C1, C3, m*n);
2580 
2581  // transa and transb
2582  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2583  &A[0], l, &B[0], n, beta, &C1[0], m);
2584  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2585  &A[0], l, &B[0], n, beta, &C2[0], m);
2586  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2587  &A[0], l, &B[0], n, beta, &C3[0], m);
2588 
2589  COMPARE_FAD_VECTORS(C1, C2, m*n);
2590  COMPARE_FAD_VECTORS(C1, C3, m*n);
2591 }
2592 
2593 // Tests with constant alpha, beta
2595  typedef decltype(this->fad) FadType;
2596  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2597  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2598  auto m = this->m_;
2599  auto n = this->n_;
2600  auto l = this->l_;
2601  auto ndot = this->ndot_;
2602 
2603  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2604  for (unsigned int j=0; j<l; j++) {
2605  for (unsigned int i=0; i<m; i++) {
2606  A[i+j*m] = FadType(ndot, this->urand.number());
2607  for (unsigned int k=0; k<ndot; k++)
2608  A[i+j*m].fastAccessDx(k) = this->urand.number();
2609  }
2610  }
2611  for (unsigned int j=0; j<n; j++) {
2612  for (unsigned int i=0; i<l; i++) {
2613  B[i+j*l] = FadType(ndot, this->urand.number());
2614  for (unsigned int k=0; k<ndot; k++)
2615  B[i+j*l].fastAccessDx(k) = this->urand.number();
2616  }
2617  }
2618  ScalarType alpha = this->urand.number();
2619  ScalarType beta = this->urand.number();
2620 
2621  for (unsigned int j=0; j<n; j++) {
2622  for (unsigned int i=0; i<m; i++) {
2623  ScalarType val = this->urand.number();
2624  C1[i+j*m] = FadType(ndot, val);
2625  C2[i+j*m] = FadType(ndot, val);
2626  C3[i+j*m] = FadType(ndot, val);
2627  for (unsigned int k=0; k<ndot; k++) {
2628  val = this->urand.number();
2629  C1[i+j*m].fastAccessDx(k) = val;
2630  C2[i+j*m].fastAccessDx(k) = val;
2631  C3[i+j*m].fastAccessDx(k) = val;
2632  }
2633  }
2634  }
2635 
2636  Teuchos::BLAS<int,FadType> teuchos_blas;
2637  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2638  &A[0], m, &B[0], l, beta, &C1[0], m);
2639 
2640  Teuchos::BLAS<int,FadType> sacado_blas(false);
2641  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2642  &A[0], m, &B[0], l, beta, &C2[0], m);
2643 
2644  COMPARE_FAD_VECTORS(C1, C2, m*n);
2645 
2646  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2647  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2648  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2649  &A[0], m, &B[0], l, beta, &C3[0], m);
2650 
2651  COMPARE_FAD_VECTORS(C1, C3, m*n);
2652 
2653  // transa
2654  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2655  &A[0], l, &B[0], l, beta, &C1[0], m);
2656  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2657  &A[0], l, &B[0], l, beta, &C2[0], m);
2658  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2659  &A[0], l, &B[0], l, beta, &C3[0], m);
2660 
2661  COMPARE_FAD_VECTORS(C1, C2, m*n);
2662  COMPARE_FAD_VECTORS(C1, C3, m*n);
2663 
2664  // transb
2665  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2666  &A[0], m, &B[0], n, beta, &C1[0], m);
2667  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2668  &A[0], m, &B[0], n, beta, &C2[0], m);
2669  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2670  &A[0], m, &B[0], n, beta, &C3[0], m);
2671 
2672  COMPARE_FAD_VECTORS(C1, C2, m*n);
2673  COMPARE_FAD_VECTORS(C1, C3, m*n);
2674 
2675  // transa and transb
2676  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2677  &A[0], l, &B[0], n, beta, &C1[0], m);
2678  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2679  &A[0], l, &B[0], n, beta, &C2[0], m);
2680  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2681  &A[0], l, &B[0], n, beta, &C3[0], m);
2682 
2683  COMPARE_FAD_VECTORS(C1, C2, m*n);
2684  COMPARE_FAD_VECTORS(C1, C3, m*n);
2685 }
2686 
2687 // Tests with constant A
2689  typedef decltype(this->fad) FadType;
2690  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2691  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2692  auto m = this->m_;
2693  auto n = this->n_;
2694  auto l = this->l_;
2695  auto ndot = this->ndot_;
2696 
2697  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2698  C4(m*n,ndot), C5(m*n,ndot);
2699  std::vector<ScalarType> a(m*l);
2700  for (unsigned int j=0; j<l; j++) {
2701  for (unsigned int i=0; i<m; i++) {
2702  a[i+j*m] = this->urand.number();
2703  A[i+j*m] = a[i+j*m];
2704  }
2705  }
2706  for (unsigned int j=0; j<n; j++) {
2707  for (unsigned int i=0; i<l; i++) {
2708  B[i+j*l] = FadType(ndot, this->urand.number());
2709  for (unsigned int k=0; k<ndot; k++)
2710  B[i+j*l].fastAccessDx(k) = this->urand.number();
2711  }
2712  }
2713  FadType alpha(ndot, this->urand.number());
2714  FadType beta(ndot, this->urand.number());
2715  for (unsigned int k=0; k<ndot; k++) {
2716  alpha.fastAccessDx(k) = this->urand.number();
2717  beta.fastAccessDx(k) = this->urand.number();
2718  }
2719 
2720  for (unsigned int j=0; j<n; j++) {
2721  for (unsigned int i=0; i<m; i++) {
2722  ScalarType val = this->urand.number();
2723  C1[i+j*m] = FadType(ndot, val);
2724  C2[i+j*m] = FadType(ndot, val);
2725  C3[i+j*m] = FadType(ndot, val);
2726  C4[i+j*m] = FadType(ndot, val);
2727  C5[i+j*m] = FadType(ndot, val);
2728  for (unsigned int k=0; k<ndot; k++) {
2729  val = this->urand.number();
2730  C1[i+j*m].fastAccessDx(k) = val;
2731  C2[i+j*m].fastAccessDx(k) = val;
2732  C3[i+j*m].fastAccessDx(k) = val;
2733  C4[i+j*m].fastAccessDx(k) = val;
2734  C5[i+j*m].fastAccessDx(k) = val;
2735  }
2736  }
2737  }
2738 
2739  Teuchos::BLAS<int,FadType> teuchos_blas;
2740  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2741  &A[0], m, &B[0], l, beta, &C1[0], m);
2742 
2743  Teuchos::BLAS<int,FadType> sacado_blas(false);
2744  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2745  &A[0], m, &B[0], l, beta, &C2[0], m);
2746 
2747  COMPARE_FAD_VECTORS(C1, C2, m*n);
2748 
2749  unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
2750  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2751  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2752  &A[0], m, &B[0], l, beta, &C3[0], m);
2753 
2754  COMPARE_FAD_VECTORS(C1, C3, m*n);
2755 
2756  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2757  &a[0], m, &B[0], l, beta, &C4[0], m);
2758 
2759  COMPARE_FAD_VECTORS(C1, C4, m*n);
2760 
2761  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2762  &a[0], m, &B[0], l, beta, &C5[0], m);
2763 
2764  COMPARE_FAD_VECTORS(C1, C5, m*n);
2765 
2766  // transa
2767  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2768  &A[0], l, &B[0], l, beta, &C1[0], m);
2769  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2770  &A[0], l, &B[0], l, beta, &C2[0], m);
2771  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2772  &A[0], l, &B[0], l, beta, &C3[0], m);
2773  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2774  &a[0], l, &B[0], l, beta, &C4[0], m);
2775  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2776  &a[0], l, &B[0], l, beta, &C5[0], m);
2777 
2778  COMPARE_FAD_VECTORS(C1, C2, m*n);
2779  COMPARE_FAD_VECTORS(C1, C3, m*n);
2780  COMPARE_FAD_VECTORS(C1, C4, m*n);
2781  COMPARE_FAD_VECTORS(C1, C5, m*n);
2782 
2783  // transb
2784  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2785  &A[0], m, &B[0], n, beta, &C1[0], m);
2786  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2787  &A[0], m, &B[0], n, beta, &C2[0], m);
2788  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2789  &A[0], m, &B[0], n, beta, &C3[0], m);
2790  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2791  &a[0], m, &B[0], n, beta, &C4[0], m);
2792  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2793  &a[0], m, &B[0], n, beta, &C5[0], m);
2794 
2795  COMPARE_FAD_VECTORS(C1, C2, m*n);
2796  COMPARE_FAD_VECTORS(C1, C3, m*n);
2797  COMPARE_FAD_VECTORS(C1, C4, m*n);
2798  COMPARE_FAD_VECTORS(C1, C5, m*n);
2799 
2800  // transa and transb
2801  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2802  &A[0], l, &B[0], n, beta, &C1[0], m);
2803  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2804  &A[0], l, &B[0], n, beta, &C2[0], m);
2805  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2806  &A[0], l, &B[0], n, beta, &C3[0], m);
2807  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2808  &a[0], l, &B[0], n, beta, &C4[0], m);
2809  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2810  &a[0], l, &B[0], n, beta, &C5[0], m);
2811 
2812  COMPARE_FAD_VECTORS(C1, C2, m*n);
2813  COMPARE_FAD_VECTORS(C1, C3, m*n);
2814  COMPARE_FAD_VECTORS(C1, C4, m*n);
2815  COMPARE_FAD_VECTORS(C1, C5, m*n);
2816 }
2817 
2818 // Tests with constant B
2820  typedef decltype(this->fad) FadType;
2821  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2822  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2823  auto m = this->m_;
2824  auto n = this->n_;
2825  auto l = this->l_;
2826  auto ndot = this->ndot_;
2827 
2828  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2829  C4(m*n,ndot), C5(m*n,ndot);
2830  std::vector<ScalarType> b(l*n);
2831  for (unsigned int j=0; j<l; j++) {
2832  for (unsigned int i=0; i<m; i++) {
2833  A[i+j*m] = FadType(ndot, this->urand.number());
2834  for (unsigned int k=0; k<ndot; k++)
2835  A[i+j*m].fastAccessDx(k) = this->urand.number();
2836  }
2837  }
2838  for (unsigned int j=0; j<n; j++) {
2839  for (unsigned int i=0; i<l; i++) {
2840  b[i+j*l] = this->urand.number();
2841  B[i+j*l] = b[i+j*l];
2842  }
2843  }
2844  FadType alpha(ndot, this->urand.number());
2845  FadType beta(ndot, this->urand.number());
2846  for (unsigned int k=0; k<ndot; k++) {
2847  alpha.fastAccessDx(k) = this->urand.number();
2848  beta.fastAccessDx(k) = this->urand.number();
2849  }
2850 
2851  for (unsigned int j=0; j<n; j++) {
2852  for (unsigned int i=0; i<m; i++) {
2853  ScalarType val = this->urand.number();
2854  C1[i+j*m] = FadType(ndot, val);
2855  C2[i+j*m] = FadType(ndot, val);
2856  C3[i+j*m] = FadType(ndot, val);
2857  C4[i+j*m] = FadType(ndot, val);
2858  C5[i+j*m] = FadType(ndot, val);
2859  for (unsigned int k=0; k<ndot; k++) {
2860  val = this->urand.number();
2861  C1[i+j*m].fastAccessDx(k) = val;
2862  C2[i+j*m].fastAccessDx(k) = val;
2863  C3[i+j*m].fastAccessDx(k) = val;
2864  C4[i+j*m].fastAccessDx(k) = val;
2865  C5[i+j*m].fastAccessDx(k) = val;
2866  }
2867  }
2868  }
2869 
2870  Teuchos::BLAS<int,FadType> teuchos_blas;
2871  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2872  &A[0], m, &B[0], l, beta, &C1[0], m);
2873 
2874  Teuchos::BLAS<int,FadType> sacado_blas(false);
2875  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2876  &A[0], m, &B[0], l, beta, &C2[0], m);
2877 
2878  COMPARE_FAD_VECTORS(C1, C2, m*n);
2879 
2880  unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
2881  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2882  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2883  &A[0], m, &B[0], l, beta, &C3[0], m);
2884 
2885  COMPARE_FAD_VECTORS(C1, C3, m*n);
2886 
2887  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2888  &A[0], m, &b[0], l, beta, &C4[0], m);
2889 
2890  COMPARE_FAD_VECTORS(C1, C4, m*n);
2891 
2892  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2893  &A[0], m, &b[0], l, beta, &C5[0], m);
2894 
2895  COMPARE_FAD_VECTORS(C1, C5, m*n);
2896 
2897  // transa
2898  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2899  &A[0], l, &B[0], l, beta, &C1[0], m);
2900  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2901  &A[0], l, &B[0], l, beta, &C2[0], m);
2902  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2903  &A[0], l, &B[0], l, beta, &C3[0], m);
2904  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2905  &A[0], l, &b[0], l, beta, &C4[0], m);
2906  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2907  &A[0], l, &b[0], l, beta, &C5[0], m);
2908 
2909  COMPARE_FAD_VECTORS(C1, C2, m*n);
2910  COMPARE_FAD_VECTORS(C1, C3, m*n);
2911  COMPARE_FAD_VECTORS(C1, C4, m*n);
2912  COMPARE_FAD_VECTORS(C1, C5, m*n);
2913 
2914  // transb
2915  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2916  &A[0], m, &B[0], n, beta, &C1[0], m);
2917  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2918  &A[0], m, &B[0], n, beta, &C2[0], m);
2919  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2920  &A[0], m, &B[0], n, beta, &C3[0], m);
2921  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2922  &A[0], m, &b[0], n, beta, &C4[0], m);
2923  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2924  &A[0], m, &b[0], n, beta, &C5[0], m);
2925 
2926  COMPARE_FAD_VECTORS(C1, C2, m*n);
2927  COMPARE_FAD_VECTORS(C1, C3, m*n);
2928  COMPARE_FAD_VECTORS(C1, C4, m*n);
2929  COMPARE_FAD_VECTORS(C1, C5, m*n);
2930 
2931  // transa and transb
2932  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2933  &A[0], l, &B[0], n, beta, &C1[0], m);
2934  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2935  &A[0], l, &B[0], n, beta, &C2[0], m);
2936  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2937  &A[0], l, &B[0], n, beta, &C3[0], m);
2938  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2939  &A[0], l, &b[0], n, beta, &C4[0], m);
2940  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2941  &A[0], l, &b[0], n, beta, &C5[0], m);
2942 
2943  COMPARE_FAD_VECTORS(C1, C2, m*n);
2944  COMPARE_FAD_VECTORS(C1, C3, m*n);
2945  COMPARE_FAD_VECTORS(C1, C4, m*n);
2946  COMPARE_FAD_VECTORS(C1, C5, m*n);
2947 }
2948 
2949 // Tests with constant A and B
2951  typedef decltype(this->fad) FadType;
2952  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2953  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
2954  auto m = this->m_;
2955  auto n = this->n_;
2956  auto l = this->l_;
2957  auto ndot = this->ndot_;
2958 
2959  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2960  C4(m*n,ndot), C5(m*n,ndot);
2961  std::vector<ScalarType> a(m*l), b(l*n);
2962  for (unsigned int j=0; j<l; j++) {
2963  for (unsigned int i=0; i<m; i++) {
2964  a[i+j*m] = this->urand.number();
2965  A[i+j*m] = a[i+j*m];
2966  }
2967  }
2968  for (unsigned int j=0; j<n; j++) {
2969  for (unsigned int i=0; i<l; i++) {
2970  b[i+j*l] = this->urand.number();
2971  B[i+j*l] = b[i+j*l];
2972  }
2973  }
2974  FadType alpha(ndot, this->urand.number());
2975  FadType beta(ndot, this->urand.number());
2976  for (unsigned int k=0; k<ndot; k++) {
2977  alpha.fastAccessDx(k) = this->urand.number();
2978  beta.fastAccessDx(k) = this->urand.number();
2979  }
2980 
2981  for (unsigned int j=0; j<n; j++) {
2982  for (unsigned int i=0; i<m; i++) {
2983  ScalarType val = this->urand.number();
2984  C1[i+j*m] = FadType(ndot, val);
2985  C2[i+j*m] = FadType(ndot, val);
2986  C3[i+j*m] = FadType(ndot, val);
2987  C4[i+j*m] = FadType(ndot, val);
2988  C5[i+j*m] = FadType(ndot, val);
2989  for (unsigned int k=0; k<ndot; k++) {
2990  val = this->urand.number();
2991  C1[i+j*m].fastAccessDx(k) = val;
2992  C2[i+j*m].fastAccessDx(k) = val;
2993  C3[i+j*m].fastAccessDx(k) = val;
2994  C4[i+j*m].fastAccessDx(k) = val;
2995  C5[i+j*m].fastAccessDx(k) = val;
2996  }
2997  }
2998  }
2999 
3000  Teuchos::BLAS<int,FadType> teuchos_blas;
3001  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3002  &A[0], m, &B[0], l, beta, &C1[0], m);
3003 
3004  Teuchos::BLAS<int,FadType> sacado_blas(false);
3005  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3006  &A[0], m, &B[0], l, beta, &C2[0], m);
3007 
3008  COMPARE_FAD_VECTORS(C1, C2, m*n);
3009 
3010  unsigned int sz = m*l + l*n + m*n*(1+ndot);
3011  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3012  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3013  &A[0], m, &B[0], l, beta, &C3[0], m);
3014 
3015  COMPARE_FAD_VECTORS(C1, C3, m*n);
3016 
3017  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3018  &a[0], m, &b[0], l, beta, &C4[0], m);
3019 
3020  COMPARE_FAD_VECTORS(C1, C4, m*n);
3021 
3022  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3023  &a[0], m, &b[0], l, beta, &C5[0], m);
3024 
3025  COMPARE_FAD_VECTORS(C1, C5, m*n);
3026 
3027  // transa
3028  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3029  &A[0], l, &B[0], l, beta, &C1[0], m);
3030  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3031  &A[0], l, &B[0], l, beta, &C2[0], m);
3032  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3033  &A[0], l, &B[0], l, beta, &C3[0], m);
3034  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3035  &a[0], l, &b[0], l, beta, &C4[0], m);
3036  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3037  &a[0], l, &b[0], l, beta, &C5[0], m);
3038 
3039  COMPARE_FAD_VECTORS(C1, C2, m*n);
3040  COMPARE_FAD_VECTORS(C1, C3, m*n);
3041  COMPARE_FAD_VECTORS(C1, C4, m*n);
3042  COMPARE_FAD_VECTORS(C1, C5, m*n);
3043 
3044  // transb
3045  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3046  &A[0], m, &B[0], n, beta, &C1[0], m);
3047  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3048  &A[0], m, &B[0], n, beta, &C2[0], m);
3049  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3050  &A[0], m, &B[0], n, beta, &C3[0], m);
3051  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3052  &a[0], m, &b[0], n, beta, &C4[0], m);
3053  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3054  &a[0], m, &b[0], n, beta, &C5[0], m);
3055 
3056  COMPARE_FAD_VECTORS(C1, C2, m*n);
3057  COMPARE_FAD_VECTORS(C1, C3, m*n);
3058  COMPARE_FAD_VECTORS(C1, C4, m*n);
3059  COMPARE_FAD_VECTORS(C1, C5, m*n);
3060 
3061  // transa and transb
3062  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3063  &A[0], l, &B[0], n, beta, &C1[0], m);
3064  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3065  &A[0], l, &B[0], n, beta, &C2[0], m);
3066  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3067  &A[0], l, &B[0], n, beta, &C3[0], m);
3068  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3069  &a[0], l, &b[0], n, beta, &C4[0], m);
3070  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3071  &a[0], l, &b[0], n, beta, &C5[0], m);
3072 
3073  COMPARE_FAD_VECTORS(C1, C2, m*n);
3074  COMPARE_FAD_VECTORS(C1, C3, m*n);
3075  COMPARE_FAD_VECTORS(C1, C4, m*n);
3076  COMPARE_FAD_VECTORS(C1, C5, m*n);
3077 }
3078 
3079 // Tests all arguments, left side
3081  typedef decltype(this->fad) FadType;
3082  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3083  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3084  auto m = this->m_;
3085  auto n = this->n_;
3086  auto ndot = this->ndot_;
3087 
3088 
3089  // SYMM is apparently not defined in the BLAS for complex types
3091  return;
3092 
3093  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3094  for (unsigned int j=0; j<m; j++) {
3095  for (unsigned int i=0; i<m; i++) {
3096  A[i+j*m] = FadType(ndot, this->urand.number());
3097  for (unsigned int k=0; k<ndot; k++)
3098  A[i+j*m].fastAccessDx(k) = this->urand.number();
3099  }
3100  }
3101  for (unsigned int j=0; j<n; j++) {
3102  for (unsigned int i=0; i<m; i++) {
3103  B[i+j*m] = FadType(ndot, this->urand.number());
3104  for (unsigned int k=0; k<ndot; k++)
3105  B[i+j*m].fastAccessDx(k) = this->urand.number();
3106  }
3107  }
3108  FadType alpha(ndot, this->urand.number());
3109  FadType beta(ndot, this->urand.number());
3110  for (unsigned int k=0; k<ndot; k++) {
3111  alpha.fastAccessDx(k) = this->urand.number();
3112  beta.fastAccessDx(k) = this->urand.number();
3113  }
3114 
3115  for (unsigned int j=0; j<n; j++) {
3116  for (unsigned int i=0; i<m; i++) {
3117  ScalarType val = this->urand.number();
3118  C1[i+j*m] = FadType(ndot, val);
3119  C2[i+j*m] = FadType(ndot, val);
3120  C3[i+j*m] = FadType(ndot, val);
3121  for (unsigned int k=0; k<ndot; k++) {
3122  val = this->urand.number();
3123  C1[i+j*m].fastAccessDx(k) = val;
3124  C2[i+j*m].fastAccessDx(k) = val;
3125  C3[i+j*m].fastAccessDx(k) = val;
3126  }
3127  }
3128  }
3129 
3130  Teuchos::BLAS<int,FadType> teuchos_blas;
3131  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3132  &A[0], m, &B[0], m, beta, &C1[0], m);
3133 
3134  Teuchos::BLAS<int,FadType> sacado_blas(false);
3135  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3136  &A[0], m, &B[0], m, beta, &C2[0], m);
3137 
3138  COMPARE_FAD_VECTORS(C1, C2, m*n);
3139 
3140  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3141  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3142  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3143  &A[0], m, &B[0], m, beta, &C3[0], m);
3144 
3145  COMPARE_FAD_VECTORS(C1, C3, m*n);
3146 
3147  // lower tri
3148  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3149  &A[0], m, &B[0], m, beta, &C1[0], m);
3150  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3151  &A[0], m, &B[0], m, beta, &C2[0], m);
3152  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3153  &A[0], m, &B[0], m, beta, &C3[0], m);
3154 
3155  COMPARE_FAD_VECTORS(C1, C2, m*n);
3156  COMPARE_FAD_VECTORS(C1, C3, m*n);
3157 }
3158 
3159 // Tests all arguments, right side
3161  typedef decltype(this->fad) FadType;
3162  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3163  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3164  auto m = this->m_;
3165  auto n = this->n_;
3166  auto ndot = this->ndot_;
3167 
3168 
3169  // SYMM is apparently not defined in the BLAS for complex types
3171  return;
3172 
3173  VectorType A(n*n,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3174  for (unsigned int j=0; j<n; j++) {
3175  for (unsigned int i=0; i<n; i++) {
3176  A[i+j*n] = FadType(ndot, this->urand.number());
3177  for (unsigned int k=0; k<ndot; k++)
3178  A[i+j*n].fastAccessDx(k) = this->urand.number();
3179  }
3180  }
3181  for (unsigned int j=0; j<n; j++) {
3182  for (unsigned int i=0; i<m; i++) {
3183  B[i+j*m] = FadType(ndot, this->urand.number());
3184  for (unsigned int k=0; k<ndot; k++)
3185  B[i+j*m].fastAccessDx(k) = this->urand.number();
3186  }
3187  }
3188  FadType alpha(ndot, this->urand.number());
3189  FadType beta(ndot, this->urand.number());
3190  for (unsigned int k=0; k<ndot; k++) {
3191  alpha.fastAccessDx(k) = this->urand.number();
3192  beta.fastAccessDx(k) = this->urand.number();
3193  }
3194 
3195  for (unsigned int j=0; j<n; j++) {
3196  for (unsigned int i=0; i<m; i++) {
3197  ScalarType val = this->urand.number();
3198  C1[i+j*m] = FadType(ndot, val);
3199  C2[i+j*m] = FadType(ndot, val);
3200  C3[i+j*m] = FadType(ndot, val);
3201  for (unsigned int k=0; k<ndot; k++) {
3202  val = this->urand.number();
3203  C1[i+j*m].fastAccessDx(k) = val;
3204  C2[i+j*m].fastAccessDx(k) = val;
3205  C3[i+j*m].fastAccessDx(k) = val;
3206  }
3207  }
3208  }
3209 
3210  Teuchos::BLAS<int,FadType> teuchos_blas;
3211  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3212  &A[0], n, &B[0], m, beta, &C1[0], m);
3213 
3214  Teuchos::BLAS<int,FadType> sacado_blas(false);
3215  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3216  &A[0], n, &B[0], m, beta, &C2[0], m);
3217 
3218  COMPARE_FAD_VECTORS(C1, C2, m*n);
3219 
3220  unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3221  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3222  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3223  &A[0], n, &B[0], m, beta, &C3[0], m);
3224 
3225  COMPARE_FAD_VECTORS(C1, C3, m*n);
3226 
3227  // lower tri
3228  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3229  &A[0], n, &B[0], m, beta, &C1[0], m);
3230  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3231  &A[0], n, &B[0], m, beta, &C2[0], m);
3232  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3233  &A[0], n, &B[0], m, beta, &C3[0], m);
3234 
3235  COMPARE_FAD_VECTORS(C1, C2, m*n);
3236  COMPARE_FAD_VECTORS(C1, C3, m*n);
3237 }
3238 
3239 // Tests different lda, ldb, ldc, left side
3241  typedef decltype(this->fad) FadType;
3242  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3243  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3244  auto m = this->m_;
3245  auto n = this->n_;
3246  auto ndot = this->ndot_;
3247 
3248 
3249  // SYMM is apparently not defined in the BLAS for complex types
3251  return;
3252 
3253  unsigned int lda = m+4;
3254  unsigned int ldb = m+5;
3255  unsigned int ldc = m+6;
3256  VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3257  C3(ldc*n,ndot);
3258  for (unsigned int j=0; j<m; j++) {
3259  for (unsigned int i=0; i<lda; i++) {
3260  A[i+j*lda] = FadType(ndot, this->urand.number());
3261  for (unsigned int k=0; k<ndot; k++)
3262  A[i+j*lda].fastAccessDx(k) = this->urand.number();
3263  }
3264  }
3265  for (unsigned int j=0; j<n; j++) {
3266  for (unsigned int i=0; i<ldb; i++) {
3267  B[i+j*ldb] = FadType(ndot, this->urand.number());
3268  for (unsigned int k=0; k<ndot; k++)
3269  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
3270  }
3271  }
3272  FadType alpha(ndot, this->urand.number());
3273  FadType beta(ndot, this->urand.number());
3274  for (unsigned int k=0; k<ndot; k++) {
3275  alpha.fastAccessDx(k) = this->urand.number();
3276  beta.fastAccessDx(k) = this->urand.number();
3277  }
3278 
3279  for (unsigned int j=0; j<n; j++) {
3280  for (unsigned int i=0; i<ldc; i++) {
3281  ScalarType val = this->urand.number();
3282  C1[i+j*ldc] = FadType(ndot, val);
3283  C2[i+j*ldc] = FadType(ndot, val);
3284  C3[i+j*ldc] = FadType(ndot, val);
3285  for (unsigned int k=0; k<ndot; k++) {
3286  val = this->urand.number();
3287  C1[i+j*ldc].fastAccessDx(k) = val;
3288  C2[i+j*ldc].fastAccessDx(k) = val;
3289  C3[i+j*ldc].fastAccessDx(k) = val;
3290  }
3291  }
3292  }
3293 
3294  Teuchos::BLAS<int,FadType> teuchos_blas;
3295  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3296  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3297 
3298  Teuchos::BLAS<int,FadType> sacado_blas(false);
3299  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3300  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3301 
3302  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3303 
3304  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3305  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3306  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3307  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3308 
3309  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3310 
3311  // lower tri
3312  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3313  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3314  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3315  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3316  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3317  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3318 
3319  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3320  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3321 }
3322 
3323 // Tests different lda, ldb, ldc, right side
3325  typedef decltype(this->fad) FadType;
3326  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3327  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3328  auto m = this->m_;
3329  auto n = this->n_;
3330  auto ndot = this->ndot_;
3331 
3332 
3333  // SYMM is apparently not defined in the BLAS for complex types
3335  return;
3336 
3337  unsigned int lda = n+4;
3338  unsigned int ldb = m+5;
3339  unsigned int ldc = m+6;
3340  VectorType A(lda*n,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3341  C3(ldc*n,ndot);
3342  for (unsigned int j=0; j<n; j++) {
3343  for (unsigned int i=0; i<lda; i++) {
3344  A[i+j*lda] = FadType(ndot, this->urand.number());
3345  for (unsigned int k=0; k<ndot; k++)
3346  A[i+j*lda].fastAccessDx(k) = this->urand.number();
3347  }
3348  }
3349  for (unsigned int j=0; j<n; j++) {
3350  for (unsigned int i=0; i<ldb; i++) {
3351  B[i+j*ldb] = FadType(ndot, this->urand.number());
3352  for (unsigned int k=0; k<ndot; k++)
3353  B[i+j*ldb].fastAccessDx(k) = this->urand.number();
3354  }
3355  }
3356  FadType alpha(ndot, this->urand.number());
3357  FadType beta(ndot, this->urand.number());
3358  for (unsigned int k=0; k<ndot; k++) {
3359  alpha.fastAccessDx(k) = this->urand.number();
3360  beta.fastAccessDx(k) = this->urand.number();
3361  }
3362 
3363  for (unsigned int j=0; j<n; j++) {
3364  for (unsigned int i=0; i<ldc; i++) {
3365  ScalarType val = this->urand.number();
3366  C1[i+j*ldc] = FadType(ndot, val);
3367  C2[i+j*ldc] = FadType(ndot, val);
3368  C3[i+j*ldc] = FadType(ndot, val);
3369  for (unsigned int k=0; k<ndot; k++) {
3370  val = this->urand.number();
3371  C1[i+j*ldc].fastAccessDx(k) = val;
3372  C2[i+j*ldc].fastAccessDx(k) = val;
3373  C3[i+j*ldc].fastAccessDx(k) = val;
3374  }
3375  }
3376  }
3377 
3378  Teuchos::BLAS<int,FadType> teuchos_blas;
3379  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3380  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3381 
3382  Teuchos::BLAS<int,FadType> sacado_blas(false);
3383  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3384  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3385 
3386  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3387 
3388  unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3389  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3390  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3391  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3392 
3393  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3394 
3395  // lower tri
3396  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3397  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3398  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3399  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3400  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3401  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3402 
3403  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3404  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3405 }
3406 
3407 // Tests with constant C
3409  typedef decltype(this->fad) FadType;
3410  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3411  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3412  auto m = this->m_;
3413  auto n = this->n_;
3414  auto ndot = this->ndot_;
3415 
3416 
3417  // SYMM is apparently not defined in the BLAS for complex types
3419  return;
3420 
3421  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3422  for (unsigned int j=0; j<m; j++) {
3423  for (unsigned int i=0; i<m; i++) {
3424  A[i+j*m] = FadType(ndot, this->urand.number());
3425  for (unsigned int k=0; k<ndot; k++)
3426  A[i+j*m].fastAccessDx(k) = this->urand.number();
3427  }
3428  }
3429  for (unsigned int j=0; j<n; j++) {
3430  for (unsigned int i=0; i<m; i++) {
3431  B[i+j*m] = FadType(ndot, this->urand.number());
3432  for (unsigned int k=0; k<ndot; k++)
3433  B[i+j*m].fastAccessDx(k) = this->urand.number();
3434  }
3435  }
3436  FadType alpha(ndot, this->urand.number());
3437  FadType beta(ndot, this->urand.number());
3438  for (unsigned int k=0; k<ndot; k++) {
3439  alpha.fastAccessDx(k) = this->urand.number();
3440  beta.fastAccessDx(k) = this->urand.number();
3441  }
3442 
3443  for (unsigned int j=0; j<n; j++) {
3444  for (unsigned int i=0; i<m; i++) {
3445  ScalarType val = this->urand.number();
3446  C1[i+j*m] = val;
3447  C2[i+j*m] = val;
3448  C3[i+j*m] = val;
3449  }
3450  }
3451 
3452  Teuchos::BLAS<int,FadType> teuchos_blas;
3453  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3454  &A[0], m, &B[0], m, beta, &C1[0], m);
3455 
3456  Teuchos::BLAS<int,FadType> sacado_blas(false);
3457  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3458  &A[0], m, &B[0], m, beta, &C2[0], m);
3459 
3460  COMPARE_FAD_VECTORS(C1, C2, m*n);
3461 
3462  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3463  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3464  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3465  &A[0], m, &B[0], m, beta, &C3[0], m);
3466 
3467  COMPARE_FAD_VECTORS(C1, C3, m*n);
3468 
3469  // lower tri
3470  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3471  &A[0], m, &B[0], m, beta, &C1[0], m);
3472  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3473  &A[0], m, &B[0], m, beta, &C2[0], m);
3474  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3475  &A[0], m, &B[0], m, beta, &C3[0], m);
3476 
3477  COMPARE_FAD_VECTORS(C1, C2, m*n);
3478  COMPARE_FAD_VECTORS(C1, C3, m*n);
3479 }
3480 
3481 // Tests with constant alpha, beta
3483  typedef decltype(this->fad) FadType;
3484  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3485  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3486  auto m = this->m_;
3487  auto n = this->n_;
3488  auto ndot = this->ndot_;
3489 
3490 
3491  // SYMM is apparently not defined in the BLAS for complex types
3493  return;
3494 
3495  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3496  for (unsigned int j=0; j<m; j++) {
3497  for (unsigned int i=0; i<m; i++) {
3498  A[i+j*m] = FadType(ndot, this->urand.number());
3499  for (unsigned int k=0; k<ndot; k++)
3500  A[i+j*m].fastAccessDx(k) = this->urand.number();
3501  }
3502  }
3503  for (unsigned int j=0; j<n; j++) {
3504  for (unsigned int i=0; i<m; i++) {
3505  B[i+j*m] = FadType(ndot, this->urand.number());
3506  for (unsigned int k=0; k<ndot; k++)
3507  B[i+j*m].fastAccessDx(k) = this->urand.number();
3508  }
3509  }
3510  ScalarType alpha = this->urand.number();
3511  ScalarType beta = this->urand.number();
3512 
3513  for (unsigned int j=0; j<n; j++) {
3514  for (unsigned int i=0; i<m; i++) {
3515  ScalarType val = this->urand.number();
3516  C1[i+j*m] = FadType(ndot, val);
3517  C2[i+j*m] = FadType(ndot, val);
3518  C3[i+j*m] = FadType(ndot, val);
3519  for (unsigned int k=0; k<ndot; k++) {
3520  val = this->urand.number();
3521  C1[i+j*m].fastAccessDx(k) = val;
3522  C2[i+j*m].fastAccessDx(k) = val;
3523  C3[i+j*m].fastAccessDx(k) = val;
3524  }
3525  }
3526  }
3527 
3528  Teuchos::BLAS<int,FadType> teuchos_blas;
3529  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3530  &A[0], m, &B[0], m, beta, &C1[0], m);
3531 
3532  Teuchos::BLAS<int,FadType> sacado_blas(false);
3533  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3534  &A[0], m, &B[0], m, beta, &C2[0], m);
3535 
3536  COMPARE_FAD_VECTORS(C1, C2, m*n);
3537 
3538  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3539  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3540  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3541  &A[0], m, &B[0], m, beta, &C3[0], m);
3542 
3543  COMPARE_FAD_VECTORS(C1, C3, m*n);
3544 
3545  // lower tri
3546  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3547  &A[0], m, &B[0], m, beta, &C1[0], m);
3548  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3549  &A[0], m, &B[0], m, beta, &C2[0], m);
3550  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3551  &A[0], m, &B[0], m, beta, &C3[0], m);
3552 
3553  COMPARE_FAD_VECTORS(C1, C2, m*n);
3554  COMPARE_FAD_VECTORS(C1, C3, m*n);
3555 }
3556 
3557 // Tests with constant A
3559  typedef decltype(this->fad) FadType;
3560  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3561  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3562  auto m = this->m_;
3563  auto n = this->n_;
3564  auto ndot = this->ndot_;
3565 
3566 
3567  // SYMM is apparently not defined in the BLAS for complex types
3569  return;
3570 
3571  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3572  C4(m*n,ndot), C5(m*n,ndot);
3573  std::vector<ScalarType> a(m*m);
3574  for (unsigned int j=0; j<m; j++) {
3575  for (unsigned int i=0; i<m; i++) {
3576  a[i+j*m] = this->urand.number();
3577  A[i+j*m] = a[i+j*m];
3578  }
3579  }
3580  for (unsigned int j=0; j<n; j++) {
3581  for (unsigned int i=0; i<m; i++) {
3582  B[i+j*m] = FadType(ndot, this->urand.number());
3583  for (unsigned int k=0; k<ndot; k++)
3584  B[i+j*m].fastAccessDx(k) = this->urand.number();
3585  }
3586  }
3587  FadType alpha(ndot, this->urand.number());
3588  FadType beta(ndot, this->urand.number());
3589  for (unsigned int k=0; k<ndot; k++) {
3590  alpha.fastAccessDx(k) = this->urand.number();
3591  beta.fastAccessDx(k) = this->urand.number();
3592  }
3593 
3594  for (unsigned int j=0; j<n; j++) {
3595  for (unsigned int i=0; i<m; i++) {
3596  ScalarType val = this->urand.number();
3597  C1[i+j*m] = FadType(ndot, val);
3598  C2[i+j*m] = FadType(ndot, val);
3599  C3[i+j*m] = FadType(ndot, val);
3600  C4[i+j*m] = FadType(ndot, val);
3601  C5[i+j*m] = FadType(ndot, val);
3602  for (unsigned int k=0; k<ndot; k++) {
3603  val = this->urand.number();
3604  C1[i+j*m].fastAccessDx(k) = val;
3605  C2[i+j*m].fastAccessDx(k) = val;
3606  C3[i+j*m].fastAccessDx(k) = val;
3607  C4[i+j*m].fastAccessDx(k) = val;
3608  C5[i+j*m].fastAccessDx(k) = val;
3609  }
3610  }
3611  }
3612 
3613  Teuchos::BLAS<int,FadType> teuchos_blas;
3614  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3615  &A[0], m, &B[0], m, beta, &C1[0], m);
3616 
3617  Teuchos::BLAS<int,FadType> sacado_blas(false);
3618  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3619  &A[0], m, &B[0], m, beta, &C2[0], m);
3620 
3621  COMPARE_FAD_VECTORS(C1, C2, m*n);
3622 
3623  unsigned int sz = m*m + 2*m*n*(1+ndot);
3624  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3625  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3626  &A[0], m, &B[0], m, beta, &C3[0], m);
3627 
3628  COMPARE_FAD_VECTORS(C1, C3, m*n);
3629 
3630  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3631  &a[0], m, &B[0], m, beta, &C4[0], m);
3632 
3633  COMPARE_FAD_VECTORS(C1, C4, m*n);
3634 
3635  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3636  &a[0], m, &B[0], m, beta, &C5[0], m);
3637 
3638  COMPARE_FAD_VECTORS(C1, C5, m*n);
3639 
3640  // lower tri
3641  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3642  &A[0], m, &B[0], m, beta, &C1[0], m);
3643  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3644  &A[0], m, &B[0], m, beta, &C2[0], m);
3645  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3646  &A[0], m, &B[0], m, beta, &C3[0], m);
3647  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3648  &a[0], m, &B[0], m, beta, &C4[0], m);
3649  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3650  &a[0], m, &B[0], m, beta, &C5[0], m);
3651 
3652  COMPARE_FAD_VECTORS(C1, C2, m*n);
3653  COMPARE_FAD_VECTORS(C1, C3, m*n);
3654  COMPARE_FAD_VECTORS(C1, C4, m*n);
3655  COMPARE_FAD_VECTORS(C1, C5, m*n);
3656 }
3657 
3658 // Tests with constant B
3660  typedef decltype(this->fad) FadType;
3661  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3662  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3663  auto m = this->m_;
3664  auto n = this->n_;
3665  auto ndot = this->ndot_;
3666 
3667 
3668  // SYMM is apparently not defined in the BLAS for complex types
3670  return;
3671 
3672  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3673  C4(m*n,ndot), C5(m*n,ndot);
3674  std::vector<ScalarType> b(m*n);
3675  for (unsigned int j=0; j<m; j++) {
3676  for (unsigned int i=0; i<m; i++) {
3677  A[i+j*m] = FadType(ndot, this->urand.number());
3678  for (unsigned int k=0; k<ndot; k++)
3679  A[i+j*m].fastAccessDx(k) = this->urand.number();
3680  }
3681  }
3682  for (unsigned int j=0; j<n; j++) {
3683  for (unsigned int i=0; i<m; i++) {
3684  b[i+j*m] = this->urand.number();
3685  B[i+j*m] = b[i+j*m];
3686  }
3687  }
3688  FadType alpha(ndot, this->urand.number());
3689  FadType beta(ndot, this->urand.number());
3690  for (unsigned int k=0; k<ndot; k++) {
3691  alpha.fastAccessDx(k) = this->urand.number();
3692  beta.fastAccessDx(k) = this->urand.number();
3693  }
3694 
3695  for (unsigned int j=0; j<n; j++) {
3696  for (unsigned int i=0; i<m; i++) {
3697  ScalarType val = this->urand.number();
3698  C1[i+j*m] = FadType(ndot, val);
3699  C2[i+j*m] = FadType(ndot, val);
3700  C3[i+j*m] = FadType(ndot, val);
3701  C4[i+j*m] = FadType(ndot, val);
3702  C5[i+j*m] = FadType(ndot, val);
3703  for (unsigned int k=0; k<ndot; k++) {
3704  val = this->urand.number();
3705  C1[i+j*m].fastAccessDx(k) = val;
3706  C2[i+j*m].fastAccessDx(k) = val;
3707  C3[i+j*m].fastAccessDx(k) = val;
3708  C4[i+j*m].fastAccessDx(k) = val;
3709  C5[i+j*m].fastAccessDx(k) = val;
3710  }
3711  }
3712  }
3713 
3714  Teuchos::BLAS<int,FadType> teuchos_blas;
3715  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3716  &A[0], m, &B[0], m, beta, &C1[0], m);
3717 
3718  Teuchos::BLAS<int,FadType> sacado_blas(false);
3719  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3720  &A[0], m, &B[0], m, beta, &C2[0], m);
3721 
3722  COMPARE_FAD_VECTORS(C1, C2, m*n);
3723 
3724  unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
3725  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3726  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3727  &A[0], m, &B[0], m, beta, &C3[0], m);
3728 
3729  COMPARE_FAD_VECTORS(C1, C3, m*n);
3730 
3731  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3732  &A[0], m, &b[0], m, beta, &C4[0], m);
3733 
3734  COMPARE_FAD_VECTORS(C1, C4, m*n);
3735 
3736  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3737  &A[0], m, &b[0], m, beta, &C5[0], m);
3738 
3739  COMPARE_FAD_VECTORS(C1, C5, m*n);
3740 
3741  // lower tri
3742  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3743  &A[0], m, &B[0], m, beta, &C1[0], m);
3744  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3745  &A[0], m, &B[0], m, beta, &C2[0], m);
3746  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3747  &A[0], m, &B[0], m, beta, &C3[0], m);
3748  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3749  &A[0], m, &b[0], m, beta, &C4[0], m);
3750  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3751  &A[0], m, &b[0], m, beta, &C5[0], m);
3752 
3753  COMPARE_FAD_VECTORS(C1, C2, m*n);
3754  COMPARE_FAD_VECTORS(C1, C3, m*n);
3755  COMPARE_FAD_VECTORS(C1, C4, m*n);
3756  COMPARE_FAD_VECTORS(C1, C5, m*n);
3757 }
3758 
3759 // Tests with constant A and B
3761  typedef decltype(this->fad) FadType;
3762  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3763  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3764  auto m = this->m_;
3765  auto n = this->n_;
3766  auto ndot = this->ndot_;
3767 
3768 
3769  // SYMM is apparently not defined in the BLAS for complex types
3771  return;
3772 
3773  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3774  C4(m*n,ndot), C5(m*n,ndot);
3775  std::vector<ScalarType> a(m*m), b(m*n);
3776  for (unsigned int j=0; j<m; j++) {
3777  for (unsigned int i=0; i<m; i++) {
3778  a[i+j*m] = this->urand.number();
3779  A[i+j*m] = a[i+j*m];
3780  }
3781  }
3782  for (unsigned int j=0; j<n; j++) {
3783  for (unsigned int i=0; i<m; i++) {
3784  b[i+j*m] = this->urand.number();
3785  B[i+j*m] = b[i+j*m];
3786  }
3787  }
3788  FadType alpha(ndot, this->urand.number());
3789  FadType beta(ndot, this->urand.number());
3790  for (unsigned int k=0; k<ndot; k++) {
3791  alpha.fastAccessDx(k) = this->urand.number();
3792  beta.fastAccessDx(k) = this->urand.number();
3793  }
3794 
3795  for (unsigned int j=0; j<n; j++) {
3796  for (unsigned int i=0; i<m; i++) {
3797  ScalarType val = this->urand.number();
3798  C1[i+j*m] = FadType(ndot, val);
3799  C2[i+j*m] = FadType(ndot, val);
3800  C3[i+j*m] = FadType(ndot, val);
3801  C4[i+j*m] = FadType(ndot, val);
3802  C5[i+j*m] = FadType(ndot, val);
3803  for (unsigned int k=0; k<ndot; k++) {
3804  val = this->urand.number();
3805  C1[i+j*m].fastAccessDx(k) = val;
3806  C2[i+j*m].fastAccessDx(k) = val;
3807  C3[i+j*m].fastAccessDx(k) = val;
3808  C4[i+j*m].fastAccessDx(k) = val;
3809  C5[i+j*m].fastAccessDx(k) = val;
3810  }
3811  }
3812  }
3813 
3814  Teuchos::BLAS<int,FadType> teuchos_blas;
3815  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3816  &A[0], m, &B[0], m, beta, &C1[0], m);
3817 
3818  Teuchos::BLAS<int,FadType> sacado_blas(false);
3819  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3820  &A[0], m, &B[0], m, beta, &C2[0], m);
3821 
3822  COMPARE_FAD_VECTORS(C1, C2, m*n);
3823 
3824  unsigned int sz = m*m + m*n*(2+ndot);
3825  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3826  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3827  &A[0], m, &B[0], m, beta, &C3[0], m);
3828 
3829  COMPARE_FAD_VECTORS(C1, C3, m*n);
3830 
3831  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3832  &a[0], m, &b[0], m, beta, &C4[0], m);
3833 
3834  COMPARE_FAD_VECTORS(C1, C4, m*n);
3835 
3836  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3837  &a[0], m, &b[0], m, beta, &C5[0], m);
3838 
3839  COMPARE_FAD_VECTORS(C1, C5, m*n);
3840 
3841  // lower tri
3842  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3843  &A[0], m, &B[0], m, beta, &C1[0], m);
3844  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3845  &A[0], m, &B[0], m, beta, &C2[0], m);
3846  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3847  &A[0], m, &B[0], m, beta, &C3[0], m);
3848  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3849  &a[0], m, &b[0], m, beta, &C4[0], m);
3850  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3851  &a[0], m, &b[0], m, beta, &C5[0], m);
3852 
3853  COMPARE_FAD_VECTORS(C1, C2, m*n);
3854  COMPARE_FAD_VECTORS(C1, C3, m*n);
3855  COMPARE_FAD_VECTORS(C1, C4, m*n);
3856  COMPARE_FAD_VECTORS(C1, C5, m*n);
3857 }
3858 
3859 // Tests all arguments, left side
3861  typedef decltype(this->fad) FadType;
3862  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3863  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3864  auto m = this->m_;
3865  auto n = this->n_;
3866  auto ndot = this->ndot_;
3867 
3868  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
3869  for (unsigned int j=0; j<m; j++) {
3870  for (unsigned int i=0; i<m; i++) {
3871  A[i+j*m] = FadType(ndot, this->urand.number());
3872  for (unsigned int k=0; k<ndot; k++)
3873  A[i+j*m].fastAccessDx(k) = this->urand.number();
3874  }
3875  }
3876  FadType alpha(ndot, this->urand.number());
3877  for (unsigned int k=0; k<ndot; k++) {
3878  alpha.fastAccessDx(k) = this->urand.number();
3879  }
3880 
3881  for (unsigned int j=0; j<n; j++) {
3882  for (unsigned int i=0; i<m; i++) {
3883  ScalarType val = this->urand.number();
3884  B1[i+j*m] = FadType(ndot, val);
3885  B2[i+j*m] = FadType(ndot, val);
3886  B3[i+j*m] = FadType(ndot, val);
3887  for (unsigned int k=0; k<ndot; k++) {
3888  val = this->urand.number();
3889  B1[i+j*m].fastAccessDx(k) = val;
3890  B2[i+j*m].fastAccessDx(k) = val;
3891  B3[i+j*m].fastAccessDx(k) = val;
3892  }
3893  }
3894  }
3895 
3896  Teuchos::BLAS<int,FadType> teuchos_blas;
3898  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3899 
3900  Teuchos::BLAS<int,FadType> sacado_blas(false);
3902  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3903 
3904  COMPARE_FAD_VECTORS(B1, B2, m*n);
3905 
3906  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
3907  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3909  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3910 
3911  COMPARE_FAD_VECTORS(B1, B3, m*n);
3912 
3914  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3916  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3918  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3919  COMPARE_FAD_VECTORS(B1, B2, m*n);
3920  COMPARE_FAD_VECTORS(B1, B3, m*n);
3921 
3923  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3925  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3927  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3928  COMPARE_FAD_VECTORS(B1, B2, m*n);
3929  COMPARE_FAD_VECTORS(B1, B3, m*n);
3930 
3931  for (unsigned int i=0; i<m; i++) {
3932  A[i*m+i].val() = 1.0;
3933  for (unsigned int k=0; k<ndot; k++)
3934  A[i*m+i].fastAccessDx(k) = 0.0;
3935  }
3937  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3939  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3941  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3942  COMPARE_FAD_VECTORS(B1, B2, m*n);
3943  COMPARE_FAD_VECTORS(B1, B3, m*n);
3944 }
3945 
3946 // Tests all arguments, right side
3948  typedef decltype(this->fad) FadType;
3949  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3950  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
3951  auto m = this->m_;
3952  auto n = this->n_;
3953  auto ndot = this->ndot_;
3954 
3955  VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
3956  for (unsigned int j=0; j<n; j++) {
3957  for (unsigned int i=0; i<n; i++) {
3958  A[i+j*n] = FadType(ndot, this->urand.number());
3959  for (unsigned int k=0; k<ndot; k++)
3960  A[i+j*n].fastAccessDx(k) = this->urand.number();
3961  }
3962  }
3963  FadType alpha(ndot, this->urand.number());
3964  for (unsigned int k=0; k<ndot; k++) {
3965  alpha.fastAccessDx(k) = this->urand.number();
3966  }
3967 
3968  for (unsigned int j=0; j<n; j++) {
3969  for (unsigned int i=0; i<m; i++) {
3970  ScalarType val = this->urand.number();
3971  B1[i+j*m] = FadType(ndot, val);
3972  B2[i+j*m] = FadType(ndot, val);
3973  B3[i+j*m] = FadType(ndot, val);
3974  for (unsigned int k=0; k<ndot; k++) {
3975  val = this->urand.number();
3976  B1[i+j*m].fastAccessDx(k) = val;
3977  B2[i+j*m].fastAccessDx(k) = val;
3978  B3[i+j*m].fastAccessDx(k) = val;
3979  }
3980  }
3981  }
3982 
3983  Teuchos::BLAS<int,FadType> teuchos_blas;
3985  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
3986 
3987  Teuchos::BLAS<int,FadType> sacado_blas(false);
3989  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
3990 
3991  COMPARE_FAD_VECTORS(B1, B2, m*n);
3992 
3993  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
3994  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3996  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
3997 
3998  COMPARE_FAD_VECTORS(B1, B3, m*n);
3999 
4001  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4003  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4005  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4006  COMPARE_FAD_VECTORS(B1, B2, m*n);
4007  COMPARE_FAD_VECTORS(B1, B3, m*n);
4008 
4010  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4012  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4014  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4015  COMPARE_FAD_VECTORS(B1, B2, m*n);
4016  COMPARE_FAD_VECTORS(B1, B3, m*n);
4017 
4018  for (unsigned int i=0; i<n; i++) {
4019  A[i*n+i].val() = 1.0;
4020  for (unsigned int k=0; k<ndot; k++)
4021  A[i*n+i].fastAccessDx(k) = 0.0;
4022  }
4024  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4026  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4028  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4029  COMPARE_FAD_VECTORS(B1, B2, m*n);
4030  COMPARE_FAD_VECTORS(B1, B3, m*n);
4031 }
4032 
4033 // Tests all arguments, left side, different lda, ldb
4035  typedef decltype(this->fad) FadType;
4036  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4037  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4038  auto m = this->m_;
4039  auto n = this->n_;
4040  auto ndot = this->ndot_;
4041 
4042  unsigned int lda = m+4;
4043  unsigned int ldb = m+5;
4044  VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4045  for (unsigned int j=0; j<m; j++) {
4046  for (unsigned int i=0; i<lda; i++) {
4047  A[i+j*lda] = FadType(ndot, this->urand.number());
4048  for (unsigned int k=0; k<ndot; k++)
4049  A[i+j*lda].fastAccessDx(k) = this->urand.number();
4050  }
4051  }
4052  FadType alpha(ndot, this->urand.number());
4053  for (unsigned int k=0; k<ndot; k++) {
4054  alpha.fastAccessDx(k) = this->urand.number();
4055  }
4056 
4057  for (unsigned int j=0; j<n; j++) {
4058  for (unsigned int i=0; i<ldb; i++) {
4059  ScalarType val = this->urand.number();
4060  B1[i+j*ldb] = FadType(ndot, val);
4061  B2[i+j*ldb] = FadType(ndot, val);
4062  B3[i+j*ldb] = FadType(ndot, val);
4063  for (unsigned int k=0; k<ndot; k++) {
4064  val = this->urand.number();
4065  B1[i+j*ldb].fastAccessDx(k) = val;
4066  B2[i+j*ldb].fastAccessDx(k) = val;
4067  B3[i+j*ldb].fastAccessDx(k) = val;
4068  }
4069  }
4070  }
4071 
4072  Teuchos::BLAS<int,FadType> teuchos_blas;
4074  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4075 
4076  Teuchos::BLAS<int,FadType> sacado_blas(false);
4078  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4079 
4080  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4081 
4082  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4083  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4085  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4086 
4087  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4088 
4090  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4092  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4094  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4095  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4096  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4097 
4099  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4101  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4103  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4104  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4105  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4106 
4107  for (unsigned int i=0; i<m; i++) {
4108  A[i*lda+i].val() = 1.0;
4109  for (unsigned int k=0; k<ndot; k++)
4110  A[i*lda+i].fastAccessDx(k) = 0.0;
4111  }
4113  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4115  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4117  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4118  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4119  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4120 }
4121 
4122 // Tests all arguments, right side, different lda, ldb
4124  typedef decltype(this->fad) FadType;
4125  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4126  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4127  auto m = this->m_;
4128  auto n = this->n_;
4129  auto ndot = this->ndot_;
4130 
4131  unsigned int lda = n+4;
4132  unsigned int ldb = m+5;
4133  VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4134  for (unsigned int j=0; j<n; j++) {
4135  for (unsigned int i=0; i<lda; i++) {
4136  A[i+j*lda] = FadType(ndot, this->urand.number());
4137  for (unsigned int k=0; k<ndot; k++)
4138  A[i+j*lda].fastAccessDx(k) = this->urand.number();
4139  }
4140  }
4141  FadType alpha(ndot, this->urand.number());
4142  for (unsigned int k=0; k<ndot; k++) {
4143  alpha.fastAccessDx(k) = this->urand.number();
4144  }
4145 
4146  for (unsigned int j=0; j<n; j++) {
4147  for (unsigned int i=0; i<ldb; i++) {
4148  ScalarType val = this->urand.number();
4149  B1[i+j*ldb] = FadType(ndot, val);
4150  B2[i+j*ldb] = FadType(ndot, val);
4151  B3[i+j*ldb] = FadType(ndot, val);
4152  for (unsigned int k=0; k<ndot; k++) {
4153  val = this->urand.number();
4154  B1[i+j*ldb].fastAccessDx(k) = val;
4155  B2[i+j*ldb].fastAccessDx(k) = val;
4156  B3[i+j*ldb].fastAccessDx(k) = val;
4157  }
4158  }
4159  }
4160 
4161  Teuchos::BLAS<int,FadType> teuchos_blas;
4163  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4164 
4165  Teuchos::BLAS<int,FadType> sacado_blas(false);
4167  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4168 
4169  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4170 
4171  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4172  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4174  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4175 
4176  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4177 
4179  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4181  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4183  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4184  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4185  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4186 
4188  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4190  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4192  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4193  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4194  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4195 
4196  for (unsigned int i=0; i<n; i++) {
4197  A[i*lda+i].val() = 1.0;
4198  for (unsigned int k=0; k<ndot; k++)
4199  A[i*lda+i].fastAccessDx(k) = 0.0;
4200  }
4202  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4204  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4206  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4207  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4208  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4209 }
4210 
4211 // Tests constant alpha
4213  typedef decltype(this->fad) FadType;
4214  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4215  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4216  auto m = this->m_;
4217  auto n = this->n_;
4218  auto ndot = this->ndot_;
4219 
4220  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4221  for (unsigned int j=0; j<m; j++) {
4222  for (unsigned int i=0; i<m; i++) {
4223  A[i+j*m] = FadType(ndot, this->urand.number());
4224  for (unsigned int k=0; k<ndot; k++)
4225  A[i+j*m].fastAccessDx(k) = this->urand.number();
4226  }
4227  }
4228  ScalarType alpha = this->urand.number();
4229 
4230  for (unsigned int j=0; j<n; j++) {
4231  for (unsigned int i=0; i<m; i++) {
4232  ScalarType val = this->urand.number();
4233  B1[i+j*m] = FadType(ndot, val);
4234  B2[i+j*m] = FadType(ndot, val);
4235  B3[i+j*m] = FadType(ndot, val);
4236  for (unsigned int k=0; k<ndot; k++) {
4237  val = this->urand.number();
4238  B1[i+j*m].fastAccessDx(k) = val;
4239  B2[i+j*m].fastAccessDx(k) = val;
4240  B3[i+j*m].fastAccessDx(k) = val;
4241  }
4242  }
4243  }
4244 
4245  Teuchos::BLAS<int,FadType> teuchos_blas;
4247  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4248 
4249  Teuchos::BLAS<int,FadType> sacado_blas(false);
4251  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4252 
4253  COMPARE_FAD_VECTORS(B1, B2, m*n);
4254 
4255  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4256  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4258  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4259 
4260  COMPARE_FAD_VECTORS(B1, B3, m*n);
4261 
4263  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4265  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4267  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4268  COMPARE_FAD_VECTORS(B1, B2, m*n);
4269  COMPARE_FAD_VECTORS(B1, B3, m*n);
4270 
4272  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4274  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4276  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4277  COMPARE_FAD_VECTORS(B1, B2, m*n);
4278  COMPARE_FAD_VECTORS(B1, B3, m*n);
4279 
4280  for (unsigned int i=0; i<m; i++) {
4281  A[i*m+i].val() = 1.0;
4282  for (unsigned int k=0; k<ndot; k++)
4283  A[i*m+i].fastAccessDx(k) = 0.0;
4284  }
4286  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4288  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4290  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4291  COMPARE_FAD_VECTORS(B1, B2, m*n);
4292  COMPARE_FAD_VECTORS(B1, B3, m*n);
4293 }
4294 
4295 // Tests constant B
4297  typedef decltype(this->fad) FadType;
4298  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4299  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4300  auto m = this->m_;
4301  auto n = this->n_;
4302  auto ndot = this->ndot_;
4303 
4304  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4305  for (unsigned int j=0; j<m; j++) {
4306  for (unsigned int i=0; i<m; i++) {
4307  A[i+j*m] = FadType(ndot, this->urand.number());
4308  for (unsigned int k=0; k<ndot; k++)
4309  A[i+j*m].fastAccessDx(k) = this->urand.number();
4310  }
4311  }
4312  FadType alpha(ndot, this->urand.number());
4313  for (unsigned int k=0; k<ndot; k++) {
4314  alpha.fastAccessDx(k) = this->urand.number();
4315  }
4316 
4317  for (unsigned int j=0; j<n; j++) {
4318  for (unsigned int i=0; i<m; i++) {
4319  ScalarType val = this->urand.number();
4320  B1[i+j*m] = val;
4321  B2[i+j*m] = val;
4322  B3[i+j*m] = val;
4323  }
4324  }
4325 
4326  Teuchos::BLAS<int,FadType> teuchos_blas;
4328  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4329 
4330  Teuchos::BLAS<int,FadType> sacado_blas(false);
4332  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4333 
4334  COMPARE_FAD_VECTORS(B1, B2, m*n);
4335 
4336  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4337  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4339  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4340 
4341  COMPARE_FAD_VECTORS(B1, B3, m*n);
4342 
4344  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4346  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4348  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4349  COMPARE_FAD_VECTORS(B1, B2, m*n);
4350  COMPARE_FAD_VECTORS(B1, B3, m*n);
4351 
4353  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4355  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4357  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4358  COMPARE_FAD_VECTORS(B1, B2, m*n);
4359  COMPARE_FAD_VECTORS(B1, B3, m*n);
4360 
4361  for (unsigned int i=0; i<m; i++) {
4362  A[i*m+i].val() = 1.0;
4363  for (unsigned int k=0; k<ndot; k++)
4364  A[i*m+i].fastAccessDx(k) = 0.0;
4365  }
4367  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4369  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4371  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4372  COMPARE_FAD_VECTORS(B1, B2, m*n);
4373  COMPARE_FAD_VECTORS(B1, B3, m*n);
4374 }
4375 
4376 // Tests constant A
4378  typedef decltype(this->fad) FadType;
4379  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4380  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4381  auto m = this->m_;
4382  auto n = this->n_;
4383  auto ndot = this->ndot_;
4384 
4385  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
4386  B4(m*n,ndot), B5(m*n,ndot);
4387  std::vector<ScalarType> a(m*m);
4388  for (unsigned int j=0; j<m; j++) {
4389  for (unsigned int i=0; i<m; i++) {
4390  a[i+j*m] = this->urand.number();
4391  A[i+j*m] = a[i+j*m];
4392  }
4393  }
4394  FadType alpha(ndot, this->urand.number());
4395  for (unsigned int k=0; k<ndot; k++) {
4396  alpha.fastAccessDx(k) = this->urand.number();
4397  }
4398 
4399  for (unsigned int j=0; j<n; j++) {
4400  for (unsigned int i=0; i<m; i++) {
4401  ScalarType val = this->urand.number();
4402  B1[i+j*m] = FadType(ndot, val);
4403  B2[i+j*m] = FadType(ndot, val);
4404  B3[i+j*m] = FadType(ndot, val);
4405  B4[i+j*m] = FadType(ndot, val);
4406  B5[i+j*m] = FadType(ndot, val);
4407  for (unsigned int k=0; k<ndot; k++) {
4408  val = this->urand.number();
4409  B1[i+j*m].fastAccessDx(k) = val;
4410  B2[i+j*m].fastAccessDx(k) = val;
4411  B3[i+j*m].fastAccessDx(k) = val;
4412  B4[i+j*m].fastAccessDx(k) = val;
4413  B5[i+j*m].fastAccessDx(k) = val;
4414  }
4415  }
4416  }
4417 
4418  Teuchos::BLAS<int,FadType> teuchos_blas;
4420  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4421 
4422  Teuchos::BLAS<int,FadType> sacado_blas(false);
4424  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4425 
4426  COMPARE_FAD_VECTORS(B1, B2, m*n);
4427 
4428  unsigned int sz = m*m + m*n*(1+ndot);
4429  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4431  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4432 
4433  COMPARE_FAD_VECTORS(B1, B3, m*n);
4434 
4436  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4437 
4438  COMPARE_FAD_VECTORS(B1, B4, m*n);
4439 
4441  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4442 
4443  COMPARE_FAD_VECTORS(B1, B5, m*n);
4444 
4446  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4448  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4450  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4452  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4454  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4455  COMPARE_FAD_VECTORS(B1, B2, m*n);
4456  COMPARE_FAD_VECTORS(B1, B3, m*n);
4457  COMPARE_FAD_VECTORS(B1, B4, m*n);
4458  COMPARE_FAD_VECTORS(B1, B5, m*n);
4459 
4461  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4463  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4465  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4467  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4469  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4470  COMPARE_FAD_VECTORS(B1, B2, m*n);
4471  COMPARE_FAD_VECTORS(B1, B3, m*n);
4472  COMPARE_FAD_VECTORS(B1, B4, m*n);
4473  COMPARE_FAD_VECTORS(B1, B5, m*n);
4474 
4475  for (unsigned int i=0; i<m; i++) {
4476  A[i*m+i].val() = 1.0;
4477  for (unsigned int k=0; k<ndot; k++)
4478  A[i*m+i].fastAccessDx(k) = 0.0;
4479  }
4481  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4483  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4485  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4487  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4489  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4490  COMPARE_FAD_VECTORS(B1, B2, m*n);
4491  COMPARE_FAD_VECTORS(B1, B3, m*n);
4492  COMPARE_FAD_VECTORS(B1, B4, m*n);
4493  COMPARE_FAD_VECTORS(B1, B5, m*n);
4494 }
4495 
4496 // Tests all arguments, left side
4498  typedef decltype(this->fad) FadType;
4499  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4500  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4501  auto m = this->m_;
4502  auto n = this->n_;
4503  auto ndot = this->ndot_;
4504 
4505  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4506  for (unsigned int j=0; j<m; j++) {
4507  for (unsigned int i=0; i<m; i++) {
4508  //A[i+j*m] = this->urand.number();
4509  A[i+j*m] = FadType(ndot, this->urand.number());
4510  for (unsigned int k=0; k<ndot; k++)
4511  A[i+j*m].fastAccessDx(k) = this->urand.number();
4512  }
4513  }
4514  FadType alpha(ndot, this->urand.number());
4515  for (unsigned int k=0; k<ndot; k++) {
4516  alpha.fastAccessDx(k) = this->urand.number();
4517  }
4518  //ScalarType alpha = this->urand.number();
4519 
4520  for (unsigned int j=0; j<n; j++) {
4521  for (unsigned int i=0; i<m; i++) {
4522  ScalarType val = this->urand.number();
4523  // B1[i+j*m] = val;
4524  // B2[i+j*m] = val;
4525  // B3[i+j*m] = val;
4526  B1[i+j*m] = FadType(ndot, val);
4527  B2[i+j*m] = FadType(ndot, val);
4528  B3[i+j*m] = FadType(ndot, val);
4529  for (unsigned int k=0; k<ndot; k++) {
4530  val = this->urand.number();
4531  B1[i+j*m].fastAccessDx(k) = val;
4532  B2[i+j*m].fastAccessDx(k) = val;
4533  B3[i+j*m].fastAccessDx(k) = val;
4534  }
4535  }
4536  }
4537 
4538  Teuchos::BLAS<int,FadType> teuchos_blas;
4540  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4541 
4542  Teuchos::BLAS<int,FadType> sacado_blas(false);
4544  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4545 
4546  COMPARE_FAD_VECTORS(B1, B2, m*n);
4547 
4548  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4549  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4551  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4552 
4553  COMPARE_FAD_VECTORS(B1, B3, m*n);
4554 
4556  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4558  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4560  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4561  COMPARE_FAD_VECTORS(B1, B2, m*n);
4562  COMPARE_FAD_VECTORS(B1, B3, m*n);
4563 
4565  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4567  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4569  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4570  COMPARE_FAD_VECTORS(B1, B2, m*n);
4571  COMPARE_FAD_VECTORS(B1, B3, m*n);
4572 
4573  for (unsigned int i=0; i<m; i++) {
4574  A[i*m+i].val() = 1.0;
4575  for (unsigned int k=0; k<ndot; k++)
4576  A[i*m+i].fastAccessDx(k) = 0.0;
4577  }
4579  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4581  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4583  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4584  COMPARE_FAD_VECTORS(B1, B2, m*n);
4585  COMPARE_FAD_VECTORS(B1, B3, m*n);
4586 }
4587 
4588 // Tests all arguments, right side
4590  typedef decltype(this->fad) FadType;
4591  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4592  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4593  auto m = this->m_;
4594  auto n = this->n_;
4595  auto ndot = this->ndot_;
4596 
4597  VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4598  for (unsigned int j=0; j<n; j++) {
4599  for (unsigned int i=0; i<n; i++) {
4600  A[i+j*n] = FadType(ndot, this->urand.number());
4601  for (unsigned int k=0; k<ndot; k++)
4602  A[i+j*n].fastAccessDx(k) = this->urand.number();
4603  }
4604  }
4605  FadType alpha(ndot, this->urand.number());
4606  for (unsigned int k=0; k<ndot; k++) {
4607  alpha.fastAccessDx(k) = this->urand.number();
4608  }
4609 
4610  for (unsigned int j=0; j<n; j++) {
4611  for (unsigned int i=0; i<m; i++) {
4612  ScalarType val = this->urand.number();
4613  B1[i+j*m] = FadType(ndot, val);
4614  B2[i+j*m] = FadType(ndot, val);
4615  B3[i+j*m] = FadType(ndot, val);
4616  for (unsigned int k=0; k<ndot; k++) {
4617  val = this->urand.number();
4618  B1[i+j*m].fastAccessDx(k) = val;
4619  B2[i+j*m].fastAccessDx(k) = val;
4620  B3[i+j*m].fastAccessDx(k) = val;
4621  }
4622  }
4623  }
4624 
4625  Teuchos::BLAS<int,FadType> teuchos_blas;
4627  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4628 
4629  Teuchos::BLAS<int,FadType> sacado_blas(false);
4631  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4632 
4633  COMPARE_FAD_VECTORS(B1, B2, m*n);
4634 
4635  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4636  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4638  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4639 
4640  COMPARE_FAD_VECTORS(B1, B3, m*n);
4641 
4643  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4645  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4647  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4648  COMPARE_FAD_VECTORS(B1, B2, m*n);
4649  COMPARE_FAD_VECTORS(B1, B3, m*n);
4650 
4652  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4654  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4656  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4657  COMPARE_FAD_VECTORS(B1, B2, m*n);
4658  COMPARE_FAD_VECTORS(B1, B3, m*n);
4659 
4660  for (unsigned int i=0; i<n; i++) {
4661  A[i*n+i].val() = 1.0;
4662  for (unsigned int k=0; k<ndot; k++)
4663  A[i*n+i].fastAccessDx(k) = 0.0;
4664  }
4666  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4668  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4670  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4671  COMPARE_FAD_VECTORS(B1, B2, m*n);
4672  COMPARE_FAD_VECTORS(B1, B3, m*n);
4673 }
4674 
4675 // Tests all arguments, left side, different lda, ldb
4677  typedef decltype(this->fad) FadType;
4678  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4679  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4680  auto m = this->m_;
4681  auto n = this->n_;
4682  auto ndot = this->ndot_;
4683 
4684  unsigned int lda = m+4;
4685  unsigned int ldb = m+5;
4686  VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4687  for (unsigned int j=0; j<m; j++) {
4688  for (unsigned int i=0; i<lda; i++) {
4689  A[i+j*lda] = FadType(ndot, this->urand.number());
4690  for (unsigned int k=0; k<ndot; k++)
4691  A[i+j*lda].fastAccessDx(k) = this->urand.number();
4692  }
4693  }
4694  FadType alpha(ndot, this->urand.number());
4695  for (unsigned int k=0; k<ndot; k++) {
4696  alpha.fastAccessDx(k) = this->urand.number();
4697  }
4698 
4699  for (unsigned int j=0; j<n; j++) {
4700  for (unsigned int i=0; i<ldb; i++) {
4701  ScalarType val = this->urand.number();
4702  B1[i+j*ldb] = FadType(ndot, val);
4703  B2[i+j*ldb] = FadType(ndot, val);
4704  B3[i+j*ldb] = FadType(ndot, val);
4705  for (unsigned int k=0; k<ndot; k++) {
4706  val = this->urand.number();
4707  B1[i+j*ldb].fastAccessDx(k) = val;
4708  B2[i+j*ldb].fastAccessDx(k) = val;
4709  B3[i+j*ldb].fastAccessDx(k) = val;
4710  }
4711  }
4712  }
4713 
4714  Teuchos::BLAS<int,FadType> teuchos_blas;
4716  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4717 
4718  Teuchos::BLAS<int,FadType> sacado_blas(false);
4720  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4721 
4722  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4723 
4724  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4725  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4727  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4728 
4729  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4730 
4732  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4734  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4736  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4737  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4738  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4739 
4741  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4743  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4745  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4746  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4747  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4748 
4749  for (unsigned int i=0; i<m; i++) {
4750  A[i*lda+i].val() = 1.0;
4751  for (unsigned int k=0; k<ndot; k++)
4752  A[i*lda+i].fastAccessDx(k) = 0.0;
4753  }
4755  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4757  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4759  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4760  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4761  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4762 }
4763 
4764 // Tests all arguments, right side, different lda, ldb
4766  typedef decltype(this->fad) FadType;
4767  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4768  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4769  auto m = this->m_;
4770  auto n = this->n_;
4771  auto ndot = this->ndot_;
4772 
4773  unsigned int lda = n+4;
4774  unsigned int ldb = m+5;
4775  VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4776  for (unsigned int j=0; j<n; j++) {
4777  for (unsigned int i=0; i<lda; i++) {
4778  A[i+j*lda] = FadType(ndot, this->urand.number());
4779  for (unsigned int k=0; k<ndot; k++)
4780  A[i+j*lda].fastAccessDx(k) = this->urand.number();
4781  }
4782  }
4783  FadType alpha(ndot, this->urand.number());
4784  for (unsigned int k=0; k<ndot; k++) {
4785  alpha.fastAccessDx(k) = this->urand.number();
4786  }
4787 
4788  for (unsigned int j=0; j<n; j++) {
4789  for (unsigned int i=0; i<ldb; i++) {
4790  ScalarType val = this->urand.number();
4791  B1[i+j*ldb] = FadType(ndot, val);
4792  B2[i+j*ldb] = FadType(ndot, val);
4793  B3[i+j*ldb] = FadType(ndot, val);
4794  for (unsigned int k=0; k<ndot; k++) {
4795  val = this->urand.number();
4796  B1[i+j*ldb].fastAccessDx(k) = val;
4797  B2[i+j*ldb].fastAccessDx(k) = val;
4798  B3[i+j*ldb].fastAccessDx(k) = val;
4799  }
4800  }
4801  }
4802 
4803  Teuchos::BLAS<int,FadType> teuchos_blas;
4805  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4806 
4807  Teuchos::BLAS<int,FadType> sacado_blas(false);
4809  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4810 
4811  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4812 
4813  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4814  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4816  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4817 
4818  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4819 
4821  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4823  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4825  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4826  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4827  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4828 
4830  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4832  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4834  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4835  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4836  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4837 
4838  for (unsigned int i=0; i<n; i++) {
4839  A[i*lda+i].val() = 1.0;
4840  for (unsigned int k=0; k<ndot; k++)
4841  A[i*lda+i].fastAccessDx(k) = 0.0;
4842  }
4844  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4846  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4848  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4849  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4850  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4851 }
4852 
4853 // Tests constant alpha
4855  typedef decltype(this->fad) FadType;
4856  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4857  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4858  auto m = this->m_;
4859  auto n = this->n_;
4860  auto ndot = this->ndot_;
4861 
4862  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4863  for (unsigned int j=0; j<m; j++) {
4864  for (unsigned int i=0; i<m; i++) {
4865  A[i+j*m] = FadType(ndot, this->urand.number());
4866  for (unsigned int k=0; k<ndot; k++)
4867  A[i+j*m].fastAccessDx(k) = this->urand.number();
4868  }
4869  }
4870  ScalarType alpha = this->urand.number();
4871 
4872  for (unsigned int j=0; j<n; j++) {
4873  for (unsigned int i=0; i<m; i++) {
4874  ScalarType val = this->urand.number();
4875  B1[i+j*m] = FadType(ndot, val);
4876  B2[i+j*m] = FadType(ndot, val);
4877  B3[i+j*m] = FadType(ndot, val);
4878  for (unsigned int k=0; k<ndot; k++) {
4879  val = this->urand.number();
4880  B1[i+j*m].fastAccessDx(k) = val;
4881  B2[i+j*m].fastAccessDx(k) = val;
4882  B3[i+j*m].fastAccessDx(k) = val;
4883  }
4884  }
4885  }
4886 
4887  Teuchos::BLAS<int,FadType> teuchos_blas;
4889  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4890 
4891  Teuchos::BLAS<int,FadType> sacado_blas(false);
4893  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4894 
4895  COMPARE_FAD_VECTORS(B1, B2, m*n);
4896 
4897  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4898  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4900  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4901 
4902  COMPARE_FAD_VECTORS(B1, B3, m*n);
4903 
4905  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4907  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4909  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4910  COMPARE_FAD_VECTORS(B1, B2, m*n);
4911  COMPARE_FAD_VECTORS(B1, B3, m*n);
4912 
4914  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4916  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4918  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4919  COMPARE_FAD_VECTORS(B1, B2, m*n);
4920  COMPARE_FAD_VECTORS(B1, B3, m*n);
4921 
4922  for (unsigned int i=0; i<m; i++) {
4923  A[i*m+i].val() = 1.0;
4924  for (unsigned int k=0; k<ndot; k++)
4925  A[i*m+i].fastAccessDx(k) = 0.0;
4926  }
4928  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4930  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4932  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4933  COMPARE_FAD_VECTORS(B1, B2, m*n);
4934  COMPARE_FAD_VECTORS(B1, B3, m*n);
4935 }
4936 
4937 // Tests constant B
4939  typedef decltype(this->fad) FadType;
4940  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4941  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
4942  auto m = this->m_;
4943  auto n = this->n_;
4944  auto ndot = this->ndot_;
4945 
4946  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4947  for (unsigned int j=0; j<m; j++) {
4948  for (unsigned int i=0; i<m; i++) {
4949  A[i+j*m] = FadType(ndot, this->urand.number());
4950  for (unsigned int k=0; k<ndot; k++)
4951  A[i+j*m].fastAccessDx(k) = this->urand.number();
4952  }
4953  }
4954  FadType alpha(ndot, this->urand.number());
4955  for (unsigned int k=0; k<ndot; k++) {
4956  alpha.fastAccessDx(k) = this->urand.number();
4957  }
4958 
4959  for (unsigned int j=0; j<n; j++) {
4960  for (unsigned int i=0; i<m; i++) {
4961  ScalarType val = this->urand.number();
4962  B1[i+j*m] = val;
4963  B2[i+j*m] = val;
4964  B3[i+j*m] = val;
4965  }
4966  }
4967 
4968  Teuchos::BLAS<int,FadType> teuchos_blas;
4970  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4971 
4972  Teuchos::BLAS<int,FadType> sacado_blas(false);
4974  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4975 
4976  COMPARE_FAD_VECTORS(B1, B2, m*n);
4977 
4978  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4979  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4981  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4982 
4983  COMPARE_FAD_VECTORS(B1, B3, m*n);
4984 
4986  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4988  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4990  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4991  COMPARE_FAD_VECTORS(B1, B2, m*n);
4992  COMPARE_FAD_VECTORS(B1, B3, m*n);
4993 
4995  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4997  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4999  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5000  COMPARE_FAD_VECTORS(B1, B2, m*n);
5001  COMPARE_FAD_VECTORS(B1, B3, m*n);
5002 
5003  for (unsigned int i=0; i<m; i++) {
5004  A[i*m+i].val() = 1.0;
5005  for (unsigned int k=0; k<ndot; k++)
5006  A[i*m+i].fastAccessDx(k) = 0.0;
5007  }
5009  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5011  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5013  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5014  COMPARE_FAD_VECTORS(B1, B2, m*n);
5015  COMPARE_FAD_VECTORS(B1, B3, m*n);
5016 }
5017 
5018 // Tests constant A
5020  typedef decltype(this->fad) FadType;
5021  typedef typename Sacado::ScalarType<FadType>::type ScalarType;
5022  typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
5023  auto m = this->m_;
5024  auto n = this->n_;
5025  auto ndot = this->ndot_;
5026 
5027  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
5028  B4(m*n,ndot), B5(m*n,ndot);
5029  std::vector<ScalarType> a(m*m);
5030  for (unsigned int j=0; j<m; j++) {
5031  for (unsigned int i=0; i<m; i++) {
5032  a[i+j*m] = this->urand.number();
5033  A[i+j*m] = a[i+j*m];
5034  }
5035  }
5036  FadType alpha(ndot, this->urand.number());
5037  for (unsigned int k=0; k<ndot; k++) {
5038  alpha.fastAccessDx(k) = this->urand.number();
5039  }
5040 
5041  for (unsigned int j=0; j<n; j++) {
5042  for (unsigned int i=0; i<m; i++) {
5043  ScalarType val = this->urand.number();
5044  B1[i+j*m] = FadType(ndot, val);
5045  B2[i+j*m] = FadType(ndot, val);
5046  B3[i+j*m] = FadType(ndot, val);
5047  B4[i+j*m] = FadType(ndot, val);
5048  B5[i+j*m] = FadType(ndot, val);
5049  for (unsigned int k=0; k<ndot; k++) {
5050  val = this->urand.number();
5051  B1[i+j*m].fastAccessDx(k) = val;
5052  B2[i+j*m].fastAccessDx(k) = val;
5053  B3[i+j*m].fastAccessDx(k) = val;
5054  B4[i+j*m].fastAccessDx(k) = val;
5055  B5[i+j*m].fastAccessDx(k) = val;
5056  }
5057  }
5058  }
5059 
5060  Teuchos::BLAS<int,FadType> teuchos_blas;
5062  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5063 
5064  Teuchos::BLAS<int,FadType> sacado_blas(false);
5066  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5067 
5068  COMPARE_FAD_VECTORS(B1, B2, m*n);
5069 
5070  unsigned int sz = m*m + m*n*(1+ndot);
5071  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
5073  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5074 
5075  COMPARE_FAD_VECTORS(B1, B3, m*n);
5076 
5078  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5079 
5080  COMPARE_FAD_VECTORS(B1, B4, m*n);
5081 
5083  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5084 
5085  COMPARE_FAD_VECTORS(B1, B5, m*n);
5086 
5088  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5090  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5092  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5094  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5096  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5097  COMPARE_FAD_VECTORS(B1, B2, m*n);
5098  COMPARE_FAD_VECTORS(B1, B3, m*n);
5099  COMPARE_FAD_VECTORS(B1, B4, m*n);
5100  COMPARE_FAD_VECTORS(B1, B5, m*n);
5101 
5103  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5105  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5107  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5109  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5111  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5112  COMPARE_FAD_VECTORS(B1, B2, m*n);
5113  COMPARE_FAD_VECTORS(B1, B3, m*n);
5114  COMPARE_FAD_VECTORS(B1, B4, m*n);
5115  COMPARE_FAD_VECTORS(B1, B5, m*n);
5116 
5117  for (unsigned int i=0; i<m; i++) {
5118  A[i*m+i].val() = 1.0;
5119  for (unsigned int k=0; k<ndot; k++)
5120  A[i*m+i].fastAccessDx(k) = 0.0;
5121  }
5123  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5125  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5127  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5129  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5131  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5132  COMPARE_FAD_VECTORS(B1, B2, m*n);
5133  COMPARE_FAD_VECTORS(B1, B3, m*n);
5134  COMPARE_FAD_VECTORS(B1, B4, m*n);
5135  COMPARE_FAD_VECTORS(B1, B5, m*n);
5136 }
5137 
5140  testSCAL1,
5141  testSCAL2,
5142  testSCAL3,
5143  testSCAL4,
5144 
5145  testCOPY1,
5146  testCOPY2,
5147  testCOPY3,
5148  testCOPY4,
5149 
5150  testAXPY1,
5151  testAXPY2,
5152  testAXPY3,
5153  testAXPY4,
5154 
5155  testDOT1,
5156  testDOT2,
5157  testDOT3,
5158  testDOT4,
5159 
5160  testNRM21,
5161  testNRM22,
5162 
5163  testGEMV1,
5164  testGEMV2,
5165  testGEMV3,
5166  testGEMV4,
5167  testGEMV5,
5168  testGEMV6,
5169  testGEMV7,
5170  testGEMV8,
5171  testGEMV9,
5172 
5173  testTRMV1,
5174  testTRMV2,
5175  testTRMV3,
5176  testTRMV4,
5177 
5178  testGER1,
5179  testGER2,
5180  testGER3,
5181  testGER4,
5182  testGER5,
5183  testGER6,
5184  testGER7,
5185 
5186  testGEMM1,
5187  testGEMM2,
5188  testGEMM3,
5189  testGEMM4,
5190  testGEMM5,
5191  testGEMM6,
5192  testGEMM7,
5193  testGEMM8,
5194  testGEMM9,
5195  testGEMM10,
5196 
5197  testSYMM1,
5198  testSYMM2,
5199  testSYMM3,
5200  testSYMM4,
5201  testSYMM5,
5202  testSYMM6,
5203  testSYMM7,
5204  testSYMM8,
5205  testSYMM9,
5206 
5207  testTRMM1,
5208  testTRMM2,
5209  testTRMM3,
5210  testTRMM4,
5211  testTRMM5,
5212  testTRMM6,
5213  testTRMM7,
5214 
5215  testTRSM1,
5216  testTRSM2,
5217  testTRSM3,
5218  testTRSM4,
5219  testTRSM5,
5220  testTRSM6,
5221  testTRSM7);
5222 
5223 
5224 #endif // FADBLASUNITTESTS_HPP
Sacado::Random< ScalarType > urand
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
REGISTER_TYPED_TEST_SUITE_P(FadBLASUnitTests, testSCAL1, testSCAL2, testSCAL3, testSCAL4, testCOPY1, testCOPY2, testCOPY3, testCOPY4, testAXPY1, testAXPY2, testAXPY3, testAXPY4, testDOT1, testDOT2, testDOT3, testDOT4, testNRM21, testNRM22, testGEMV1, testGEMV2, testGEMV3, testGEMV4, testGEMV5, testGEMV6, testGEMV7, testGEMV8, testGEMV9, testTRMV1, testTRMV2, testTRMV3, testTRMV4, testGER1, testGER2, testGER3, testGER4, testGER5, testGER6, testGER7, testGEMM1, testGEMM2, testGEMM3, testGEMM4, testGEMM5, testGEMM6, testGEMM7, testGEMM8, testGEMM9, testGEMM10, testSYMM1, testSYMM2, testSYMM3, testSYMM4, testSYMM5, testSYMM6, testSYMM7, testSYMM8, testSYMM9, testTRMM1, testTRMM2, testTRMM3, testTRMM4, testTRMM5, testTRMM6, testTRMM7, testTRSM1, testTRSM2, testTRSM3, testTRSM4, testTRSM5, testTRSM6, testTRSM7)
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
TYPED_TEST_P(FadBLASUnitTests, testSCAL1)
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Sacado::Random< double > real_urand
#define COMPARE_FAD_VECTORS(X1, X2, n)
Sacado::Fad::DFad< double > FadType
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sacado::ScalarType< FadType >::type ScalarType
Sacado::Fad::Vector< unsigned int, FadType > VectorType
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
expr val()
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
#define A
Definition: Sacado_rad.hpp:572
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
TYPED_TEST_SUITE_P(FadBLASUnitTests)
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
A random number generator that generates random numbers uniformly distributed in the interval (a...
#define COMPARE_FADS(a, b)
Definition: GTestUtils.hpp:112
int n
const double y