Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FadBLASUnitTests.hpp
Go to the documentation of this file.
1 // $Id$
2 // $Source$
3 // @HEADER
4 // ***********************************************************************
5 //
6 // Sacado Package
7 // Copyright (2006) Sandia Corporation
8 //
9 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // This library is free software; you can redistribute it and/or modify
13 // it under the terms of the GNU Lesser General Public License as
14 // published by the Free Software Foundation; either version 2.1 of the
15 // License, or (at your option) any later version.
16 //
17 // This library is distributed in the hope that it will be useful, but
18 // WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License along with this library; if not, write to the Free Software
24 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 // USA
26 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
27 // (etphipp@sandia.gov).
28 //
29 // ***********************************************************************
30 // @HEADER
31 
32 #ifndef FADBLASUNITTESTS_HPP
33 #define FADBLASUNITTESTS_HPP
34 
35 // Sacado includes
36 #include "Sacado_No_Kokkos.hpp"
37 #include "Sacado_Fad_BLAS.hpp"
38 #include "Sacado_Random.hpp"
39 
40 // Cppunit includes
41 #include <cppunit/extensions/HelperMacros.h>
42 
43 #define COMPARE_VALUES(a, b) \
44  CPPUNIT_ASSERT( std::abs(a-b) < this->tol_a + this->tol_r*std::abs(a) );
45 
46 #define COMPARE_FADS(a, b) \
47 CPPUNIT_ASSERT(a.size() == b.size()); \
48 CPPUNIT_ASSERT(a.hasFastAccess() == b.hasFastAccess()); \
49 COMPARE_VALUES(a.val(), b.val()); \
50 for (int k=0; k<a.size(); k++) { \
51  COMPARE_VALUES(a.dx(k), b.dx(k)); \
52  COMPARE_VALUES(a.fastAccessDx(k), b.fastAccessDx(k)); \
53  } \
54  ;
55 
56 #define COMPARE_FAD_VECTORS(X1, X2, n) \
57  CPPUNIT_ASSERT(X1.size() == std::size_t(n)); \
58  CPPUNIT_ASSERT(X2.size() == std::size_t(n)); \
59  for (unsigned int i=0; i<n; i++) { \
60  COMPARE_FADS(X1[i], X2[i]); \
61  } \
62  ;
63 
64 // A class for testing differentiated BLAS operations for general Fad types
65 template <class FadType, class ScalarType>
66 class FadBLASUnitTests : public CppUnit::TestFixture {
67 
69 
71 
76 
81 
86 
91 
94 
104 
109 
117 
128 
138 
146 
154 
156 
157 public:
158 
160 
161  FadBLASUnitTests(int m, int n, int l, int ndot,
162  double absolute_tolerance, double relative_tolerance);
163 
164  void setUp();
165  void tearDown();
166 
167  void testSCAL1();
168  void testSCAL2();
169  void testSCAL3();
170  void testSCAL4();
171 
172  void testCOPY1();
173  void testCOPY2();
174  void testCOPY3();
175  void testCOPY4();
176 
177  void testAXPY1();
178  void testAXPY2();
179  void testAXPY3();
180  void testAXPY4();
181 
182  void testDOT1();
183  void testDOT2();
184  void testDOT3();
185  void testDOT4();
186 
187  void testNRM21();
188  void testNRM22();
189 
190  void testGEMV1();
191  void testGEMV2();
192  void testGEMV3();
193  void testGEMV4();
194  void testGEMV5();
195  void testGEMV6();
196  void testGEMV7();
197  void testGEMV8();
198  void testGEMV9();
199 
200  void testTRMV1();
201  void testTRMV2();
202  void testTRMV3();
203  void testTRMV4();
204 
205  void testGER1();
206  void testGER2();
207  void testGER3();
208  void testGER4();
209  void testGER5();
210  void testGER6();
211  void testGER7();
212 
213  void testGEMM1();
214  void testGEMM2();
215  void testGEMM3();
216  void testGEMM4();
217  void testGEMM5();
218  void testGEMM6();
219  void testGEMM7();
220  void testGEMM8();
221  void testGEMM9();
222  void testGEMM10();
223 
224  void testSYMM1();
225  void testSYMM2();
226  void testSYMM3();
227  void testSYMM4();
228  void testSYMM5();
229  void testSYMM6();
230  void testSYMM7();
231  void testSYMM8();
232  void testSYMM9();
233 
234  void testTRMM1();
235  void testTRMM2();
236  void testTRMM3();
237  void testTRMM4();
238  void testTRMM5();
239  void testTRMM6();
240  void testTRMM7();
241 
242  void testTRSM1();
243  void testTRSM2();
244  void testTRSM3();
245  void testTRSM4();
246  void testTRSM5();
247  void testTRSM6();
248  void testTRSM7();
249 
250 protected:
251 
252  // Random number generator
254 
255  // Real random number generator for derivative components
257 
258  // Number of matrix rows
259  unsigned int m;
260 
261  // Number of matrix columns
262  unsigned int n;
263 
264  // Number of matrix columns for level 3 blas
265  unsigned int l;
266 
267  // Number of derivative components
268  unsigned int ndot;
269 
270  // Tolerances to which fad objects should be the same
271  double tol_a, tol_r;
272 
273 }; // class FadBLASUnitTests
274 
275 template <class FadType, class ScalarType>
278  urand(), real_urand(), m(5), n(6), l(4), ndot(7), tol_a(1.0e-11), tol_r(1.0e-11) {}
279 
280 template <class FadType, class ScalarType>
282 FadBLASUnitTests(int m_, int n_, int l_, int ndot_, double absolute_tolerance,
283  double relative_tolerance) :
284  urand(),
285  real_urand(),
286  m(m_),
287  n(n_),
288  l(l_),
289  ndot(ndot_),
290  tol_a(absolute_tolerance),
291  tol_r(relative_tolerance) {}
292 
293 template <class FadType, class ScalarType>
295 setUp() {}
296 
297 template <class FadType, class ScalarType>
300 
301 // Tests all arguments
302 template <class FadType, class ScalarType>
303 void
306  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
307  for (unsigned int i=0; i<m; i++) {
308  ScalarType val = urand.number();
309  x1[i] = FadType(ndot, val);
310  x2[i] = FadType(ndot, val);
311  x3[i] = FadType(ndot, val);
312  for (unsigned int k=0; k<ndot; k++) {
313  val = urand.number();
314  x1[i].fastAccessDx(k) = val;
315  x2[i].fastAccessDx(k) = val;
316  x3[i].fastAccessDx(k) = val;
317  }
318  }
319  FadType alpha(ndot, urand.number());
320  for (unsigned int k=0; k<ndot; k++) {
321  alpha.fastAccessDx(k) = urand.number();
322  }
323 
324  Teuchos::BLAS<int,FadType> teuchos_blas;
325  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
326 
327  Teuchos::BLAS<int,FadType> sacado_blas(false);
328  sacado_blas.SCAL(m, alpha, &x2[0], 1);
329 
330  COMPARE_FAD_VECTORS(x1, x2, m);
331 
332  unsigned int sz = m*(1+ndot);
333  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
334  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
335 
336  COMPARE_FAD_VECTORS(x1, x3, m);
337 }
338 
339 // Tests non-unit inc
340 template <class FadType, class ScalarType>
341 void
344  unsigned int incx = 2;
345  VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
346  for (unsigned int i=0; i<m*incx; i++) {
347  ScalarType val = urand.number();
348  x1[i] = FadType(ndot, val);
349  x2[i] = FadType(ndot, val);
350  x3[i] = FadType(ndot, val);
351  for (unsigned int k=0; k<ndot; k++) {
352  val = urand.number();
353  x1[i].fastAccessDx(k) = val;
354  x2[i].fastAccessDx(k) = val;
355  x3[i].fastAccessDx(k) = val;
356  }
357  }
358  FadType alpha(ndot, urand.number());
359  for (unsigned int k=0; k<ndot; k++) {
360  alpha.fastAccessDx(k) = urand.number();
361  }
362 
363  Teuchos::BLAS<int,FadType> teuchos_blas;
364  teuchos_blas.SCAL(m, alpha, &x1[0], incx);
365 
366  Teuchos::BLAS<int,FadType> sacado_blas(false);
367  sacado_blas.SCAL(m, alpha, &x2[0], incx);
368 
369  COMPARE_FAD_VECTORS(x1, x2, m*incx);
370 
371  unsigned int sz = m*(1+ndot);
372  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
373  sacado_blas2.SCAL(m, alpha, &x3[0], incx);
374 
375  COMPARE_FAD_VECTORS(x1, x3, m*incx);
376 }
377 
378 // Tests constant alpha
379 template <class FadType, class ScalarType>
380 void
383  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
384  for (unsigned int i=0; i<m; i++) {
385  ScalarType val = urand.number();
386  x1[i] = FadType(ndot, val);
387  x2[i] = FadType(ndot, val);
388  x3[i] = FadType(ndot, val);
389  for (unsigned int k=0; k<ndot; k++) {
390  val = urand.number();
391  x1[i].fastAccessDx(k) = val;
392  x2[i].fastAccessDx(k) = val;
393  x3[i].fastAccessDx(k) = val;
394  }
395  }
396  ScalarType alpha = urand.number();
397 
398  Teuchos::BLAS<int,FadType> teuchos_blas;
399  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
400 
401  Teuchos::BLAS<int,FadType> sacado_blas(false);
402  sacado_blas.SCAL(m, alpha, &x2[0], 1);
403 
404  COMPARE_FAD_VECTORS(x1, x2, m);
405 
406  unsigned int sz = m*(1+ndot);
407  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
408  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
409 
410  COMPARE_FAD_VECTORS(x1, x3, m);
411 }
412 
413 // Tests constant x
414 template <class FadType, class ScalarType>
415 void
418  VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
419  for (unsigned int i=0; i<m; i++) {
420  ScalarType val = urand.number();
421  x1[i] = val;
422  x2[i] = val;
423  x3[i] = val;
424  }
425  FadType alpha = FadType(ndot, urand.number());
426  for (unsigned int k=0; k<ndot; k++)
427  alpha.fastAccessDx(k) = urand.number();
428 
429  Teuchos::BLAS<int,FadType> teuchos_blas;
430  teuchos_blas.SCAL(m, alpha, &x1[0], 1);
431 
432  Teuchos::BLAS<int,FadType> sacado_blas(false);
433  sacado_blas.SCAL(m, alpha, &x2[0], 1);
434 
435  COMPARE_FAD_VECTORS(x1, x2, m);
436 
437  unsigned int sz = m*(1+ndot);
438  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
439  sacado_blas2.SCAL(m, alpha, &x3[0], 1);
440 
441  COMPARE_FAD_VECTORS(x1, x3, m);
442 }
443 
444 // Tests all arguments
445 template <class FadType, class ScalarType>
446 void
449  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
450  for (unsigned int i=0; i<m; i++) {
451  x[i] = FadType(ndot, urand.number());
452  ScalarType val = urand.number();
453  y1[i] = FadType(ndot, val);
454  y2[i] = FadType(ndot, val);
455  y3[i] = FadType(ndot, val);
456  for (unsigned int k=0; k<ndot; k++) {
457  x[i].fastAccessDx(k) = urand.number();
458  val = urand.number();
459  y1[i].fastAccessDx(k) = val;
460  y2[i].fastAccessDx(k) = val;
461  y3[i].fastAccessDx(k) = val;
462  }
463  }
464 
465  Teuchos::BLAS<int,FadType> teuchos_blas;
466  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
467 
468  Teuchos::BLAS<int,FadType> sacado_blas(false);
469  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
470 
471  COMPARE_FAD_VECTORS(y1, y2, m);
472 
473  unsigned int sz = 2*m*(1+ndot);
474  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
475  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
476 
477  COMPARE_FAD_VECTORS(y1, y3, m);
478 }
479 
480 // Tests non unit inc
481 template <class FadType, class ScalarType>
482 void
485  unsigned int incx = 2;
486  unsigned int incy = 3;
487  VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
488  for (unsigned int i=0; i<m*incx; i++) {
489  x[i] = FadType(ndot, urand.number());
490  for (unsigned int k=0; k<ndot; k++) {
491  x[i].fastAccessDx(k) = urand.number();
492  }
493  }
494  for (unsigned int i=0; i<m*incy; i++) {
495  ScalarType val = urand.number();
496  y1[i] = FadType(ndot, val);
497  y2[i] = FadType(ndot, val);
498  y3[i] = FadType(ndot, val);
499  for (unsigned int k=0; k<ndot; k++) {
500  val = urand.number();
501  y1[i].fastAccessDx(k) = val;
502  y2[i].fastAccessDx(k) = val;
503  y3[i].fastAccessDx(k) = val;
504  }
505  }
506 
507  Teuchos::BLAS<int,FadType> teuchos_blas;
508  teuchos_blas.COPY(m, &x[0], incx, &y1[0], incy);
509 
510  Teuchos::BLAS<int,FadType> sacado_blas(false);
511  sacado_blas.COPY(m, &x[0], incx, &y2[0], incy);
512 
513  COMPARE_FAD_VECTORS(y1, y2, m*incy);
514 
515  unsigned int sz = 2*m*(1+ndot);
516  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
517  sacado_blas2.COPY(m, &x[0], incx, &y3[0], incy);
518 
519  COMPARE_FAD_VECTORS(y1, y3, m*incy);
520 }
521 
522 // Tests x constant
523 template <class FadType, class ScalarType>
524 void
527  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
528  for (unsigned int i=0; i<m; i++) {
529  x[i] = urand.number();
530  }
531  for (unsigned int i=0; i<m; i++) {
532  ScalarType val = urand.number();
533  y1[i] = FadType(ndot, val);
534  y2[i] = FadType(ndot, val);
535  y3[i] = FadType(ndot, val);
536  for (unsigned int k=0; k<ndot; k++) {
537  val = urand.number();
538  y1[i].fastAccessDx(k) = val;
539  y2[i].fastAccessDx(k) = val;
540  y3[i].fastAccessDx(k) = val;
541  }
542  }
543 
544  Teuchos::BLAS<int,FadType> teuchos_blas;
545  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
546 
547  Teuchos::BLAS<int,FadType> sacado_blas(false);
548  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
549 
550  COMPARE_FAD_VECTORS(y1, y2, m);
551 
552  unsigned int sz = 2*m*(1+ndot);
553  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
554  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
555 
556  COMPARE_FAD_VECTORS(y1, y3, m);
557 }
558 
559 // Tests y constant
560 template <class FadType, class ScalarType>
561 void
564  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
565  for (unsigned int i=0; i<m; i++) {
566  x[i] = FadType(ndot, urand.number());
567  ScalarType val = urand.number();
568  y1[i] = val;
569  y2[i] = val;
570  y3[i] = val;
571  for (unsigned int k=0; k<ndot; k++) {
572  x[i].fastAccessDx(k) = urand.number();
573  }
574  }
575 
576  Teuchos::BLAS<int,FadType> teuchos_blas;
577  teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
578 
579  Teuchos::BLAS<int,FadType> sacado_blas(false);
580  sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
581 
582  COMPARE_FAD_VECTORS(y1, y2, m);
583 
584  unsigned int sz = 2*m*(1+ndot);
585  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
586  sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
587 
588  COMPARE_FAD_VECTORS(y1, y3, m);
589 }
590 
591 // Tests all arguments
592 template <class FadType, class ScalarType>
593 void
596  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
597  for (unsigned int i=0; i<m; i++) {
598  x[i] = FadType(ndot, urand.number());
599  ScalarType val = urand.number();
600  y1[i] = FadType(ndot, val);
601  y2[i] = FadType(ndot, val);
602  y3[i] = FadType(ndot, val);
603  for (unsigned int k=0; k<ndot; k++) {
604  x[i].fastAccessDx(k) = urand.number();
605  val = urand.number();
606  y1[i].fastAccessDx(k) = val;
607  y2[i].fastAccessDx(k) = val;
608  y3[i].fastAccessDx(k) = val;
609  }
610  }
611  FadType alpha(ndot, urand.number());
612  for (unsigned int k=0; k<ndot; k++)
613  alpha.fastAccessDx(k) = urand.number();
614 
615  Teuchos::BLAS<int,FadType> teuchos_blas;
616  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
617 
618  Teuchos::BLAS<int,FadType> sacado_blas(false);
619  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
620 
621  COMPARE_FAD_VECTORS(y1, y2, m);
622 
623  unsigned int sz = 2*m*(1+ndot);
624  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
625  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
626 
627  COMPARE_FAD_VECTORS(y1, y3, m);
628 }
629 
630 // Tests non unit inc
631 template <class FadType, class ScalarType>
632 void
635  unsigned int incx = 2;
636  unsigned int incy = 3;
637  VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
638  for (unsigned int i=0; i<m*incx; i++) {
639  x[i] = FadType(ndot, urand.number());
640  for (unsigned int k=0; k<ndot; k++) {
641  x[i].fastAccessDx(k) = urand.number();
642  }
643  }
644  for (unsigned int i=0; i<m*incy; i++) {
645  ScalarType val = urand.number();
646  y1[i] = FadType(ndot, val);
647  y2[i] = FadType(ndot, val);
648  y3[i] = FadType(ndot, val);
649  for (unsigned int k=0; k<ndot; k++) {
650  val = urand.number();
651  y1[i].fastAccessDx(k) = val;
652  y2[i].fastAccessDx(k) = val;
653  y3[i].fastAccessDx(k) = val;
654  }
655  }
656  FadType alpha(ndot, urand.number());
657  for (unsigned int k=0; k<ndot; k++)
658  alpha.fastAccessDx(k) = urand.number();
659 
660  Teuchos::BLAS<int,FadType> teuchos_blas;
661  teuchos_blas.AXPY(m, alpha, &x[0], incx, &y1[0], incy);
662 
663  Teuchos::BLAS<int,FadType> sacado_blas(false);
664  sacado_blas.AXPY(m, alpha, &x[0], incx, &y2[0], incy);
665 
666  COMPARE_FAD_VECTORS(y1, y2, m*incy);
667 
668  unsigned int sz = 2*m*(1+ndot);
669  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
670  sacado_blas2.AXPY(m, alpha, &x[0], incx, &y3[0], incy);
671 
672  COMPARE_FAD_VECTORS(y1, y3, m*incy);
673 }
674 
675 // Tests x constant
676 template <class FadType, class ScalarType>
677 void
680  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
681  std::vector<ScalarType> xx(m);
682  for (unsigned int i=0; i<m; i++) {
683  xx[i] = urand.number();
684  x[i] = xx[i];
685  ScalarType val = urand.number();
686  y1[i] = FadType(ndot, val);
687  y2[i] = FadType(ndot, val);
688  y3[i] = FadType(ndot, val);
689  y4[i] = FadType(ndot, val);
690  for (unsigned int k=0; k<ndot; k++) {
691  val = urand.number();
692  y1[i].fastAccessDx(k) = val;
693  y2[i].fastAccessDx(k) = val;
694  y3[i].fastAccessDx(k) = val;
695  y4[i].fastAccessDx(k) = val;
696  }
697  }
698  FadType alpha(ndot, urand.number());
699  for (unsigned int k=0; k<ndot; k++)
700  alpha.fastAccessDx(k) = urand.number();
701 
702  Teuchos::BLAS<int,FadType> teuchos_blas;
703  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
704 
705  Teuchos::BLAS<int,FadType> sacado_blas(false);
706  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
707 
708  COMPARE_FAD_VECTORS(y1, y2, m);
709 
710  unsigned int sz = m*(1+ndot)+m;
711  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
712  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
713 
714  COMPARE_FAD_VECTORS(y1, y3, m);
715 
716  sacado_blas.AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
717 
718  COMPARE_FAD_VECTORS(y1, y4, m);
719 }
720 
721 // Tests y constant
722 template <class FadType, class ScalarType>
723 void
726  VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
727  for (unsigned int i=0; i<m; i++) {
728  x[i] = FadType(ndot, urand.number());
729  ScalarType val = urand.number();
730  y1[i] = val;
731  y2[i] = val;
732  y3[i] = val;
733  for (unsigned int k=0; k<ndot; k++) {
734  x[i].fastAccessDx(k) = urand.number();
735  }
736  }
737  FadType alpha(ndot, urand.number());
738  for (unsigned int k=0; k<ndot; k++)
739  alpha.fastAccessDx(k) = urand.number();
740 
741  Teuchos::BLAS<int,FadType> teuchos_blas;
742  teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
743 
744  Teuchos::BLAS<int,FadType> sacado_blas(false);
745  sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
746 
747  COMPARE_FAD_VECTORS(y1, y2, m);
748 
749  unsigned int sz = 2*m*(1+ndot);
750  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
751  sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
752 
753  COMPARE_FAD_VECTORS(y1, y3, m);
754 }
755 
756 // Tests all arguments
757 template <class FadType, class ScalarType>
758 void
761  VectorType X(m,ndot), Y(m,ndot);
762  for (unsigned int i=0; i<m; i++) {
763  X[i] = FadType(ndot, real_urand.number());
764  Y[i] = FadType(ndot, real_urand.number());
765  for (unsigned int k=0; k<ndot; k++) {
766  X[i].fastAccessDx(k) = real_urand.number();
767  Y[i].fastAccessDx(k) = real_urand.number();
768  }
769  }
770 
771  Teuchos::BLAS<int,FadType> teuchos_blas;
772  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
773 
774  Teuchos::BLAS<int,FadType> sacado_blas(false);
775  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
776 
777  COMPARE_FADS(z1, z2);
778 
779  unsigned int sz = 2*m*(1+ndot);
780  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
781  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
782 
783  COMPARE_FADS(z1, z3);
784 }
785 
786 // Tests non-unit inc
787 template <class FadType, class ScalarType>
788 void
791  unsigned int incx = 2;
792  unsigned int incy = 3;
793  VectorType X(m*incx,ndot), Y(m*incy,ndot);
794  for (unsigned int i=0; i<m*incx; i++) {
795  X[i] = FadType(ndot, real_urand.number());
796  for (unsigned int k=0; k<ndot; k++) {
797  X[i].fastAccessDx(k) = real_urand.number();
798  }
799  }
800  for (unsigned int i=0; i<m*incy; i++) {
801  Y[i] = FadType(ndot, real_urand.number());
802  for (unsigned int k=0; k<ndot; k++) {
803  Y[i].fastAccessDx(k) = real_urand.number();
804  }
805  }
806 
807  Teuchos::BLAS<int,FadType> teuchos_blas;
808  FadType z1 = teuchos_blas.DOT(m, &X[0], incx, &Y[0], incy);
809 
810  Teuchos::BLAS<int,FadType> sacado_blas(false);
811  FadType z2 = sacado_blas.DOT(m, &X[0], incx, &Y[0], incy);
812 
813  COMPARE_FADS(z1, z2);
814 
815  unsigned int sz = 2*m*(1+ndot);
816  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
817  FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
818 
819  COMPARE_FADS(z1, z3);
820 }
821 
822 // Tests X constant
823 template <class FadType, class ScalarType>
824 void
827  VectorType X(m,0), Y(m,ndot);
828  std::vector<ScalarType> x(m);
829  for (unsigned int i=0; i<m; i++) {
830  x[i] = urand.number();
831  X[i] = x[i];
832  Y[i] = FadType(ndot, real_urand.number());
833  for (unsigned int k=0; k<ndot; k++) {
834  Y[i].fastAccessDx(k) = real_urand.number();
835  }
836  }
837 
838  Teuchos::BLAS<int,FadType> teuchos_blas;
839  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
840 
841  Teuchos::BLAS<int,FadType> sacado_blas(false);
842  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
843 
844  COMPARE_FADS(z1, z2);
845 
846  unsigned int sz = 2*m*(1+ndot);
847  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
848  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
849 
850  COMPARE_FADS(z1, z3);
851 
852  FadType z4 = sacado_blas.DOT(m, &x[0], 1, &Y[0], 1);
853 
854  COMPARE_FADS(z1, z4);
855 }
856 
857 // Tests Y constant
858 template <class FadType, class ScalarType>
859 void
862  VectorType X(m,ndot), Y(m,0);
863  std::vector<ScalarType> y(m);
864  for (unsigned int i=0; i<m; i++) {
865  X[i] = FadType(ndot, real_urand.number());
866  y[i] = urand.number();
867  Y[i] = y[i];
868  for (unsigned int k=0; k<ndot; k++) {
869  X[i].fastAccessDx(k) = real_urand.number();
870  }
871  }
872 
873  Teuchos::BLAS<int,FadType> teuchos_blas;
874  FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
875 
876  Teuchos::BLAS<int,FadType> sacado_blas(false);
877  FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
878 
879  COMPARE_FADS(z1, z2);
880 
881  unsigned int sz = 2*m*(1+ndot);
882  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
883  FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
884 
885  COMPARE_FADS(z1, z3);
886 
887  FadType z4 = sacado_blas.DOT(m, &X[0], 1, &y[0], 1);
888 
889  COMPARE_FADS(z1, z4);
890 }
891 
892 // Tests all arguments
893 template <class FadType, class ScalarType>
894 void
897  VectorType X(m,ndot);
898  for (unsigned int i=0; i<m; i++) {
899  X[i] = FadType(ndot, real_urand.number());
900  for (unsigned int k=0; k<ndot; k++) {
901  X[i].fastAccessDx(k) = real_urand.number();
902  }
903  }
904 
905  Teuchos::BLAS<int,FadType> teuchos_blas;
907  teuchos_blas.NRM2(m, &X[0], 1);
908 
909  Teuchos::BLAS<int,FadType> sacado_blas(false);
911  sacado_blas.NRM2(m, &X[0], 1);
912 
913  COMPARE_FADS(z1, z2);
914 
915  unsigned int sz = m*(1+ndot);
916  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
918  sacado_blas2.NRM2(m, &X[0], 1);
919 
920  COMPARE_FADS(z1, z3);
921 }
922 
923 // Tests non-unit inc
924 template <class FadType, class ScalarType>
925 void
928  unsigned int incx = 2;
929  VectorType X(m*incx,ndot);
930  for (unsigned int i=0; i<m*incx; i++) {
931  X[i] = FadType(ndot, real_urand.number());
932  for (unsigned int k=0; k<ndot; k++) {
933  X[i].fastAccessDx(k) = real_urand.number();
934  }
935  }
936 
937  Teuchos::BLAS<int,FadType> teuchos_blas;
939  teuchos_blas.NRM2(m, &X[0], incx);
940 
941  Teuchos::BLAS<int,FadType> sacado_blas(false);
943  sacado_blas.NRM2(m, &X[0], incx);
944 
945  COMPARE_FADS(z1, z2);
946 
947  unsigned int sz = m*(1+ndot);
948  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
950  sacado_blas2.NRM2(m, &X[0], incx);
951 
952  COMPARE_FADS(z1, z3);
953 }
954 
955 // Tests all arguments
956 template <class FadType, class ScalarType>
957 void
960  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
961  for (unsigned int j=0; j<n; j++) {
962  for (unsigned int i=0; i<m; i++) {
963  A[i+j*m] = FadType(ndot, urand.number());
964  for (unsigned int k=0; k<ndot; k++)
965  A[i+j*m].fastAccessDx(k) = urand.number();
966  }
967  B[j] = FadType(ndot, urand.number());
968  for (unsigned int k=0; k<ndot; k++)
969  B[j].fastAccessDx(k) = urand.number();
970  }
971  FadType alpha(ndot, urand.number());
972  FadType beta(ndot, urand.number());
973  for (unsigned int k=0; k<ndot; k++) {
974  alpha.fastAccessDx(k) = urand.number();
975  beta.fastAccessDx(k) = urand.number();
976  }
977 
978  for (unsigned int i=0; i<m; i++) {
979  ScalarType val = urand.number();
980  C1[i] = FadType(ndot, val);
981  C2[i] = FadType(ndot, val);
982  C3[i] = FadType(ndot, val);
983  for (unsigned int k=0; k<ndot; k++) {
984  val = urand.number();
985  C1[i].fastAccessDx(k) = val;
986  C2[i].fastAccessDx(k) = val;
987  C3[i].fastAccessDx(k) = val;
988  }
989  }
990 
991  Teuchos::BLAS<int,FadType> teuchos_blas;
992  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
993  beta, &C1[0], 1);
994 
995  Teuchos::BLAS<int,FadType> sacado_blas(false);
996  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
997  beta, &C2[0], 1);
998 
999  COMPARE_FAD_VECTORS(C1, C2, m);
1000 
1001  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1002  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1003  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1004  beta, &C3[0], 1);
1005 
1006  COMPARE_FAD_VECTORS(C1, C3, m);
1007 }
1008 
1009 // Tests non-unit inc and different lda
1010 template <class FadType, class ScalarType>
1011 void
1014  unsigned int lda = m+3;
1015  unsigned int incb = 2;
1016  unsigned int incc = 3;
1017  VectorType A(lda*n,ndot), B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot),
1018  C3(m*incc,ndot);
1019  for (unsigned int j=0; j<n; j++) {
1020  for (unsigned int i=0; i<lda; i++) {
1021  A[i+j*lda] = FadType(ndot, urand.number());
1022  for (unsigned int k=0; k<ndot; k++)
1023  A[i+j*lda].fastAccessDx(k) = urand.number();
1024  }
1025  }
1026  for (unsigned int j=0; j<n*incb; j++) {
1027  B[j] = FadType(ndot, urand.number());
1028  for (unsigned int k=0; k<ndot; k++)
1029  B[j].fastAccessDx(k) = urand.number();
1030  }
1031  FadType alpha(ndot, urand.number());
1032  FadType beta(ndot, urand.number());
1033  for (unsigned int k=0; k<ndot; k++) {
1034  alpha.fastAccessDx(k) = urand.number();
1035  beta.fastAccessDx(k) = urand.number();
1036  }
1037 
1038  for (unsigned int i=0; i<m*incc; i++) {
1039  ScalarType val = urand.number();
1040  C1[i] = FadType(ndot, val);
1041  C2[i] = FadType(ndot, val);
1042  C3[i] = FadType(ndot, val);
1043  for (unsigned int k=0; k<ndot; k++) {
1044  val = urand.number();
1045  C1[i].fastAccessDx(k) = val;
1046  C2[i].fastAccessDx(k) = val;
1047  C3[i].fastAccessDx(k) = val;
1048  }
1049  }
1050 
1051  Teuchos::BLAS<int,FadType> teuchos_blas;
1052  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1053  beta, &C1[0], incc);
1054 
1055  Teuchos::BLAS<int,FadType> sacado_blas(false);
1056  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1057  beta, &C2[0], incc);
1058 
1059  COMPARE_FAD_VECTORS(C1, C2, m*incc);
1060 
1061  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1062  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1063  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1064  beta, &C3[0], incc);
1065 
1066  COMPARE_FAD_VECTORS(C1, C3, m*incc);
1067 }
1068 
1069 // Tests transpose with all arguments
1070 template <class FadType, class ScalarType>
1071 void
1074  VectorType A(m*n,ndot), B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
1075  for (unsigned int j=0; j<n; j++) {
1076  for (unsigned int i=0; i<m; i++) {
1077  A[i+j*m] = FadType(ndot, urand.number());
1078  for (unsigned int k=0; k<ndot; k++)
1079  A[i+j*m].fastAccessDx(k) = urand.number();
1080  }
1081  }
1082  for (unsigned int j=0; j<m; j++) {
1083  B[j] = FadType(ndot, urand.number());
1084  for (unsigned int k=0; k<ndot; k++)
1085  B[j].fastAccessDx(k) = urand.number();
1086  }
1087  FadType alpha(ndot, urand.number());
1088  FadType beta(ndot, urand.number());
1089  for (unsigned int k=0; k<ndot; k++) {
1090  alpha.fastAccessDx(k) = urand.number();
1091  beta.fastAccessDx(k) = urand.number();
1092  }
1093 
1094  for (unsigned int i=0; i<n; i++) {
1095  ScalarType val = urand.number();
1096  C1[i] = FadType(ndot, val);
1097  C2[i] = FadType(ndot, val);
1098  C3[i] = FadType(ndot, val);
1099  for (unsigned int k=0; k<ndot; k++) {
1100  val = urand.number();
1101  C1[i].fastAccessDx(k) = val;
1102  C2[i].fastAccessDx(k) = val;
1103  C3[i].fastAccessDx(k) = val;
1104  }
1105  }
1106 
1107  Teuchos::BLAS<int,FadType> teuchos_blas;
1108  teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1109  beta, &C1[0], 1);
1110 
1111  Teuchos::BLAS<int,FadType> sacado_blas(false);
1112  sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1113  beta, &C2[0], 1);
1114 
1115  COMPARE_FAD_VECTORS(C1, C2, n);
1116 
1117  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1118  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1119  sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1120  beta, &C3[0], 1);
1121 
1122  COMPARE_FAD_VECTORS(C1, C3, n);
1123 }
1124 
1125 // Tests transpose with non-unit inc and different lda
1126 template <class FadType, class ScalarType>
1127 void
1130  unsigned int lda = m+3;
1131  unsigned int incb = 2;
1132  unsigned int incc = 3;
1133  VectorType A(lda*n,ndot), B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot),
1134  C3(n*incc,ndot);
1135  for (unsigned int j=0; j<n; j++) {
1136  for (unsigned int i=0; i<lda; i++) {
1137  A[i+j*lda] = FadType(ndot, urand.number());
1138  for (unsigned int k=0; k<ndot; k++)
1139  A[i+j*lda].fastAccessDx(k) = urand.number();
1140  }
1141  }
1142  for (unsigned int j=0; j<m*incb; j++) {
1143  B[j] = FadType(ndot, urand.number());
1144  for (unsigned int k=0; k<ndot; k++)
1145  B[j].fastAccessDx(k) = urand.number();
1146  }
1147  FadType alpha(ndot, urand.number());
1148  FadType beta(ndot, urand.number());
1149  for (unsigned int k=0; k<ndot; k++) {
1150  alpha.fastAccessDx(k) = urand.number();
1151  beta.fastAccessDx(k) = urand.number();
1152  }
1153 
1154  for (unsigned int i=0; i<n*incc; i++) {
1155  ScalarType val = urand.number();
1156  C1[i] = FadType(ndot, val);
1157  C2[i] = FadType(ndot, val);
1158  C3[i] = FadType(ndot, val);
1159  for (unsigned int k=0; k<ndot; k++) {
1160  val = urand.number();
1161  C1[i].fastAccessDx(k) = val;
1162  C2[i].fastAccessDx(k) = val;
1163  C3[i].fastAccessDx(k) = val;
1164  }
1165  }
1166 
1167  Teuchos::BLAS<int,FadType> teuchos_blas;
1168  teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1169  beta, &C1[0], incc);
1170 
1171  Teuchos::BLAS<int,FadType> sacado_blas(false);
1172  sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1173  beta, &C2[0], incc);
1174 
1175  COMPARE_FAD_VECTORS(C1, C2, n*incc);
1176 
1177  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1178  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1179  sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1180  beta, &C3[0], incc);
1181 
1182  COMPARE_FAD_VECTORS(C1, C3, n*incc);
1183 }
1184 
1185 // Tests with constant C
1186 template <class FadType, class ScalarType>
1187 void
1190  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1191  for (unsigned int j=0; j<n; j++) {
1192  for (unsigned int i=0; i<m; i++) {
1193  A[i+j*m] = FadType(ndot, urand.number());
1194  for (unsigned int k=0; k<ndot; k++)
1195  A[i+j*m].fastAccessDx(k) = urand.number();
1196  }
1197  B[j] = FadType(ndot, urand.number());
1198  for (unsigned int k=0; k<ndot; k++)
1199  B[j].fastAccessDx(k) = urand.number();
1200  }
1201  FadType alpha(ndot, urand.number());
1202  FadType beta(ndot, urand.number());
1203  for (unsigned int k=0; k<ndot; k++) {
1204  alpha.fastAccessDx(k) = urand.number();
1205  beta.fastAccessDx(k) = urand.number();
1206  }
1207 
1208  for (unsigned int i=0; i<m; i++) {
1209  ScalarType val = urand.number();
1210  C1[i] = val;
1211  C2[i] = val;
1212  C3[i] = val;
1213  }
1214 
1215  Teuchos::BLAS<int,FadType> teuchos_blas;
1216  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1217  beta, &C1[0], 1);
1218 
1219  Teuchos::BLAS<int,FadType> sacado_blas(false);
1220  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1221  beta, &C2[0], 1);
1222 
1223  COMPARE_FAD_VECTORS(C1, C2, m);
1224 
1225  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1226  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1227  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1228  beta, &C3[0], 1);
1229 
1230  COMPARE_FAD_VECTORS(C1, C3, m);
1231 }
1232 
1233 // Tests with constant alpha, beta
1234 template <class FadType, class ScalarType>
1235 void
1238  VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1239  for (unsigned int j=0; j<n; j++) {
1240  for (unsigned int i=0; i<m; i++) {
1241  A[i+j*m] = FadType(ndot, urand.number());
1242  for (unsigned int k=0; k<ndot; k++)
1243  A[i+j*m].fastAccessDx(k) = urand.number();
1244  }
1245  B[j] = FadType(ndot, urand.number());
1246  for (unsigned int k=0; k<ndot; k++)
1247  B[j].fastAccessDx(k) = urand.number();
1248  }
1249  ScalarType alpha = urand.number();
1250  ScalarType beta = urand.number();
1251 
1252  for (unsigned int i=0; i<m; i++) {
1253  ScalarType val = urand.number();
1254  C1[i] = FadType(ndot, val);
1255  C2[i] = FadType(ndot, val);
1256  C3[i] = FadType(ndot, val);
1257  for (unsigned int k=0; k<ndot; k++) {
1258  val = urand.number();
1259  C1[i].fastAccessDx(k) = val;
1260  C2[i].fastAccessDx(k) = val;
1261  C3[i].fastAccessDx(k) = val;
1262  }
1263  }
1264 
1265  Teuchos::BLAS<int,FadType> teuchos_blas;
1266  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1267  beta, &C1[0], 1);
1268 
1269  Teuchos::BLAS<int,FadType> sacado_blas(false);
1270  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1271  beta, &C2[0], 1);
1272 
1273  COMPARE_FAD_VECTORS(C1, C2, m);
1274 
1275  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1276  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1277  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1278  beta, &C3[0], 1);
1279 
1280  COMPARE_FAD_VECTORS(C1, C3, m);
1281 }
1282 
1283 // Tests wth constant B
1284 template <class FadType, class ScalarType>
1285 void
1288  VectorType A(m*n,ndot), B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1289  C4(m,ndot);
1290  std::vector<ScalarType> b(n);
1291  for (unsigned int j=0; j<n; j++) {
1292  for (unsigned int i=0; i<m; i++) {
1293  A[i+j*m] = FadType(ndot, urand.number());
1294  for (unsigned int k=0; k<ndot; k++)
1295  A[i+j*m].fastAccessDx(k) = urand.number();
1296  }
1297  b[j] = urand.number();
1298  B[j] = b[j];
1299  }
1300  FadType alpha(ndot, urand.number());
1301  FadType beta(ndot, urand.number());
1302  for (unsigned int k=0; k<ndot; k++) {
1303  alpha.fastAccessDx(k) = urand.number();
1304  beta.fastAccessDx(k) = urand.number();
1305  }
1306 
1307  for (unsigned int i=0; i<m; i++) {
1308  ScalarType val = urand.number();
1309  C1[i] = FadType(ndot, val);
1310  C2[i] = FadType(ndot, val);
1311  C3[i] = FadType(ndot, val);
1312  C4[i] = FadType(ndot, val);
1313  for (unsigned int k=0; k<ndot; k++) {
1314  val = urand.number();
1315  C1[i].fastAccessDx(k) = val;
1316  C2[i].fastAccessDx(k) = val;
1317  C3[i].fastAccessDx(k) = val;
1318  C4[i].fastAccessDx(k) = val;
1319  }
1320  }
1321 
1322  Teuchos::BLAS<int,FadType> teuchos_blas;
1323  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1324  beta, &C1[0], 1);
1325 
1326  Teuchos::BLAS<int,FadType> sacado_blas(false);
1327  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1328  beta, &C2[0], 1);
1329 
1330  COMPARE_FAD_VECTORS(C1, C2, m);
1331 
1332  unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
1333  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1334  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1335  beta, &C3[0], 1);
1336 
1337  COMPARE_FAD_VECTORS(C1, C3, m);
1338 
1339  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &b[0], 1,
1340  beta, &C4[0], 1);
1341 
1342  COMPARE_FAD_VECTORS(C1, C4, m);
1343 }
1344 
1345 // Tests with constant A
1346 template <class FadType, class ScalarType>
1347 void
1350  VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1351  C4(m,ndot);
1352  std::vector<ScalarType> a(m*n);
1353  for (unsigned int j=0; j<n; j++) {
1354  for (unsigned int i=0; i<m; i++) {
1355  a[i+j*m] = urand.number();
1356  A[i+j*m] = a[i+j*m];
1357  }
1358  B[j] = FadType(ndot, urand.number());
1359  for (unsigned int k=0; k<ndot; k++)
1360  B[j].fastAccessDx(k) = urand.number();
1361  }
1362  FadType alpha(ndot, urand.number());
1363  FadType beta(ndot, urand.number());
1364  for (unsigned int k=0; k<ndot; k++) {
1365  alpha.fastAccessDx(k) = urand.number();
1366  beta.fastAccessDx(k) = urand.number();
1367  }
1368 
1369  for (unsigned int i=0; i<m; i++) {
1370  ScalarType val = urand.number();
1371  C1[i] = FadType(ndot, val);
1372  C2[i] = FadType(ndot, val);
1373  C3[i] = FadType(ndot, val);
1374  C4[i] = FadType(ndot, val);
1375  for (unsigned int k=0; k<ndot; k++) {
1376  val = urand.number();
1377  C1[i].fastAccessDx(k) = val;
1378  C2[i].fastAccessDx(k) = val;
1379  C3[i].fastAccessDx(k) = val;
1380  C4[i].fastAccessDx(k) = val;
1381  }
1382  }
1383 
1384  Teuchos::BLAS<int,FadType> teuchos_blas;
1385  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1386  beta, &C1[0], 1);
1387 
1388  Teuchos::BLAS<int,FadType> sacado_blas(false);
1389  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1390  beta, &C2[0], 1);
1391 
1392  COMPARE_FAD_VECTORS(C1, C2, m);
1393 
1394  unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1395  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1396  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1397  beta, &C3[0], 1);
1398 
1399  COMPARE_FAD_VECTORS(C1, C3, m);
1400 
1401  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &B[0], 1,
1402  beta, &C4[0], 1);
1403 
1404  COMPARE_FAD_VECTORS(C1, C4, m);
1405 }
1406 
1407 // Tests with constant A, B
1408 template <class FadType, class ScalarType>
1409 void
1412  VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1413  C4(m,ndot);
1414  std::vector<ScalarType> a(m*n), b(n);
1415  for (unsigned int j=0; j<n; j++) {
1416  for (unsigned int i=0; i<m; i++) {
1417  a[i+j*m] = urand.number();
1418  A[i+j*m] = a[i+j*m];
1419  }
1420  b[j] = urand.number();
1421  B[j] = b[j];
1422  }
1423  FadType alpha(ndot, urand.number());
1424  FadType beta(ndot, urand.number());
1425  for (unsigned int k=0; k<ndot; k++) {
1426  alpha.fastAccessDx(k) = urand.number();
1427  beta.fastAccessDx(k) = urand.number();
1428  }
1429 
1430  for (unsigned int i=0; i<m; i++) {
1431  ScalarType val = urand.number();
1432  C1[i] = FadType(ndot, val);
1433  C2[i] = FadType(ndot, val);
1434  C3[i] = FadType(ndot, val);
1435  C4[i] = FadType(ndot, val);
1436  for (unsigned int k=0; k<ndot; k++) {
1437  val = urand.number();
1438  C1[i].fastAccessDx(k) = val;
1439  C2[i].fastAccessDx(k) = val;
1440  C3[i].fastAccessDx(k) = val;
1441  C4[i].fastAccessDx(k) = val;
1442  }
1443  }
1444 
1445  Teuchos::BLAS<int,FadType> teuchos_blas;
1446  teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1447  beta, &C1[0], 1);
1448 
1449  Teuchos::BLAS<int,FadType> sacado_blas(false);
1450  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1451  beta, &C2[0], 1);
1452 
1453  COMPARE_FAD_VECTORS(C1, C2, m);
1454 
1455  unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1456  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1457  sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1458  beta, &C3[0], 1);
1459 
1460  COMPARE_FAD_VECTORS(C1, C3, m);
1461 
1462  sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &b[0], 1,
1463  beta, &C4[0], 1);
1464 
1465  COMPARE_FAD_VECTORS(C1, C4, m);
1466 }
1467 
1468 // Tests all arguments
1469 template <class FadType, class ScalarType>
1470 void
1473  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1474  for (unsigned int j=0; j<n; j++) {
1475  for (unsigned int i=0; i<n; i++) {
1476  A[i+j*n] = FadType(ndot, urand.number());
1477  for (unsigned int k=0; k<ndot; k++)
1478  A[i+j*n].fastAccessDx(k) = urand.number();
1479  }
1480  ScalarType val = urand.number();
1481  x1[j] = FadType(ndot, val);
1482  x2[j] = FadType(ndot, val);
1483  x3[j] = FadType(ndot, val);
1484  for (unsigned int k=0; k<ndot; k++) {
1485  val = urand.number();
1486  x1[j].fastAccessDx(k) = val;
1487  x2[j].fastAccessDx(k) = val;
1488  x3[j].fastAccessDx(k) = val;
1489  }
1490  }
1491 
1492  Teuchos::BLAS<int,FadType> teuchos_blas;
1493  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1494  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1495 
1496  Teuchos::BLAS<int,FadType> sacado_blas(false);
1498  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1499 
1500  COMPARE_FAD_VECTORS(x1, x2, n);
1501 
1502  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1503  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1504  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1505  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1506 
1507  COMPARE_FAD_VECTORS(x1, x3, n);
1508 
1509  teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1510  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1512  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1513  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1514  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1515  COMPARE_FAD_VECTORS(x1, x2, n);
1516  COMPARE_FAD_VECTORS(x1, x3, n);
1517 
1518  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1519  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1520  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1521  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1522  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1523  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1524  COMPARE_FAD_VECTORS(x1, x2, n);
1525  COMPARE_FAD_VECTORS(x1, x3, n);
1526 
1527  for (unsigned int i=0; i<n; i++) {
1528  A[i*n+i].val() = 1.0;
1529  for (unsigned int k=0; k<ndot; k++)
1530  A[i*n+i].fastAccessDx(k) = 0.0;
1531  }
1532  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1533  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1535  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1536  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1537  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1538  COMPARE_FAD_VECTORS(x1, x2, n);
1539  COMPARE_FAD_VECTORS(x1, x3, n);
1540 }
1541 
1542 // Tests non unit inc, different lda
1543 template <class FadType, class ScalarType>
1544 void
1547  unsigned int lda = n+3;
1548  unsigned int incx = 2;
1549  VectorType A(lda*n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
1550  for (unsigned int j=0; j<n; j++) {
1551  for (unsigned int i=0; i<lda; i++) {
1552  A[i+j*lda] = FadType(ndot, urand.number());
1553  for (unsigned int k=0; k<ndot; k++)
1554  A[i+j*lda].fastAccessDx(k) = urand.number();
1555  }
1556  }
1557  for (unsigned int j=0; j<n*incx; j++) {
1558  ScalarType val = urand.number();
1559  x1[j] = FadType(ndot, val);
1560  x2[j] = FadType(ndot, val);
1561  x3[j] = FadType(ndot, val);
1562  for (unsigned int k=0; k<ndot; k++) {
1563  val = urand.number();
1564  x1[j].fastAccessDx(k) = val;
1565  x2[j].fastAccessDx(k) = val;
1566  x3[j].fastAccessDx(k) = val;
1567  }
1568  }
1569 
1570  Teuchos::BLAS<int,FadType> teuchos_blas;
1571  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1572  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1573 
1574  Teuchos::BLAS<int,FadType> sacado_blas(false);
1576  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1577 
1578  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1579 
1580  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1581  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1582  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1583  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1584 
1585  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1586 
1587  teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1588  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1590  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1591  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1592  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1593  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1594  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1595 
1596  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1597  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1598  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1599  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1600  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1601  Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1602  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1603  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1604 
1605  for (unsigned int i=0; i<n; i++) {
1606  A[i*lda+i].val() = 1.0;
1607  for (unsigned int k=0; k<ndot; k++)
1608  A[i*lda+i].fastAccessDx(k) = 0.0;
1609  }
1610  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1611  Teuchos::UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1613  Teuchos::UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1614  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1615  Teuchos::UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1616  COMPARE_FAD_VECTORS(x1, x2, n*incx);
1617  COMPARE_FAD_VECTORS(x1, x3, n*incx);
1618 }
1619 
1620 // Tests A constant
1621 template <class FadType, class ScalarType>
1622 void
1625  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot),
1626  x5(n,ndot);
1627  std::vector<ScalarType> a(n*n);
1628  for (unsigned int j=0; j<n; j++) {
1629  for (unsigned int i=0; i<n; i++) {
1630  a[i+j*n] = urand.number();
1631  A[i+j*n] = a[i+j*n];
1632  }
1633  ScalarType val = urand.number();
1634  x1[j] = FadType(ndot, val);
1635  x2[j] = FadType(ndot, val);
1636  x3[j] = FadType(ndot, val);
1637  x4[j] = FadType(ndot, val);
1638  x5[j] = FadType(ndot, val);
1639  for (unsigned int k=0; k<ndot; k++) {
1640  val = urand.number();
1641  x1[j].fastAccessDx(k) = val;
1642  x2[j].fastAccessDx(k) = val;
1643  x3[j].fastAccessDx(k) = val;
1644  x4[j].fastAccessDx(k) = val;
1645  x5[j].fastAccessDx(k) = val;
1646  }
1647  }
1648 
1649  Teuchos::BLAS<int,FadType> teuchos_blas;
1650  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1651  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1652 
1653  Teuchos::BLAS<int,FadType> sacado_blas(false);
1655  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1656 
1657  COMPARE_FAD_VECTORS(x1, x2, n);
1658 
1659  unsigned int sz = n*n+n*(1+ndot);
1660  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1661  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1662  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1663 
1664  COMPARE_FAD_VECTORS(x1, x3, n);
1665 
1667  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1668 
1669  COMPARE_FAD_VECTORS(x1, x4, n);
1670 
1671  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1672  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x5[0], 1);
1673 
1674  COMPARE_FAD_VECTORS(x1, x5, n);
1675 
1676  teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1677  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1679  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1680  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1681  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1683  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1684  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1685  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1686  COMPARE_FAD_VECTORS(x1, x2, n);
1687  COMPARE_FAD_VECTORS(x1, x3, n);
1688  COMPARE_FAD_VECTORS(x1, x4, n);
1689  COMPARE_FAD_VECTORS(x1, x5, n);
1690 
1691  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1692  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1693  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1694  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1695  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1696  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1697  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1698  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1699  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1700  Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1701  COMPARE_FAD_VECTORS(x1, x2, n);
1702  COMPARE_FAD_VECTORS(x1, x3, n);
1703  COMPARE_FAD_VECTORS(x1, x4, n);
1704  COMPARE_FAD_VECTORS(x1, x5, n);
1705 
1706  for (unsigned int i=0; i<n; i++) {
1707  A[i*n+i].val() = 1.0;
1708  for (unsigned int k=0; k<ndot; k++)
1709  A[i*n+i].fastAccessDx(k) = 0.0;
1710  }
1711  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1712  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1714  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1715  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1716  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1718  Teuchos::UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1719  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1720  Teuchos::UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1721  COMPARE_FAD_VECTORS(x1, x2, n);
1722  COMPARE_FAD_VECTORS(x1, x3, n);
1723  COMPARE_FAD_VECTORS(x1, x4, n);
1724  COMPARE_FAD_VECTORS(x1, x5, n);
1725 }
1726 
1727 // Tests x constant
1728 template <class FadType, class ScalarType>
1729 void
1732  VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1733  for (unsigned int j=0; j<n; j++) {
1734  for (unsigned int i=0; i<n; i++) {
1735  A[i+j*n] = FadType(ndot, urand.number());
1736  for (unsigned int k=0; k<ndot; k++)
1737  A[i+j*n].fastAccessDx(k) = urand.number();
1738  }
1739  ScalarType val = urand.number();
1740  x1[j] = val;
1741  x2[j] = val;
1742  x3[j] = val;
1743  }
1744 
1745  Teuchos::BLAS<int,FadType> teuchos_blas;
1746  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1747  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1748 
1749  Teuchos::BLAS<int,FadType> sacado_blas(false);
1751  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1752 
1753  COMPARE_FAD_VECTORS(x1, x2, n);
1754 
1755  unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1756  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1757  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1758  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1759 
1760  COMPARE_FAD_VECTORS(x1, x3, n);
1761 
1762  teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1763  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1765  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1766  sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1767  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1768  COMPARE_FAD_VECTORS(x1, x2, n);
1769  COMPARE_FAD_VECTORS(x1, x3, n);
1770 
1771  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1772  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1773  sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1774  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1775  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1776  Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1777  COMPARE_FAD_VECTORS(x1, x2, n);
1778  COMPARE_FAD_VECTORS(x1, x3, n);
1779 
1780  for (unsigned int i=0; i<n; i++) {
1781  A[i*n+i].val() = 1.0;
1782  for (unsigned int k=0; k<ndot; k++)
1783  A[i*n+i].fastAccessDx(k) = 0.0;
1784  }
1785  teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1786  Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1788  Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1789  sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1790  Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1791  COMPARE_FAD_VECTORS(x1, x2, n);
1792  COMPARE_FAD_VECTORS(x1, x3, n);
1793 }
1794 
1795 // Tests all arguments
1796 template <class FadType, class ScalarType>
1797 void
1800 
1801  // GER is apparently not defined in the BLAS for complex types
1803  return;
1804 
1805  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1806  for (unsigned int j=0; j<n; j++) {
1807  for (unsigned int i=0; i<m; i++) {
1808  ScalarType val = urand.number();
1809  A1[i+j*m] = FadType(ndot, val);
1810  A2[i+j*m] = FadType(ndot, val);
1811  A3[i+j*m] = FadType(ndot, val);
1812  for (unsigned int k=0; k<ndot; k++) {
1813  val = urand.number();
1814  A1[i+j*m].fastAccessDx(k) = val;
1815  A2[i+j*m].fastAccessDx(k) = val;
1816  A3[i+j*m].fastAccessDx(k) = val;
1817  }
1818  }
1819  }
1820  for (unsigned int i=0; i<m; i++) {
1821  x[i] = FadType(ndot, urand.number());
1822  for (unsigned int k=0; k<ndot; k++)
1823  x[i].fastAccessDx(k) = urand.number();
1824  }
1825  for (unsigned int i=0; i<n; i++) {
1826  y[i] = FadType(ndot, urand.number());
1827  for (unsigned int k=0; k<ndot; k++)
1828  y[i].fastAccessDx(k) = urand.number();
1829  }
1830  FadType alpha(ndot, urand.number());
1831  for (unsigned int k=0; k<ndot; k++) {
1832  alpha.fastAccessDx(k) = urand.number();
1833  }
1834 
1835  Teuchos::BLAS<int,FadType> teuchos_blas;
1836  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1837 
1838  Teuchos::BLAS<int,FadType> sacado_blas(false);
1839  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1840 
1841  COMPARE_FAD_VECTORS(A1, A2, m*n);
1842 
1843  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1844  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1845  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1846 
1847  COMPARE_FAD_VECTORS(A1, A3, m*n);
1848 }
1849 
1850 // Tests non unit inc, different lda
1851 template <class FadType, class ScalarType>
1852 void
1855 
1856  // GER is apparently not defined in the BLAS for complex types
1858  return;
1859 
1860  unsigned int lda = m+3;
1861  unsigned int incx = 2;
1862  unsigned int incy = 3;
1863  VectorType A1(lda*n,ndot), A2(lda*n,ndot), A3(lda*n,ndot), x(m*incx,ndot),
1864  y(n*incy,ndot);
1865  for (unsigned int j=0; j<n; j++) {
1866  for (unsigned int i=0; i<lda; i++) {
1867  ScalarType val = urand.number();
1868  A1[i+j*lda] = FadType(ndot, val);
1869  A2[i+j*lda] = FadType(ndot, val);
1870  A3[i+j*lda] = FadType(ndot, val);
1871  for (unsigned int k=0; k<ndot; k++) {
1872  val = urand.number();
1873  A1[i+j*lda].fastAccessDx(k) = val;
1874  A2[i+j*lda].fastAccessDx(k) = val;
1875  A3[i+j*lda].fastAccessDx(k) = val;
1876  }
1877  }
1878  }
1879  for (unsigned int i=0; i<m*incx; i++) {
1880  x[i] = FadType(ndot, urand.number());
1881  for (unsigned int k=0; k<ndot; k++)
1882  x[i].fastAccessDx(k) = urand.number();
1883  }
1884  for (unsigned int i=0; i<n*incy; i++) {
1885  y[i] = FadType(ndot, urand.number());
1886  for (unsigned int k=0; k<ndot; k++)
1887  y[i].fastAccessDx(k) = urand.number();
1888  }
1889  FadType alpha(ndot, urand.number());
1890  for (unsigned int k=0; k<ndot; k++) {
1891  alpha.fastAccessDx(k) = urand.number();
1892  }
1893 
1894  Teuchos::BLAS<int,FadType> teuchos_blas;
1895  teuchos_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A1[0], lda);
1896 
1897  Teuchos::BLAS<int,FadType> sacado_blas(false);
1898  sacado_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A2[0], lda);
1899 
1900  COMPARE_FAD_VECTORS(A1, A2, lda*n);
1901 
1902  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1903  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1904  sacado_blas2.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A3[0], lda);
1905 
1906  COMPARE_FAD_VECTORS(A1, A3, lda*n);
1907 }
1908 
1909 // Tests constant alpha
1910 template <class FadType, class ScalarType>
1911 void
1914 
1915  // GER is apparently not defined in the BLAS for complex types
1917  return;
1918 
1919  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1920  for (unsigned int j=0; j<n; j++) {
1921  for (unsigned int i=0; i<m; i++) {
1922  ScalarType val = urand.number();
1923  A1[i+j*m] = FadType(ndot, val);
1924  A2[i+j*m] = FadType(ndot, val);
1925  A3[i+j*m] = FadType(ndot, val);
1926  for (unsigned int k=0; k<ndot; k++) {
1927  val = urand.number();
1928  A1[i+j*m].fastAccessDx(k) = val;
1929  A2[i+j*m].fastAccessDx(k) = val;
1930  A3[i+j*m].fastAccessDx(k) = val;
1931  }
1932  }
1933  }
1934  for (unsigned int i=0; i<m; i++) {
1935  x[i] = FadType(ndot, urand.number());
1936  for (unsigned int k=0; k<ndot; k++)
1937  x[i].fastAccessDx(k) = urand.number();
1938  }
1939  for (unsigned int i=0; i<n; i++) {
1940  y[i] = FadType(ndot, urand.number());
1941  for (unsigned int k=0; k<ndot; k++)
1942  y[i].fastAccessDx(k) = urand.number();
1943  }
1944  ScalarType alpha = urand.number();
1945 
1946  Teuchos::BLAS<int,FadType> teuchos_blas;
1947  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1948 
1949  Teuchos::BLAS<int,FadType> sacado_blas(false);
1950  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1951 
1952  COMPARE_FAD_VECTORS(A1, A2, m*n);
1953 
1954  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1955  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1956  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1957 
1958  COMPARE_FAD_VECTORS(A1, A3, m*n);
1959 }
1960 
1961 // Tests constant x
1962 template <class FadType, class ScalarType>
1963 void
1966 
1967  // GER is apparently not defined in the BLAS for complex types
1969  return;
1970 
1971  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1972  A5(m*n,ndot), x(m,ndot), y(n,ndot);
1973  std::vector<ScalarType> xx(m);
1974  for (unsigned int j=0; j<n; j++) {
1975  for (unsigned int i=0; i<m; i++) {
1976  ScalarType val = urand.number();
1977  A1[i+j*m] = FadType(ndot, val);
1978  A2[i+j*m] = FadType(ndot, val);
1979  A3[i+j*m] = FadType(ndot, val);
1980  A4[i+j*m] = FadType(ndot, val);
1981  A5[i+j*m] = FadType(ndot, val);
1982  for (unsigned int k=0; k<ndot; k++) {
1983  val = urand.number();
1984  A1[i+j*m].fastAccessDx(k) = val;
1985  A2[i+j*m].fastAccessDx(k) = val;
1986  A3[i+j*m].fastAccessDx(k) = val;
1987  A4[i+j*m].fastAccessDx(k) = val;
1988  A5[i+j*m].fastAccessDx(k) = val;
1989  }
1990  }
1991  }
1992  for (unsigned int i=0; i<m; i++) {
1993  xx[i] = urand.number();
1994  x[i] = xx[i];
1995  }
1996  for (unsigned int i=0; i<n; i++) {
1997  y[i] = FadType(ndot, urand.number());
1998  for (unsigned int k=0; k<ndot; k++)
1999  y[i].fastAccessDx(k) = urand.number();
2000  }
2001  FadType alpha(ndot, urand.number());
2002  for (unsigned int k=0; k<ndot; k++) {
2003  alpha.fastAccessDx(k) = urand.number();
2004  }
2005 
2006  Teuchos::BLAS<int,FadType> teuchos_blas;
2007  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2008 
2009  Teuchos::BLAS<int,FadType> sacado_blas(false);
2010  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2011 
2012  COMPARE_FAD_VECTORS(A1, A2, m*n);
2013 
2014  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
2015  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2016  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2017 
2018  COMPARE_FAD_VECTORS(A1, A3, m*n);
2019 
2020  sacado_blas.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A4[0], m);
2021 
2022  COMPARE_FAD_VECTORS(A1, A4, m*n);
2023 
2024  sacado_blas2.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A5[0], m);
2025 
2026  COMPARE_FAD_VECTORS(A1, A5, m*n);
2027 }
2028 
2029 // Tests constant y
2030 template <class FadType, class ScalarType>
2031 void
2034 
2035  // GER is apparently not defined in the BLAS for complex types
2037  return;
2038 
2039  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2040  A5(m*n,ndot), x(m,ndot), y(n,ndot);
2041  std::vector<ScalarType> yy(n);
2042  for (unsigned int j=0; j<n; j++) {
2043  for (unsigned int i=0; i<m; i++) {
2044  ScalarType val = urand.number();
2045  A1[i+j*m] = FadType(ndot, val);
2046  A2[i+j*m] = FadType(ndot, val);
2047  A3[i+j*m] = FadType(ndot, val);
2048  A4[i+j*m] = FadType(ndot, val);
2049  A5[i+j*m] = FadType(ndot, val);
2050  for (unsigned int k=0; k<ndot; k++) {
2051  val = urand.number();
2052  A1[i+j*m].fastAccessDx(k) = val;
2053  A2[i+j*m].fastAccessDx(k) = val;
2054  A3[i+j*m].fastAccessDx(k) = val;
2055  A4[i+j*m].fastAccessDx(k) = val;
2056  A5[i+j*m].fastAccessDx(k) = val;
2057  }
2058  }
2059  }
2060  for (unsigned int i=0; i<m; i++) {
2061  x[i] = FadType(ndot, urand.number());
2062  for (unsigned int k=0; k<ndot; k++)
2063  x[i].fastAccessDx(k) = urand.number();
2064  }
2065  for (unsigned int i=0; i<n; i++) {
2066  yy[i] = urand.number();
2067  y[i] = yy[i];
2068  }
2069  FadType alpha(ndot, urand.number());
2070  for (unsigned int k=0; k<ndot; k++) {
2071  alpha.fastAccessDx(k) = urand.number();
2072  }
2073 
2074  Teuchos::BLAS<int,FadType> teuchos_blas;
2075  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2076 
2077  Teuchos::BLAS<int,FadType> sacado_blas(false);
2078  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2079 
2080  COMPARE_FAD_VECTORS(A1, A2, m*n);
2081 
2082  unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
2083  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2084  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2085 
2086  COMPARE_FAD_VECTORS(A1, A3, m*n);
2087 
2088  sacado_blas.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A4[0], m);
2089 
2090  COMPARE_FAD_VECTORS(A1, A4, m*n);
2091 
2092  sacado_blas2.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A5[0], m);
2093 
2094  COMPARE_FAD_VECTORS(A1, A5, m*n);
2095 }
2096 
2097 // Tests constant x and y
2098 template <class FadType, class ScalarType>
2099 void
2102 
2103  // GER is apparently not defined in the BLAS for complex types
2105  return;
2106 
2107  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2108  A5(m*n,ndot), x(m,ndot), y(n,ndot);
2109  std::vector<ScalarType> xx(n), yy(n);
2110  for (unsigned int j=0; j<n; j++) {
2111  for (unsigned int i=0; i<m; i++) {
2112  ScalarType val = urand.number();
2113  A1[i+j*m] = FadType(ndot, val);
2114  A2[i+j*m] = FadType(ndot, val);
2115  A3[i+j*m] = FadType(ndot, val);
2116  A4[i+j*m] = FadType(ndot, val);
2117  A5[i+j*m] = FadType(ndot, val);
2118  for (unsigned int k=0; k<ndot; k++) {
2119  val = urand.number();
2120  A1[i+j*m].fastAccessDx(k) = val;
2121  A2[i+j*m].fastAccessDx(k) = val;
2122  A3[i+j*m].fastAccessDx(k) = val;
2123  A4[i+j*m].fastAccessDx(k) = val;
2124  A5[i+j*m].fastAccessDx(k) = val;
2125  }
2126  }
2127  }
2128  for (unsigned int i=0; i<m; i++) {
2129  xx[i] = urand.number();
2130  x[i] = xx[i];
2131  }
2132  for (unsigned int i=0; i<n; i++) {
2133  yy[i] = urand.number();
2134  y[i] = yy[i];
2135  }
2136  FadType alpha(ndot, urand.number());
2137  for (unsigned int k=0; k<ndot; k++) {
2138  alpha.fastAccessDx(k) = urand.number();
2139  }
2140 
2141  Teuchos::BLAS<int,FadType> teuchos_blas;
2142  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2143 
2144  Teuchos::BLAS<int,FadType> sacado_blas(false);
2145  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2146 
2147  COMPARE_FAD_VECTORS(A1, A2, m*n);
2148 
2149  unsigned int sz = m*n*(1+ndot) + m + n;
2150  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2151  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2152 
2153  COMPARE_FAD_VECTORS(A1, A3, m*n);
2154 
2155  sacado_blas.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
2156 
2157  COMPARE_FAD_VECTORS(A1, A4, m*n);
2158 
2159  sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
2160 
2161  COMPARE_FAD_VECTORS(A1, A5, m*n);
2162 }
2163 
2164 // Tests constant A
2165 template <class FadType, class ScalarType>
2166 void
2169 
2170  // GER is apparently not defined in the BLAS for complex types
2172  return;
2173 
2174  VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
2175  for (unsigned int j=0; j<n; j++) {
2176  for (unsigned int i=0; i<m; i++) {
2177  ScalarType val = urand.number();
2178  A1[i+j*m] = val;
2179  A2[i+j*m] = val;
2180  A3[i+j*m] = val;
2181  }
2182  }
2183  for (unsigned int i=0; i<m; i++) {
2184  x[i] = FadType(ndot, urand.number());
2185  for (unsigned int k=0; k<ndot; k++)
2186  x[i].fastAccessDx(k) = urand.number();
2187  }
2188  for (unsigned int i=0; i<n; i++) {
2189  y[i] = FadType(ndot, urand.number());
2190  for (unsigned int k=0; k<ndot; k++)
2191  y[i].fastAccessDx(k) = urand.number();
2192  }
2193  FadType alpha(ndot, urand.number());
2194  for (unsigned int k=0; k<ndot; k++) {
2195  alpha.fastAccessDx(k) = urand.number();
2196  }
2197 
2198  Teuchos::BLAS<int,FadType> teuchos_blas;
2199  teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2200 
2201  Teuchos::BLAS<int,FadType> sacado_blas(false);
2202  sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2203 
2204  COMPARE_FAD_VECTORS(A1, A2, m*n);
2205 
2206  unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
2207  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2208  sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2209 
2210  COMPARE_FAD_VECTORS(A1, A3, m*n);
2211 }
2212 
2213 // Tests all arguments
2214 template <class FadType, class ScalarType>
2215 void
2218  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2219  for (unsigned int j=0; j<l; j++) {
2220  for (unsigned int i=0; i<m; i++) {
2221  A[i+j*m] = FadType(ndot, urand.number());
2222  for (unsigned int k=0; k<ndot; k++)
2223  A[i+j*m].fastAccessDx(k) = urand.number();
2224  }
2225  }
2226  for (unsigned int j=0; j<n; j++) {
2227  for (unsigned int i=0; i<l; i++) {
2228  B[i+j*l] = FadType(ndot, urand.number());
2229  for (unsigned int k=0; k<ndot; k++)
2230  B[i+j*l].fastAccessDx(k) = urand.number();
2231  }
2232  }
2233  FadType alpha(ndot, urand.number());
2234  FadType beta(ndot, urand.number());
2235  for (unsigned int k=0; k<ndot; k++) {
2236  alpha.fastAccessDx(k) = urand.number();
2237  beta.fastAccessDx(k) = urand.number();
2238  }
2239 
2240  for (unsigned int j=0; j<n; j++) {
2241  for (unsigned int i=0; i<m; i++) {
2242  ScalarType val = urand.number();
2243  C1[i+j*m] = FadType(ndot, val);
2244  C2[i+j*m] = FadType(ndot, val);
2245  C3[i+j*m] = FadType(ndot, val);
2246  for (unsigned int k=0; k<ndot; k++) {
2247  val = urand.number();
2248  C1[i+j*m].fastAccessDx(k) = val;
2249  C2[i+j*m].fastAccessDx(k) = val;
2250  C3[i+j*m].fastAccessDx(k) = val;
2251  }
2252  }
2253  }
2254 
2255  Teuchos::BLAS<int,FadType> teuchos_blas;
2256  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2257  &A[0], m, &B[0], l, beta, &C1[0], m);
2258 
2259  Teuchos::BLAS<int,FadType> sacado_blas(false);
2260  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2261  &A[0], m, &B[0], l, beta, &C2[0], m);
2262 
2263  COMPARE_FAD_VECTORS(C1, C2, m*n);
2264 
2265  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2266  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2267  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2268  &A[0], m, &B[0], l, beta, &C3[0], m);
2269 
2270  COMPARE_FAD_VECTORS(C1, C3, m*n);
2271 
2272  // transa
2273  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2274  &A[0], l, &B[0], l, beta, &C1[0], m);
2275  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2276  &A[0], l, &B[0], l, beta, &C2[0], m);
2277  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2278  &A[0], l, &B[0], l, beta, &C3[0], m);
2279 
2280  COMPARE_FAD_VECTORS(C1, C2, m*n);
2281  COMPARE_FAD_VECTORS(C1, C3, m*n);
2282 
2283  // transb
2284  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2285  &A[0], m, &B[0], n, beta, &C1[0], m);
2286  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2287  &A[0], m, &B[0], n, beta, &C2[0], m);
2288  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2289  &A[0], m, &B[0], n, beta, &C3[0], m);
2290 
2291  COMPARE_FAD_VECTORS(C1, C2, m*n);
2292  COMPARE_FAD_VECTORS(C1, C3, m*n);
2293 
2294  // transa and transb
2295  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2296  &A[0], l, &B[0], n, beta, &C1[0], m);
2297  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2298  &A[0], l, &B[0], n, beta, &C2[0], m);
2299  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2300  &A[0], l, &B[0], n, beta, &C3[0], m);
2301 
2302  COMPARE_FAD_VECTORS(C1, C2, m*n);
2303  COMPARE_FAD_VECTORS(C1, C3, m*n);
2304 }
2305 
2306 // Tests different lda, ldb, ldc
2307 template <class FadType, class ScalarType>
2308 void
2311  unsigned int lda = m+4;
2312  unsigned int ldb = l+4;
2313  unsigned int ldc = m+5;
2314  VectorType A(lda*l,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2315  C3(ldc*n,ndot);
2316  for (unsigned int j=0; j<l; j++) {
2317  for (unsigned int i=0; i<lda; i++) {
2318  A[i+j*lda] = FadType(ndot, urand.number());
2319  for (unsigned int k=0; k<ndot; k++)
2320  A[i+j*lda].fastAccessDx(k) = urand.number();
2321  }
2322  }
2323  for (unsigned int j=0; j<n; j++) {
2324  for (unsigned int i=0; i<ldb; i++) {
2325  B[i+j*ldb] = FadType(ndot, urand.number());
2326  for (unsigned int k=0; k<ndot; k++)
2327  B[i+j*ldb].fastAccessDx(k) = urand.number();
2328  }
2329  }
2330  FadType alpha(ndot, urand.number());
2331  FadType beta(ndot, urand.number());
2332  for (unsigned int k=0; k<ndot; k++) {
2333  alpha.fastAccessDx(k) = urand.number();
2334  beta.fastAccessDx(k) = urand.number();
2335  }
2336 
2337  for (unsigned int j=0; j<n; j++) {
2338  for (unsigned int i=0; i<ldc; i++) {
2339  ScalarType val = urand.number();
2340  C1[i+j*ldc] = FadType(ndot, val);
2341  C2[i+j*ldc] = FadType(ndot, val);
2342  C3[i+j*ldc] = FadType(ndot, val);
2343  for (unsigned int k=0; k<ndot; k++) {
2344  val = urand.number();
2345  C1[i+j*ldc].fastAccessDx(k) = val;
2346  C2[i+j*ldc].fastAccessDx(k) = val;
2347  C3[i+j*ldc].fastAccessDx(k) = val;
2348  }
2349  }
2350  }
2351 
2352  Teuchos::BLAS<int,FadType> teuchos_blas;
2353  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2354  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2355 
2356  Teuchos::BLAS<int,FadType> sacado_blas(false);
2357  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2358  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2359 
2360  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2361 
2362  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2363  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2364  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2365  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2366 
2367  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2368 }
2369 
2370 // Tests different lda, ldb, ldc with transa
2371 template <class FadType, class ScalarType>
2372 void
2375  unsigned int lda = l+3;
2376  unsigned int ldb = l+4;
2377  unsigned int ldc = m+5;
2378  VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2379  C3(ldc*n,ndot);
2380  for (unsigned int j=0; j<m; j++) {
2381  for (unsigned int i=0; i<lda; i++) {
2382  A[i+j*lda] = FadType(ndot, urand.number());
2383  for (unsigned int k=0; k<ndot; k++)
2384  A[i+j*lda].fastAccessDx(k) = urand.number();
2385  }
2386  }
2387  for (unsigned int j=0; j<n; j++) {
2388  for (unsigned int i=0; i<ldb; i++) {
2389  B[i+j*ldb] = FadType(ndot, urand.number());
2390  for (unsigned int k=0; k<ndot; k++)
2391  B[i+j*ldb].fastAccessDx(k) = urand.number();
2392  }
2393  }
2394  FadType alpha(ndot, urand.number());
2395  FadType beta(ndot, urand.number());
2396  for (unsigned int k=0; k<ndot; k++) {
2397  alpha.fastAccessDx(k) = urand.number();
2398  beta.fastAccessDx(k) = urand.number();
2399  }
2400 
2401  for (unsigned int j=0; j<n; j++) {
2402  for (unsigned int i=0; i<ldc; i++) {
2403  ScalarType val = urand.number();
2404  C1[i+j*ldc] = FadType(ndot, val);
2405  C2[i+j*ldc] = FadType(ndot, val);
2406  C3[i+j*ldc] = FadType(ndot, val);
2407  for (unsigned int k=0; k<ndot; k++) {
2408  val = urand.number();
2409  C1[i+j*ldc].fastAccessDx(k) = val;
2410  C2[i+j*ldc].fastAccessDx(k) = val;
2411  C3[i+j*ldc].fastAccessDx(k) = val;
2412  }
2413  }
2414  }
2415 
2416  Teuchos::BLAS<int,FadType> teuchos_blas;
2417  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2418  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2419 
2420  Teuchos::BLAS<int,FadType> sacado_blas(false);
2421  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2422  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2423 
2424  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2425 
2426  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2427  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2428  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2429  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2430 
2431  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2432 }
2433 
2434 // Tests different lda, ldb, ldc with transb
2435 template <class FadType, class ScalarType>
2436 void
2439  unsigned int lda = m+4;
2440  unsigned int ldb = n+4;
2441  unsigned int ldc = m+5;
2442  VectorType A(lda*l,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2443  C3(ldc*n,ndot);
2444  for (unsigned int j=0; j<l; j++) {
2445  for (unsigned int i=0; i<lda; i++) {
2446  A[i+j*lda] = FadType(ndot, urand.number());
2447  for (unsigned int k=0; k<ndot; k++)
2448  A[i+j*lda].fastAccessDx(k) = urand.number();
2449  }
2450  }
2451  for (unsigned int j=0; j<l; j++) {
2452  for (unsigned int i=0; i<ldb; i++) {
2453  B[i+j*ldb] = FadType(ndot, urand.number());
2454  for (unsigned int k=0; k<ndot; k++)
2455  B[i+j*ldb].fastAccessDx(k) = urand.number();
2456  }
2457  }
2458  FadType alpha(ndot, urand.number());
2459  FadType beta(ndot, urand.number());
2460  for (unsigned int k=0; k<ndot; k++) {
2461  alpha.fastAccessDx(k) = urand.number();
2462  beta.fastAccessDx(k) = urand.number();
2463  }
2464 
2465  for (unsigned int j=0; j<n; j++) {
2466  for (unsigned int i=0; i<ldc; i++) {
2467  ScalarType val = urand.number();
2468  C1[i+j*ldc] = FadType(ndot, val);
2469  C2[i+j*ldc] = FadType(ndot, val);
2470  C3[i+j*ldc] = FadType(ndot, val);
2471  for (unsigned int k=0; k<ndot; k++) {
2472  val = urand.number();
2473  C1[i+j*ldc].fastAccessDx(k) = val;
2474  C2[i+j*ldc].fastAccessDx(k) = val;
2475  C3[i+j*ldc].fastAccessDx(k) = val;
2476  }
2477  }
2478  }
2479 
2480  Teuchos::BLAS<int,FadType> teuchos_blas;
2481  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2482  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2483 
2484  Teuchos::BLAS<int,FadType> sacado_blas(false);
2485  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2486  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2487 
2488  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2489 
2490  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2491  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2492  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2493  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2494 
2495  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2496 }
2497 
2498 // Tests different lda, ldb, ldc with transa and transb
2499 template <class FadType, class ScalarType>
2500 void
2503  unsigned int lda = l+3;
2504  unsigned int ldb = n+4;
2505  unsigned int ldc = m+5;
2506  VectorType A(lda*m,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2507  C3(ldc*n,ndot);
2508  for (unsigned int j=0; j<m; j++) {
2509  for (unsigned int i=0; i<lda; i++) {
2510  A[i+j*lda] = FadType(ndot, urand.number());
2511  for (unsigned int k=0; k<ndot; k++)
2512  A[i+j*lda].fastAccessDx(k) = urand.number();
2513  }
2514  }
2515  for (unsigned int j=0; j<l; j++) {
2516  for (unsigned int i=0; i<ldb; i++) {
2517  B[i+j*ldb] = FadType(ndot, urand.number());
2518  for (unsigned int k=0; k<ndot; k++)
2519  B[i+j*ldb].fastAccessDx(k) = urand.number();
2520  }
2521  }
2522  FadType alpha(ndot, urand.number());
2523  FadType beta(ndot, urand.number());
2524  for (unsigned int k=0; k<ndot; k++) {
2525  alpha.fastAccessDx(k) = urand.number();
2526  beta.fastAccessDx(k) = urand.number();
2527  }
2528 
2529  for (unsigned int j=0; j<n; j++) {
2530  for (unsigned int i=0; i<ldc; i++) {
2531  ScalarType val = urand.number();
2532  C1[i+j*ldc] = FadType(ndot, val);
2533  C2[i+j*ldc] = FadType(ndot, val);
2534  C3[i+j*ldc] = FadType(ndot, val);
2535  for (unsigned int k=0; k<ndot; k++) {
2536  val = urand.number();
2537  C1[i+j*ldc].fastAccessDx(k) = val;
2538  C2[i+j*ldc].fastAccessDx(k) = val;
2539  C3[i+j*ldc].fastAccessDx(k) = val;
2540  }
2541  }
2542  }
2543 
2544  Teuchos::BLAS<int,FadType> teuchos_blas;
2545  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2546  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2547 
2548  Teuchos::BLAS<int,FadType> sacado_blas(false);
2549  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2550  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2551 
2552  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2553 
2554  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2555  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2556  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2557  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2558 
2559  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2560 }
2561 
2562 // Tests with constant C
2563 template <class FadType, class ScalarType>
2564 void
2567  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2568  for (unsigned int j=0; j<l; j++) {
2569  for (unsigned int i=0; i<m; i++) {
2570  A[i+j*m] = FadType(ndot, urand.number());
2571  for (unsigned int k=0; k<ndot; k++)
2572  A[i+j*m].fastAccessDx(k) = urand.number();
2573  }
2574  }
2575  for (unsigned int j=0; j<n; j++) {
2576  for (unsigned int i=0; i<l; i++) {
2577  B[i+j*l] = FadType(ndot, urand.number());
2578  for (unsigned int k=0; k<ndot; k++)
2579  B[i+j*l].fastAccessDx(k) = urand.number();
2580  }
2581  }
2582  FadType alpha(ndot, urand.number());
2583  FadType beta(ndot, urand.number());
2584  for (unsigned int k=0; k<ndot; k++) {
2585  alpha.fastAccessDx(k) = urand.number();
2586  beta.fastAccessDx(k) = urand.number();
2587  }
2588 
2589  for (unsigned int j=0; j<n; j++) {
2590  for (unsigned int i=0; i<m; i++) {
2591  ScalarType val = urand.number();
2592  C1[i+j*m] = val;
2593  C2[i+j*m] = val;
2594  C3[i+j*m] = val;
2595  }
2596  }
2597 
2598  Teuchos::BLAS<int,FadType> teuchos_blas;
2599  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2600  &A[0], m, &B[0], l, beta, &C1[0], m);
2601 
2602  Teuchos::BLAS<int,FadType> sacado_blas(false);
2603  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2604  &A[0], m, &B[0], l, beta, &C2[0], m);
2605 
2606  COMPARE_FAD_VECTORS(C1, C2, m*n);
2607 
2608  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2609  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2610  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2611  &A[0], m, &B[0], l, beta, &C3[0], m);
2612 
2613  COMPARE_FAD_VECTORS(C1, C3, m*n);
2614 
2615  // transa
2616  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2617  &A[0], l, &B[0], l, beta, &C1[0], m);
2618  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2619  &A[0], l, &B[0], l, beta, &C2[0], m);
2620  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2621  &A[0], l, &B[0], l, beta, &C3[0], m);
2622 
2623  COMPARE_FAD_VECTORS(C1, C2, m*n);
2624  COMPARE_FAD_VECTORS(C1, C3, m*n);
2625 
2626  // transb
2627  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2628  &A[0], m, &B[0], n, beta, &C1[0], m);
2629  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2630  &A[0], m, &B[0], n, beta, &C2[0], m);
2631  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2632  &A[0], m, &B[0], n, beta, &C3[0], m);
2633 
2634  COMPARE_FAD_VECTORS(C1, C2, m*n);
2635  COMPARE_FAD_VECTORS(C1, C3, m*n);
2636 
2637  // transa and transb
2638  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2639  &A[0], l, &B[0], n, beta, &C1[0], m);
2640  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2641  &A[0], l, &B[0], n, beta, &C2[0], m);
2642  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2643  &A[0], l, &B[0], n, beta, &C3[0], m);
2644 
2645  COMPARE_FAD_VECTORS(C1, C2, m*n);
2646  COMPARE_FAD_VECTORS(C1, C3, m*n);
2647 }
2648 
2649 // Tests with constant alpha, beta
2650 template <class FadType, class ScalarType>
2651 void
2654  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2655  for (unsigned int j=0; j<l; j++) {
2656  for (unsigned int i=0; i<m; i++) {
2657  A[i+j*m] = FadType(ndot, urand.number());
2658  for (unsigned int k=0; k<ndot; k++)
2659  A[i+j*m].fastAccessDx(k) = urand.number();
2660  }
2661  }
2662  for (unsigned int j=0; j<n; j++) {
2663  for (unsigned int i=0; i<l; i++) {
2664  B[i+j*l] = FadType(ndot, urand.number());
2665  for (unsigned int k=0; k<ndot; k++)
2666  B[i+j*l].fastAccessDx(k) = urand.number();
2667  }
2668  }
2669  ScalarType alpha = urand.number();
2670  ScalarType beta = urand.number();
2671 
2672  for (unsigned int j=0; j<n; j++) {
2673  for (unsigned int i=0; i<m; i++) {
2674  ScalarType val = urand.number();
2675  C1[i+j*m] = FadType(ndot, val);
2676  C2[i+j*m] = FadType(ndot, val);
2677  C3[i+j*m] = FadType(ndot, val);
2678  for (unsigned int k=0; k<ndot; k++) {
2679  val = urand.number();
2680  C1[i+j*m].fastAccessDx(k) = val;
2681  C2[i+j*m].fastAccessDx(k) = val;
2682  C3[i+j*m].fastAccessDx(k) = val;
2683  }
2684  }
2685  }
2686 
2687  Teuchos::BLAS<int,FadType> teuchos_blas;
2688  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2689  &A[0], m, &B[0], l, beta, &C1[0], m);
2690 
2691  Teuchos::BLAS<int,FadType> sacado_blas(false);
2692  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2693  &A[0], m, &B[0], l, beta, &C2[0], m);
2694 
2695  COMPARE_FAD_VECTORS(C1, C2, m*n);
2696 
2697  unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2698  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2699  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2700  &A[0], m, &B[0], l, beta, &C3[0], m);
2701 
2702  COMPARE_FAD_VECTORS(C1, C3, m*n);
2703 
2704  // transa
2705  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2706  &A[0], l, &B[0], l, beta, &C1[0], m);
2707  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2708  &A[0], l, &B[0], l, beta, &C2[0], m);
2709  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2710  &A[0], l, &B[0], l, beta, &C3[0], m);
2711 
2712  COMPARE_FAD_VECTORS(C1, C2, m*n);
2713  COMPARE_FAD_VECTORS(C1, C3, m*n);
2714 
2715  // transb
2716  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2717  &A[0], m, &B[0], n, beta, &C1[0], m);
2718  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2719  &A[0], m, &B[0], n, beta, &C2[0], m);
2720  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2721  &A[0], m, &B[0], n, beta, &C3[0], m);
2722 
2723  COMPARE_FAD_VECTORS(C1, C2, m*n);
2724  COMPARE_FAD_VECTORS(C1, C3, m*n);
2725 
2726  // transa and transb
2727  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2728  &A[0], l, &B[0], n, beta, &C1[0], m);
2729  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2730  &A[0], l, &B[0], n, beta, &C2[0], m);
2731  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2732  &A[0], l, &B[0], n, beta, &C3[0], m);
2733 
2734  COMPARE_FAD_VECTORS(C1, C2, m*n);
2735  COMPARE_FAD_VECTORS(C1, C3, m*n);
2736 }
2737 
2738 // Tests with constant A
2739 template <class FadType, class ScalarType>
2740 void
2743  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2744  C4(m*n,ndot), C5(m*n,ndot);
2745  std::vector<ScalarType> a(m*l);
2746  for (unsigned int j=0; j<l; j++) {
2747  for (unsigned int i=0; i<m; i++) {
2748  a[i+j*m] = urand.number();
2749  A[i+j*m] = a[i+j*m];
2750  }
2751  }
2752  for (unsigned int j=0; j<n; j++) {
2753  for (unsigned int i=0; i<l; i++) {
2754  B[i+j*l] = FadType(ndot, urand.number());
2755  for (unsigned int k=0; k<ndot; k++)
2756  B[i+j*l].fastAccessDx(k) = urand.number();
2757  }
2758  }
2759  FadType alpha(ndot, urand.number());
2760  FadType beta(ndot, urand.number());
2761  for (unsigned int k=0; k<ndot; k++) {
2762  alpha.fastAccessDx(k) = urand.number();
2763  beta.fastAccessDx(k) = urand.number();
2764  }
2765 
2766  for (unsigned int j=0; j<n; j++) {
2767  for (unsigned int i=0; i<m; i++) {
2768  ScalarType val = urand.number();
2769  C1[i+j*m] = FadType(ndot, val);
2770  C2[i+j*m] = FadType(ndot, val);
2771  C3[i+j*m] = FadType(ndot, val);
2772  C4[i+j*m] = FadType(ndot, val);
2773  C5[i+j*m] = FadType(ndot, val);
2774  for (unsigned int k=0; k<ndot; k++) {
2775  val = urand.number();
2776  C1[i+j*m].fastAccessDx(k) = val;
2777  C2[i+j*m].fastAccessDx(k) = val;
2778  C3[i+j*m].fastAccessDx(k) = val;
2779  C4[i+j*m].fastAccessDx(k) = val;
2780  C5[i+j*m].fastAccessDx(k) = val;
2781  }
2782  }
2783  }
2784 
2785  Teuchos::BLAS<int,FadType> teuchos_blas;
2786  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2787  &A[0], m, &B[0], l, beta, &C1[0], m);
2788 
2789  Teuchos::BLAS<int,FadType> sacado_blas(false);
2790  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2791  &A[0], m, &B[0], l, beta, &C2[0], m);
2792 
2793  COMPARE_FAD_VECTORS(C1, C2, m*n);
2794 
2795  unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
2796  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2797  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2798  &A[0], m, &B[0], l, beta, &C3[0], m);
2799 
2800  COMPARE_FAD_VECTORS(C1, C3, m*n);
2801 
2802  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2803  &a[0], m, &B[0], l, beta, &C4[0], m);
2804 
2805  COMPARE_FAD_VECTORS(C1, C4, m*n);
2806 
2807  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2808  &a[0], m, &B[0], l, beta, &C5[0], m);
2809 
2810  COMPARE_FAD_VECTORS(C1, C5, m*n);
2811 
2812  // transa
2813  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2814  &A[0], l, &B[0], l, beta, &C1[0], m);
2815  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2816  &A[0], l, &B[0], l, beta, &C2[0], m);
2817  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2818  &A[0], l, &B[0], l, beta, &C3[0], m);
2819  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2820  &a[0], l, &B[0], l, beta, &C4[0], m);
2821  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2822  &a[0], l, &B[0], l, beta, &C5[0], m);
2823 
2824  COMPARE_FAD_VECTORS(C1, C2, m*n);
2825  COMPARE_FAD_VECTORS(C1, C3, m*n);
2826  COMPARE_FAD_VECTORS(C1, C4, m*n);
2827  COMPARE_FAD_VECTORS(C1, C5, m*n);
2828 
2829  // transb
2830  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2831  &A[0], m, &B[0], n, beta, &C1[0], m);
2832  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2833  &A[0], m, &B[0], n, beta, &C2[0], m);
2834  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2835  &A[0], m, &B[0], n, beta, &C3[0], m);
2836  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2837  &a[0], m, &B[0], n, beta, &C4[0], m);
2838  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2839  &a[0], m, &B[0], n, beta, &C5[0], m);
2840 
2841  COMPARE_FAD_VECTORS(C1, C2, m*n);
2842  COMPARE_FAD_VECTORS(C1, C3, m*n);
2843  COMPARE_FAD_VECTORS(C1, C4, m*n);
2844  COMPARE_FAD_VECTORS(C1, C5, m*n);
2845 
2846  // transa and transb
2847  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2848  &A[0], l, &B[0], n, beta, &C1[0], m);
2849  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2850  &A[0], l, &B[0], n, beta, &C2[0], m);
2851  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2852  &A[0], l, &B[0], n, beta, &C3[0], m);
2853  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2854  &a[0], l, &B[0], n, beta, &C4[0], m);
2855  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2856  &a[0], l, &B[0], n, beta, &C5[0], m);
2857 
2858  COMPARE_FAD_VECTORS(C1, C2, m*n);
2859  COMPARE_FAD_VECTORS(C1, C3, m*n);
2860  COMPARE_FAD_VECTORS(C1, C4, m*n);
2861  COMPARE_FAD_VECTORS(C1, C5, m*n);
2862 }
2863 
2864 // Tests with constant B
2865 template <class FadType, class ScalarType>
2866 void
2869  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2870  C4(m*n,ndot), C5(m*n,ndot);
2871  std::vector<ScalarType> b(l*n);
2872  for (unsigned int j=0; j<l; j++) {
2873  for (unsigned int i=0; i<m; i++) {
2874  A[i+j*m] = FadType(ndot, urand.number());
2875  for (unsigned int k=0; k<ndot; k++)
2876  A[i+j*m].fastAccessDx(k) = urand.number();
2877  }
2878  }
2879  for (unsigned int j=0; j<n; j++) {
2880  for (unsigned int i=0; i<l; i++) {
2881  b[i+j*l] = urand.number();
2882  B[i+j*l] = b[i+j*l];
2883  }
2884  }
2885  FadType alpha(ndot, urand.number());
2886  FadType beta(ndot, urand.number());
2887  for (unsigned int k=0; k<ndot; k++) {
2888  alpha.fastAccessDx(k) = urand.number();
2889  beta.fastAccessDx(k) = urand.number();
2890  }
2891 
2892  for (unsigned int j=0; j<n; j++) {
2893  for (unsigned int i=0; i<m; i++) {
2894  ScalarType val = urand.number();
2895  C1[i+j*m] = FadType(ndot, val);
2896  C2[i+j*m] = FadType(ndot, val);
2897  C3[i+j*m] = FadType(ndot, val);
2898  C4[i+j*m] = FadType(ndot, val);
2899  C5[i+j*m] = FadType(ndot, val);
2900  for (unsigned int k=0; k<ndot; k++) {
2901  val = urand.number();
2902  C1[i+j*m].fastAccessDx(k) = val;
2903  C2[i+j*m].fastAccessDx(k) = val;
2904  C3[i+j*m].fastAccessDx(k) = val;
2905  C4[i+j*m].fastAccessDx(k) = val;
2906  C5[i+j*m].fastAccessDx(k) = val;
2907  }
2908  }
2909  }
2910 
2911  Teuchos::BLAS<int,FadType> teuchos_blas;
2912  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2913  &A[0], m, &B[0], l, beta, &C1[0], m);
2914 
2915  Teuchos::BLAS<int,FadType> sacado_blas(false);
2916  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2917  &A[0], m, &B[0], l, beta, &C2[0], m);
2918 
2919  COMPARE_FAD_VECTORS(C1, C2, m*n);
2920 
2921  unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
2922  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2923  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2924  &A[0], m, &B[0], l, beta, &C3[0], m);
2925 
2926  COMPARE_FAD_VECTORS(C1, C3, m*n);
2927 
2928  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2929  &A[0], m, &b[0], l, beta, &C4[0], m);
2930 
2931  COMPARE_FAD_VECTORS(C1, C4, m*n);
2932 
2933  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2934  &A[0], m, &b[0], l, beta, &C5[0], m);
2935 
2936  COMPARE_FAD_VECTORS(C1, C5, m*n);
2937 
2938  // transa
2939  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2940  &A[0], l, &B[0], l, beta, &C1[0], m);
2941  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2942  &A[0], l, &B[0], l, beta, &C2[0], m);
2943  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2944  &A[0], l, &B[0], l, beta, &C3[0], m);
2945  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2946  &A[0], l, &b[0], l, beta, &C4[0], m);
2947  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2948  &A[0], l, &b[0], l, beta, &C5[0], m);
2949 
2950  COMPARE_FAD_VECTORS(C1, C2, m*n);
2951  COMPARE_FAD_VECTORS(C1, C3, m*n);
2952  COMPARE_FAD_VECTORS(C1, C4, m*n);
2953  COMPARE_FAD_VECTORS(C1, C5, m*n);
2954 
2955  // transb
2956  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2957  &A[0], m, &B[0], n, beta, &C1[0], m);
2958  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2959  &A[0], m, &B[0], n, beta, &C2[0], m);
2960  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2961  &A[0], m, &B[0], n, beta, &C3[0], m);
2962  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2963  &A[0], m, &b[0], n, beta, &C4[0], m);
2964  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2965  &A[0], m, &b[0], n, beta, &C5[0], m);
2966 
2967  COMPARE_FAD_VECTORS(C1, C2, m*n);
2968  COMPARE_FAD_VECTORS(C1, C3, m*n);
2969  COMPARE_FAD_VECTORS(C1, C4, m*n);
2970  COMPARE_FAD_VECTORS(C1, C5, m*n);
2971 
2972  // transa and transb
2973  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2974  &A[0], l, &B[0], n, beta, &C1[0], m);
2975  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2976  &A[0], l, &B[0], n, beta, &C2[0], m);
2977  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2978  &A[0], l, &B[0], n, beta, &C3[0], m);
2979  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2980  &A[0], l, &b[0], n, beta, &C4[0], m);
2981  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2982  &A[0], l, &b[0], n, beta, &C5[0], m);
2983 
2984  COMPARE_FAD_VECTORS(C1, C2, m*n);
2985  COMPARE_FAD_VECTORS(C1, C3, m*n);
2986  COMPARE_FAD_VECTORS(C1, C4, m*n);
2987  COMPARE_FAD_VECTORS(C1, C5, m*n);
2988 }
2989 
2990 // Tests with constant A and B
2991 template <class FadType, class ScalarType>
2992 void
2995  VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2996  C4(m*n,ndot), C5(m*n,ndot);
2997  std::vector<ScalarType> a(m*l), b(l*n);
2998  for (unsigned int j=0; j<l; j++) {
2999  for (unsigned int i=0; i<m; i++) {
3000  a[i+j*m] = urand.number();
3001  A[i+j*m] = a[i+j*m];
3002  }
3003  }
3004  for (unsigned int j=0; j<n; j++) {
3005  for (unsigned int i=0; i<l; i++) {
3006  b[i+j*l] = urand.number();
3007  B[i+j*l] = b[i+j*l];
3008  }
3009  }
3010  FadType alpha(ndot, urand.number());
3011  FadType beta(ndot, urand.number());
3012  for (unsigned int k=0; k<ndot; k++) {
3013  alpha.fastAccessDx(k) = urand.number();
3014  beta.fastAccessDx(k) = urand.number();
3015  }
3016 
3017  for (unsigned int j=0; j<n; j++) {
3018  for (unsigned int i=0; i<m; i++) {
3019  ScalarType val = urand.number();
3020  C1[i+j*m] = FadType(ndot, val);
3021  C2[i+j*m] = FadType(ndot, val);
3022  C3[i+j*m] = FadType(ndot, val);
3023  C4[i+j*m] = FadType(ndot, val);
3024  C5[i+j*m] = FadType(ndot, val);
3025  for (unsigned int k=0; k<ndot; k++) {
3026  val = urand.number();
3027  C1[i+j*m].fastAccessDx(k) = val;
3028  C2[i+j*m].fastAccessDx(k) = val;
3029  C3[i+j*m].fastAccessDx(k) = val;
3030  C4[i+j*m].fastAccessDx(k) = val;
3031  C5[i+j*m].fastAccessDx(k) = val;
3032  }
3033  }
3034  }
3035 
3036  Teuchos::BLAS<int,FadType> teuchos_blas;
3037  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3038  &A[0], m, &B[0], l, beta, &C1[0], m);
3039 
3040  Teuchos::BLAS<int,FadType> sacado_blas(false);
3041  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3042  &A[0], m, &B[0], l, beta, &C2[0], m);
3043 
3044  COMPARE_FAD_VECTORS(C1, C2, m*n);
3045 
3046  unsigned int sz = m*l + l*n + m*n*(1+ndot);
3047  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3048  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3049  &A[0], m, &B[0], l, beta, &C3[0], m);
3050 
3051  COMPARE_FAD_VECTORS(C1, C3, m*n);
3052 
3053  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3054  &a[0], m, &b[0], l, beta, &C4[0], m);
3055 
3056  COMPARE_FAD_VECTORS(C1, C4, m*n);
3057 
3058  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3059  &a[0], m, &b[0], l, beta, &C5[0], m);
3060 
3061  COMPARE_FAD_VECTORS(C1, C5, m*n);
3062 
3063  // transa
3064  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3065  &A[0], l, &B[0], l, beta, &C1[0], m);
3066  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3067  &A[0], l, &B[0], l, beta, &C2[0], m);
3068  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3069  &A[0], l, &B[0], l, beta, &C3[0], m);
3070  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3071  &a[0], l, &b[0], l, beta, &C4[0], m);
3072  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3073  &a[0], l, &b[0], l, beta, &C5[0], m);
3074 
3075  COMPARE_FAD_VECTORS(C1, C2, m*n);
3076  COMPARE_FAD_VECTORS(C1, C3, m*n);
3077  COMPARE_FAD_VECTORS(C1, C4, m*n);
3078  COMPARE_FAD_VECTORS(C1, C5, m*n);
3079 
3080  // transb
3081  teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3082  &A[0], m, &B[0], n, beta, &C1[0], m);
3083  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3084  &A[0], m, &B[0], n, beta, &C2[0], m);
3085  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3086  &A[0], m, &B[0], n, beta, &C3[0], m);
3087  sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3088  &a[0], m, &b[0], n, beta, &C4[0], m);
3089  sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3090  &a[0], m, &b[0], n, beta, &C5[0], m);
3091 
3092  COMPARE_FAD_VECTORS(C1, C2, m*n);
3093  COMPARE_FAD_VECTORS(C1, C3, m*n);
3094  COMPARE_FAD_VECTORS(C1, C4, m*n);
3095  COMPARE_FAD_VECTORS(C1, C5, m*n);
3096 
3097  // transa and transb
3098  teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3099  &A[0], l, &B[0], n, beta, &C1[0], m);
3100  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3101  &A[0], l, &B[0], n, beta, &C2[0], m);
3102  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3103  &A[0], l, &B[0], n, beta, &C3[0], m);
3104  sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3105  &a[0], l, &b[0], n, beta, &C4[0], m);
3106  sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3107  &a[0], l, &b[0], n, beta, &C5[0], m);
3108 
3109  COMPARE_FAD_VECTORS(C1, C2, m*n);
3110  COMPARE_FAD_VECTORS(C1, C3, m*n);
3111  COMPARE_FAD_VECTORS(C1, C4, m*n);
3112  COMPARE_FAD_VECTORS(C1, C5, m*n);
3113 }
3114 
3115 // Tests all arguments, left side
3116 template <class FadType, class ScalarType>
3117 void
3120 
3121  // SYMM is apparently not defined in the BLAS for complex types
3123  return;
3124 
3125  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3126  for (unsigned int j=0; j<m; j++) {
3127  for (unsigned int i=0; i<m; i++) {
3128  A[i+j*m] = FadType(ndot, urand.number());
3129  for (unsigned int k=0; k<ndot; k++)
3130  A[i+j*m].fastAccessDx(k) = urand.number();
3131  }
3132  }
3133  for (unsigned int j=0; j<n; j++) {
3134  for (unsigned int i=0; i<m; i++) {
3135  B[i+j*m] = FadType(ndot, urand.number());
3136  for (unsigned int k=0; k<ndot; k++)
3137  B[i+j*m].fastAccessDx(k) = urand.number();
3138  }
3139  }
3140  FadType alpha(ndot, urand.number());
3141  FadType beta(ndot, urand.number());
3142  for (unsigned int k=0; k<ndot; k++) {
3143  alpha.fastAccessDx(k) = urand.number();
3144  beta.fastAccessDx(k) = urand.number();
3145  }
3146 
3147  for (unsigned int j=0; j<n; j++) {
3148  for (unsigned int i=0; i<m; i++) {
3149  ScalarType val = urand.number();
3150  C1[i+j*m] = FadType(ndot, val);
3151  C2[i+j*m] = FadType(ndot, val);
3152  C3[i+j*m] = FadType(ndot, val);
3153  for (unsigned int k=0; k<ndot; k++) {
3154  val = urand.number();
3155  C1[i+j*m].fastAccessDx(k) = val;
3156  C2[i+j*m].fastAccessDx(k) = val;
3157  C3[i+j*m].fastAccessDx(k) = val;
3158  }
3159  }
3160  }
3161 
3162  Teuchos::BLAS<int,FadType> teuchos_blas;
3163  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3164  &A[0], m, &B[0], m, beta, &C1[0], m);
3165 
3166  Teuchos::BLAS<int,FadType> sacado_blas(false);
3167  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3168  &A[0], m, &B[0], m, beta, &C2[0], m);
3169 
3170  COMPARE_FAD_VECTORS(C1, C2, m*n);
3171 
3172  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3173  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3174  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3175  &A[0], m, &B[0], m, beta, &C3[0], m);
3176 
3177  COMPARE_FAD_VECTORS(C1, C3, m*n);
3178 
3179  // lower tri
3180  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3181  &A[0], m, &B[0], m, beta, &C1[0], m);
3182  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3183  &A[0], m, &B[0], m, beta, &C2[0], m);
3184  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3185  &A[0], m, &B[0], m, beta, &C3[0], m);
3186 
3187  COMPARE_FAD_VECTORS(C1, C2, m*n);
3188  COMPARE_FAD_VECTORS(C1, C3, m*n);
3189 }
3190 
3191 // Tests all arguments, right side
3192 template <class FadType, class ScalarType>
3193 void
3196 
3197  // SYMM is apparently not defined in the BLAS for complex types
3199  return;
3200 
3201  VectorType A(n*n,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3202  for (unsigned int j=0; j<n; j++) {
3203  for (unsigned int i=0; i<n; i++) {
3204  A[i+j*n] = FadType(ndot, urand.number());
3205  for (unsigned int k=0; k<ndot; k++)
3206  A[i+j*n].fastAccessDx(k) = urand.number();
3207  }
3208  }
3209  for (unsigned int j=0; j<n; j++) {
3210  for (unsigned int i=0; i<m; i++) {
3211  B[i+j*m] = FadType(ndot, urand.number());
3212  for (unsigned int k=0; k<ndot; k++)
3213  B[i+j*m].fastAccessDx(k) = urand.number();
3214  }
3215  }
3216  FadType alpha(ndot, urand.number());
3217  FadType beta(ndot, urand.number());
3218  for (unsigned int k=0; k<ndot; k++) {
3219  alpha.fastAccessDx(k) = urand.number();
3220  beta.fastAccessDx(k) = urand.number();
3221  }
3222 
3223  for (unsigned int j=0; j<n; j++) {
3224  for (unsigned int i=0; i<m; i++) {
3225  ScalarType val = urand.number();
3226  C1[i+j*m] = FadType(ndot, val);
3227  C2[i+j*m] = FadType(ndot, val);
3228  C3[i+j*m] = FadType(ndot, val);
3229  for (unsigned int k=0; k<ndot; k++) {
3230  val = urand.number();
3231  C1[i+j*m].fastAccessDx(k) = val;
3232  C2[i+j*m].fastAccessDx(k) = val;
3233  C3[i+j*m].fastAccessDx(k) = val;
3234  }
3235  }
3236  }
3237 
3238  Teuchos::BLAS<int,FadType> teuchos_blas;
3239  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3240  &A[0], n, &B[0], m, beta, &C1[0], m);
3241 
3242  Teuchos::BLAS<int,FadType> sacado_blas(false);
3243  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3244  &A[0], n, &B[0], m, beta, &C2[0], m);
3245 
3246  COMPARE_FAD_VECTORS(C1, C2, m*n);
3247 
3248  unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3249  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3250  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3251  &A[0], n, &B[0], m, beta, &C3[0], m);
3252 
3253  COMPARE_FAD_VECTORS(C1, C3, m*n);
3254 
3255  // lower tri
3256  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3257  &A[0], n, &B[0], m, beta, &C1[0], m);
3258  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3259  &A[0], n, &B[0], m, beta, &C2[0], m);
3260  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3261  &A[0], n, &B[0], m, beta, &C3[0], m);
3262 
3263  COMPARE_FAD_VECTORS(C1, C2, m*n);
3264  COMPARE_FAD_VECTORS(C1, C3, m*n);
3265 }
3266 
3267 // Tests different lda, ldb, ldc, left side
3268 template <class FadType, class ScalarType>
3269 void
3272 
3273  // SYMM is apparently not defined in the BLAS for complex types
3275  return;
3276 
3277  unsigned int lda = m+4;
3278  unsigned int ldb = m+5;
3279  unsigned int ldc = m+6;
3280  VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3281  C3(ldc*n,ndot);
3282  for (unsigned int j=0; j<m; j++) {
3283  for (unsigned int i=0; i<lda; i++) {
3284  A[i+j*lda] = FadType(ndot, urand.number());
3285  for (unsigned int k=0; k<ndot; k++)
3286  A[i+j*lda].fastAccessDx(k) = urand.number();
3287  }
3288  }
3289  for (unsigned int j=0; j<n; j++) {
3290  for (unsigned int i=0; i<ldb; i++) {
3291  B[i+j*ldb] = FadType(ndot, urand.number());
3292  for (unsigned int k=0; k<ndot; k++)
3293  B[i+j*ldb].fastAccessDx(k) = urand.number();
3294  }
3295  }
3296  FadType alpha(ndot, urand.number());
3297  FadType beta(ndot, urand.number());
3298  for (unsigned int k=0; k<ndot; k++) {
3299  alpha.fastAccessDx(k) = urand.number();
3300  beta.fastAccessDx(k) = urand.number();
3301  }
3302 
3303  for (unsigned int j=0; j<n; j++) {
3304  for (unsigned int i=0; i<ldc; i++) {
3305  ScalarType val = urand.number();
3306  C1[i+j*ldc] = FadType(ndot, val);
3307  C2[i+j*ldc] = FadType(ndot, val);
3308  C3[i+j*ldc] = FadType(ndot, val);
3309  for (unsigned int k=0; k<ndot; k++) {
3310  val = urand.number();
3311  C1[i+j*ldc].fastAccessDx(k) = val;
3312  C2[i+j*ldc].fastAccessDx(k) = val;
3313  C3[i+j*ldc].fastAccessDx(k) = val;
3314  }
3315  }
3316  }
3317 
3318  Teuchos::BLAS<int,FadType> teuchos_blas;
3319  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3320  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3321 
3322  Teuchos::BLAS<int,FadType> sacado_blas(false);
3323  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3324  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3325 
3326  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3327 
3328  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3329  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3330  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3331  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3332 
3333  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3334 
3335  // lower tri
3336  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3337  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3338  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3339  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3340  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3341  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3342 
3343  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3344  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3345 }
3346 
3347 // Tests different lda, ldb, ldc, right side
3348 template <class FadType, class ScalarType>
3349 void
3352 
3353  // SYMM is apparently not defined in the BLAS for complex types
3355  return;
3356 
3357  unsigned int lda = n+4;
3358  unsigned int ldb = m+5;
3359  unsigned int ldc = m+6;
3360  VectorType A(lda*n,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3361  C3(ldc*n,ndot);
3362  for (unsigned int j=0; j<n; j++) {
3363  for (unsigned int i=0; i<lda; i++) {
3364  A[i+j*lda] = FadType(ndot, urand.number());
3365  for (unsigned int k=0; k<ndot; k++)
3366  A[i+j*lda].fastAccessDx(k) = urand.number();
3367  }
3368  }
3369  for (unsigned int j=0; j<n; j++) {
3370  for (unsigned int i=0; i<ldb; i++) {
3371  B[i+j*ldb] = FadType(ndot, urand.number());
3372  for (unsigned int k=0; k<ndot; k++)
3373  B[i+j*ldb].fastAccessDx(k) = urand.number();
3374  }
3375  }
3376  FadType alpha(ndot, urand.number());
3377  FadType beta(ndot, urand.number());
3378  for (unsigned int k=0; k<ndot; k++) {
3379  alpha.fastAccessDx(k) = urand.number();
3380  beta.fastAccessDx(k) = urand.number();
3381  }
3382 
3383  for (unsigned int j=0; j<n; j++) {
3384  for (unsigned int i=0; i<ldc; i++) {
3385  ScalarType val = urand.number();
3386  C1[i+j*ldc] = FadType(ndot, val);
3387  C2[i+j*ldc] = FadType(ndot, val);
3388  C3[i+j*ldc] = FadType(ndot, val);
3389  for (unsigned int k=0; k<ndot; k++) {
3390  val = urand.number();
3391  C1[i+j*ldc].fastAccessDx(k) = val;
3392  C2[i+j*ldc].fastAccessDx(k) = val;
3393  C3[i+j*ldc].fastAccessDx(k) = val;
3394  }
3395  }
3396  }
3397 
3398  Teuchos::BLAS<int,FadType> teuchos_blas;
3399  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3400  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3401 
3402  Teuchos::BLAS<int,FadType> sacado_blas(false);
3403  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3404  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3405 
3406  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3407 
3408  unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3409  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3410  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3411  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3412 
3413  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3414 
3415  // lower tri
3416  teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3417  &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3418  sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3419  &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3420  sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3421  &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3422 
3423  COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3424  COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3425 }
3426 
3427 // Tests with constant C
3428 template <class FadType, class ScalarType>
3429 void
3432 
3433  // SYMM is apparently not defined in the BLAS for complex types
3435  return;
3436 
3437  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3438  for (unsigned int j=0; j<m; j++) {
3439  for (unsigned int i=0; i<m; i++) {
3440  A[i+j*m] = FadType(ndot, urand.number());
3441  for (unsigned int k=0; k<ndot; k++)
3442  A[i+j*m].fastAccessDx(k) = urand.number();
3443  }
3444  }
3445  for (unsigned int j=0; j<n; j++) {
3446  for (unsigned int i=0; i<m; i++) {
3447  B[i+j*m] = FadType(ndot, urand.number());
3448  for (unsigned int k=0; k<ndot; k++)
3449  B[i+j*m].fastAccessDx(k) = urand.number();
3450  }
3451  }
3452  FadType alpha(ndot, urand.number());
3453  FadType beta(ndot, urand.number());
3454  for (unsigned int k=0; k<ndot; k++) {
3455  alpha.fastAccessDx(k) = urand.number();
3456  beta.fastAccessDx(k) = urand.number();
3457  }
3458 
3459  for (unsigned int j=0; j<n; j++) {
3460  for (unsigned int i=0; i<m; i++) {
3461  ScalarType val = urand.number();
3462  C1[i+j*m] = val;
3463  C2[i+j*m] = val;
3464  C3[i+j*m] = val;
3465  }
3466  }
3467 
3468  Teuchos::BLAS<int,FadType> teuchos_blas;
3469  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3470  &A[0], m, &B[0], m, beta, &C1[0], m);
3471 
3472  Teuchos::BLAS<int,FadType> sacado_blas(false);
3473  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3474  &A[0], m, &B[0], m, beta, &C2[0], m);
3475 
3476  COMPARE_FAD_VECTORS(C1, C2, m*n);
3477 
3478  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3479  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3480  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3481  &A[0], m, &B[0], m, beta, &C3[0], m);
3482 
3483  COMPARE_FAD_VECTORS(C1, C3, m*n);
3484 
3485  // lower tri
3486  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3487  &A[0], m, &B[0], m, beta, &C1[0], m);
3488  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3489  &A[0], m, &B[0], m, beta, &C2[0], m);
3490  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3491  &A[0], m, &B[0], m, beta, &C3[0], m);
3492 
3493  COMPARE_FAD_VECTORS(C1, C2, m*n);
3494  COMPARE_FAD_VECTORS(C1, C3, m*n);
3495 }
3496 
3497 // Tests with constant alpha, beta
3498 template <class FadType, class ScalarType>
3499 void
3502 
3503  // SYMM is apparently not defined in the BLAS for complex types
3505  return;
3506 
3507  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3508  for (unsigned int j=0; j<m; j++) {
3509  for (unsigned int i=0; i<m; i++) {
3510  A[i+j*m] = FadType(ndot, urand.number());
3511  for (unsigned int k=0; k<ndot; k++)
3512  A[i+j*m].fastAccessDx(k) = urand.number();
3513  }
3514  }
3515  for (unsigned int j=0; j<n; j++) {
3516  for (unsigned int i=0; i<m; i++) {
3517  B[i+j*m] = FadType(ndot, urand.number());
3518  for (unsigned int k=0; k<ndot; k++)
3519  B[i+j*m].fastAccessDx(k) = urand.number();
3520  }
3521  }
3522  ScalarType alpha = urand.number();
3523  ScalarType beta = urand.number();
3524 
3525  for (unsigned int j=0; j<n; j++) {
3526  for (unsigned int i=0; i<m; i++) {
3527  ScalarType val = urand.number();
3528  C1[i+j*m] = FadType(ndot, val);
3529  C2[i+j*m] = FadType(ndot, val);
3530  C3[i+j*m] = FadType(ndot, val);
3531  for (unsigned int k=0; k<ndot; k++) {
3532  val = urand.number();
3533  C1[i+j*m].fastAccessDx(k) = val;
3534  C2[i+j*m].fastAccessDx(k) = val;
3535  C3[i+j*m].fastAccessDx(k) = val;
3536  }
3537  }
3538  }
3539 
3540  Teuchos::BLAS<int,FadType> teuchos_blas;
3541  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3542  &A[0], m, &B[0], m, beta, &C1[0], m);
3543 
3544  Teuchos::BLAS<int,FadType> sacado_blas(false);
3545  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3546  &A[0], m, &B[0], m, beta, &C2[0], m);
3547 
3548  COMPARE_FAD_VECTORS(C1, C2, m*n);
3549 
3550  unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3551  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3552  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3553  &A[0], m, &B[0], m, beta, &C3[0], m);
3554 
3555  COMPARE_FAD_VECTORS(C1, C3, m*n);
3556 
3557  // lower tri
3558  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3559  &A[0], m, &B[0], m, beta, &C1[0], m);
3560  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3561  &A[0], m, &B[0], m, beta, &C2[0], m);
3562  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3563  &A[0], m, &B[0], m, beta, &C3[0], m);
3564 
3565  COMPARE_FAD_VECTORS(C1, C2, m*n);
3566  COMPARE_FAD_VECTORS(C1, C3, m*n);
3567 }
3568 
3569 // Tests with constant A
3570 template <class FadType, class ScalarType>
3571 void
3574 
3575  // SYMM is apparently not defined in the BLAS for complex types
3577  return;
3578 
3579  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3580  C4(m*n,ndot), C5(m*n,ndot);
3581  std::vector<ScalarType> a(m*m);
3582  for (unsigned int j=0; j<m; j++) {
3583  for (unsigned int i=0; i<m; i++) {
3584  a[i+j*m] = urand.number();
3585  A[i+j*m] = a[i+j*m];
3586  }
3587  }
3588  for (unsigned int j=0; j<n; j++) {
3589  for (unsigned int i=0; i<m; i++) {
3590  B[i+j*m] = FadType(ndot, urand.number());
3591  for (unsigned int k=0; k<ndot; k++)
3592  B[i+j*m].fastAccessDx(k) = urand.number();
3593  }
3594  }
3595  FadType alpha(ndot, urand.number());
3596  FadType beta(ndot, urand.number());
3597  for (unsigned int k=0; k<ndot; k++) {
3598  alpha.fastAccessDx(k) = urand.number();
3599  beta.fastAccessDx(k) = urand.number();
3600  }
3601 
3602  for (unsigned int j=0; j<n; j++) {
3603  for (unsigned int i=0; i<m; i++) {
3604  ScalarType val = urand.number();
3605  C1[i+j*m] = FadType(ndot, val);
3606  C2[i+j*m] = FadType(ndot, val);
3607  C3[i+j*m] = FadType(ndot, val);
3608  C4[i+j*m] = FadType(ndot, val);
3609  C5[i+j*m] = FadType(ndot, val);
3610  for (unsigned int k=0; k<ndot; k++) {
3611  val = urand.number();
3612  C1[i+j*m].fastAccessDx(k) = val;
3613  C2[i+j*m].fastAccessDx(k) = val;
3614  C3[i+j*m].fastAccessDx(k) = val;
3615  C4[i+j*m].fastAccessDx(k) = val;
3616  C5[i+j*m].fastAccessDx(k) = val;
3617  }
3618  }
3619  }
3620 
3621  Teuchos::BLAS<int,FadType> teuchos_blas;
3622  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3623  &A[0], m, &B[0], m, beta, &C1[0], m);
3624 
3625  Teuchos::BLAS<int,FadType> sacado_blas(false);
3626  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3627  &A[0], m, &B[0], m, beta, &C2[0], m);
3628 
3629  COMPARE_FAD_VECTORS(C1, C2, m*n);
3630 
3631  unsigned int sz = m*m + 2*m*n*(1+ndot);
3632  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3633  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3634  &A[0], m, &B[0], m, beta, &C3[0], m);
3635 
3636  COMPARE_FAD_VECTORS(C1, C3, m*n);
3637 
3638  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3639  &a[0], m, &B[0], m, beta, &C4[0], m);
3640 
3641  COMPARE_FAD_VECTORS(C1, C4, m*n);
3642 
3643  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3644  &a[0], m, &B[0], m, beta, &C5[0], m);
3645 
3646  COMPARE_FAD_VECTORS(C1, C5, m*n);
3647 
3648  // lower tri
3649  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3650  &A[0], m, &B[0], m, beta, &C1[0], m);
3651  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3652  &A[0], m, &B[0], m, beta, &C2[0], m);
3653  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3654  &A[0], m, &B[0], m, beta, &C3[0], m);
3655  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3656  &a[0], m, &B[0], m, beta, &C4[0], m);
3657  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3658  &a[0], m, &B[0], m, beta, &C5[0], m);
3659 
3660  COMPARE_FAD_VECTORS(C1, C2, m*n);
3661  COMPARE_FAD_VECTORS(C1, C3, m*n);
3662  COMPARE_FAD_VECTORS(C1, C4, m*n);
3663  COMPARE_FAD_VECTORS(C1, C5, m*n);
3664 }
3665 
3666 // Tests with constant B
3667 template <class FadType, class ScalarType>
3668 void
3671 
3672  // SYMM is apparently not defined in the BLAS for complex types
3674  return;
3675 
3676  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3677  C4(m*n,ndot), C5(m*n,ndot);
3678  std::vector<ScalarType> b(m*n);
3679  for (unsigned int j=0; j<m; j++) {
3680  for (unsigned int i=0; i<m; i++) {
3681  A[i+j*m] = FadType(ndot, urand.number());
3682  for (unsigned int k=0; k<ndot; k++)
3683  A[i+j*m].fastAccessDx(k) = urand.number();
3684  }
3685  }
3686  for (unsigned int j=0; j<n; j++) {
3687  for (unsigned int i=0; i<m; i++) {
3688  b[i+j*m] = urand.number();
3689  B[i+j*m] = b[i+j*m];
3690  }
3691  }
3692  FadType alpha(ndot, urand.number());
3693  FadType beta(ndot, urand.number());
3694  for (unsigned int k=0; k<ndot; k++) {
3695  alpha.fastAccessDx(k) = urand.number();
3696  beta.fastAccessDx(k) = urand.number();
3697  }
3698 
3699  for (unsigned int j=0; j<n; j++) {
3700  for (unsigned int i=0; i<m; i++) {
3701  ScalarType val = urand.number();
3702  C1[i+j*m] = FadType(ndot, val);
3703  C2[i+j*m] = FadType(ndot, val);
3704  C3[i+j*m] = FadType(ndot, val);
3705  C4[i+j*m] = FadType(ndot, val);
3706  C5[i+j*m] = FadType(ndot, val);
3707  for (unsigned int k=0; k<ndot; k++) {
3708  val = urand.number();
3709  C1[i+j*m].fastAccessDx(k) = val;
3710  C2[i+j*m].fastAccessDx(k) = val;
3711  C3[i+j*m].fastAccessDx(k) = val;
3712  C4[i+j*m].fastAccessDx(k) = val;
3713  C5[i+j*m].fastAccessDx(k) = val;
3714  }
3715  }
3716  }
3717 
3718  Teuchos::BLAS<int,FadType> teuchos_blas;
3719  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3720  &A[0], m, &B[0], m, beta, &C1[0], m);
3721 
3722  Teuchos::BLAS<int,FadType> sacado_blas(false);
3723  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3724  &A[0], m, &B[0], m, beta, &C2[0], m);
3725 
3726  COMPARE_FAD_VECTORS(C1, C2, m*n);
3727 
3728  unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
3729  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3730  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3731  &A[0], m, &B[0], m, beta, &C3[0], m);
3732 
3733  COMPARE_FAD_VECTORS(C1, C3, m*n);
3734 
3735  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3736  &A[0], m, &b[0], m, beta, &C4[0], m);
3737 
3738  COMPARE_FAD_VECTORS(C1, C4, m*n);
3739 
3740  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3741  &A[0], m, &b[0], m, beta, &C5[0], m);
3742 
3743  COMPARE_FAD_VECTORS(C1, C5, m*n);
3744 
3745  // lower tri
3746  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3747  &A[0], m, &B[0], m, beta, &C1[0], m);
3748  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3749  &A[0], m, &B[0], m, beta, &C2[0], m);
3750  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3751  &A[0], m, &B[0], m, beta, &C3[0], m);
3752  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3753  &A[0], m, &b[0], m, beta, &C4[0], m);
3754  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3755  &A[0], m, &b[0], m, beta, &C5[0], m);
3756 
3757  COMPARE_FAD_VECTORS(C1, C2, m*n);
3758  COMPARE_FAD_VECTORS(C1, C3, m*n);
3759  COMPARE_FAD_VECTORS(C1, C4, m*n);
3760  COMPARE_FAD_VECTORS(C1, C5, m*n);
3761 }
3762 
3763 // Tests with constant A and B
3764 template <class FadType, class ScalarType>
3765 void
3768 
3769  // SYMM is apparently not defined in the BLAS for complex types
3771  return;
3772 
3773  VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3774  C4(m*n,ndot), C5(m*n,ndot);
3775  std::vector<ScalarType> a(m*m), b(m*n);
3776  for (unsigned int j=0; j<m; j++) {
3777  for (unsigned int i=0; i<m; i++) {
3778  a[i+j*m] = urand.number();
3779  A[i+j*m] = a[i+j*m];
3780  }
3781  }
3782  for (unsigned int j=0; j<n; j++) {
3783  for (unsigned int i=0; i<m; i++) {
3784  b[i+j*m] = urand.number();
3785  B[i+j*m] = b[i+j*m];
3786  }
3787  }
3788  FadType alpha(ndot, urand.number());
3789  FadType beta(ndot, urand.number());
3790  for (unsigned int k=0; k<ndot; k++) {
3791  alpha.fastAccessDx(k) = urand.number();
3792  beta.fastAccessDx(k) = urand.number();
3793  }
3794 
3795  for (unsigned int j=0; j<n; j++) {
3796  for (unsigned int i=0; i<m; i++) {
3797  ScalarType val = urand.number();
3798  C1[i+j*m] = FadType(ndot, val);
3799  C2[i+j*m] = FadType(ndot, val);
3800  C3[i+j*m] = FadType(ndot, val);
3801  C4[i+j*m] = FadType(ndot, val);
3802  C5[i+j*m] = FadType(ndot, val);
3803  for (unsigned int k=0; k<ndot; k++) {
3804  val = urand.number();
3805  C1[i+j*m].fastAccessDx(k) = val;
3806  C2[i+j*m].fastAccessDx(k) = val;
3807  C3[i+j*m].fastAccessDx(k) = val;
3808  C4[i+j*m].fastAccessDx(k) = val;
3809  C5[i+j*m].fastAccessDx(k) = val;
3810  }
3811  }
3812  }
3813 
3814  Teuchos::BLAS<int,FadType> teuchos_blas;
3815  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3816  &A[0], m, &B[0], m, beta, &C1[0], m);
3817 
3818  Teuchos::BLAS<int,FadType> sacado_blas(false);
3819  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3820  &A[0], m, &B[0], m, beta, &C2[0], m);
3821 
3822  COMPARE_FAD_VECTORS(C1, C2, m*n);
3823 
3824  unsigned int sz = m*m + m*n*(2+ndot);
3825  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3826  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3827  &A[0], m, &B[0], m, beta, &C3[0], m);
3828 
3829  COMPARE_FAD_VECTORS(C1, C3, m*n);
3830 
3831  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3832  &a[0], m, &b[0], m, beta, &C4[0], m);
3833 
3834  COMPARE_FAD_VECTORS(C1, C4, m*n);
3835 
3836  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3837  &a[0], m, &b[0], m, beta, &C5[0], m);
3838 
3839  COMPARE_FAD_VECTORS(C1, C5, m*n);
3840 
3841  // lower tri
3842  teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3843  &A[0], m, &B[0], m, beta, &C1[0], m);
3844  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3845  &A[0], m, &B[0], m, beta, &C2[0], m);
3846  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3847  &A[0], m, &B[0], m, beta, &C3[0], m);
3848  sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3849  &a[0], m, &b[0], m, beta, &C4[0], m);
3850  sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3851  &a[0], m, &b[0], m, beta, &C5[0], m);
3852 
3853  COMPARE_FAD_VECTORS(C1, C2, m*n);
3854  COMPARE_FAD_VECTORS(C1, C3, m*n);
3855  COMPARE_FAD_VECTORS(C1, C4, m*n);
3856  COMPARE_FAD_VECTORS(C1, C5, m*n);
3857 }
3858 
3859 // Tests all arguments, left side
3860 template <class FadType, class ScalarType>
3861 void
3864  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
3865  for (unsigned int j=0; j<m; j++) {
3866  for (unsigned int i=0; i<m; i++) {
3867  A[i+j*m] = FadType(ndot, urand.number());
3868  for (unsigned int k=0; k<ndot; k++)
3869  A[i+j*m].fastAccessDx(k) = urand.number();
3870  }
3871  }
3872  FadType alpha(ndot, urand.number());
3873  for (unsigned int k=0; k<ndot; k++) {
3874  alpha.fastAccessDx(k) = urand.number();
3875  }
3876 
3877  for (unsigned int j=0; j<n; j++) {
3878  for (unsigned int i=0; i<m; i++) {
3879  ScalarType val = urand.number();
3880  B1[i+j*m] = FadType(ndot, val);
3881  B2[i+j*m] = FadType(ndot, val);
3882  B3[i+j*m] = FadType(ndot, val);
3883  for (unsigned int k=0; k<ndot; k++) {
3884  val = urand.number();
3885  B1[i+j*m].fastAccessDx(k) = val;
3886  B2[i+j*m].fastAccessDx(k) = val;
3887  B3[i+j*m].fastAccessDx(k) = val;
3888  }
3889  }
3890  }
3891 
3892  Teuchos::BLAS<int,FadType> teuchos_blas;
3894  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3895 
3896  Teuchos::BLAS<int,FadType> sacado_blas(false);
3898  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3899 
3900  COMPARE_FAD_VECTORS(B1, B2, m*n);
3901 
3902  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
3903  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3905  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3906 
3907  COMPARE_FAD_VECTORS(B1, B3, m*n);
3908 
3910  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3912  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3914  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3915  COMPARE_FAD_VECTORS(B1, B2, m*n);
3916  COMPARE_FAD_VECTORS(B1, B3, m*n);
3917 
3919  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3921  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3923  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3924  COMPARE_FAD_VECTORS(B1, B2, m*n);
3925  COMPARE_FAD_VECTORS(B1, B3, m*n);
3926 
3927  for (unsigned int i=0; i<m; i++) {
3928  A[i*m+i].val() = 1.0;
3929  for (unsigned int k=0; k<ndot; k++)
3930  A[i*m+i].fastAccessDx(k) = 0.0;
3931  }
3933  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3935  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3937  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3938  COMPARE_FAD_VECTORS(B1, B2, m*n);
3939  COMPARE_FAD_VECTORS(B1, B3, m*n);
3940 }
3941 
3942 // Tests all arguments, right side
3943 template <class FadType, class ScalarType>
3944 void
3947  VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
3948  for (unsigned int j=0; j<n; j++) {
3949  for (unsigned int i=0; i<n; i++) {
3950  A[i+j*n] = FadType(ndot, urand.number());
3951  for (unsigned int k=0; k<ndot; k++)
3952  A[i+j*n].fastAccessDx(k) = urand.number();
3953  }
3954  }
3955  FadType alpha(ndot, urand.number());
3956  for (unsigned int k=0; k<ndot; k++) {
3957  alpha.fastAccessDx(k) = urand.number();
3958  }
3959 
3960  for (unsigned int j=0; j<n; j++) {
3961  for (unsigned int i=0; i<m; i++) {
3962  ScalarType val = urand.number();
3963  B1[i+j*m] = FadType(ndot, val);
3964  B2[i+j*m] = FadType(ndot, val);
3965  B3[i+j*m] = FadType(ndot, val);
3966  for (unsigned int k=0; k<ndot; k++) {
3967  val = urand.number();
3968  B1[i+j*m].fastAccessDx(k) = val;
3969  B2[i+j*m].fastAccessDx(k) = val;
3970  B3[i+j*m].fastAccessDx(k) = val;
3971  }
3972  }
3973  }
3974 
3975  Teuchos::BLAS<int,FadType> teuchos_blas;
3977  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
3978 
3979  Teuchos::BLAS<int,FadType> sacado_blas(false);
3981  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
3982 
3983  COMPARE_FAD_VECTORS(B1, B2, m*n);
3984 
3985  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
3986  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3988  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
3989 
3990  COMPARE_FAD_VECTORS(B1, B3, m*n);
3991 
3993  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
3995  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
3997  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
3998  COMPARE_FAD_VECTORS(B1, B2, m*n);
3999  COMPARE_FAD_VECTORS(B1, B3, m*n);
4000 
4002  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4004  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4006  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4007  COMPARE_FAD_VECTORS(B1, B2, m*n);
4008  COMPARE_FAD_VECTORS(B1, B3, m*n);
4009 
4010  for (unsigned int i=0; i<n; i++) {
4011  A[i*n+i].val() = 1.0;
4012  for (unsigned int k=0; k<ndot; k++)
4013  A[i*n+i].fastAccessDx(k) = 0.0;
4014  }
4016  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4018  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4020  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4021  COMPARE_FAD_VECTORS(B1, B2, m*n);
4022  COMPARE_FAD_VECTORS(B1, B3, m*n);
4023 }
4024 
4025 // Tests all arguments, left side, different lda, ldb
4026 template <class FadType, class ScalarType>
4027 void
4030  unsigned int lda = m+4;
4031  unsigned int ldb = m+5;
4032  VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4033  for (unsigned int j=0; j<m; j++) {
4034  for (unsigned int i=0; i<lda; i++) {
4035  A[i+j*lda] = FadType(ndot, urand.number());
4036  for (unsigned int k=0; k<ndot; k++)
4037  A[i+j*lda].fastAccessDx(k) = urand.number();
4038  }
4039  }
4040  FadType alpha(ndot, urand.number());
4041  for (unsigned int k=0; k<ndot; k++) {
4042  alpha.fastAccessDx(k) = urand.number();
4043  }
4044 
4045  for (unsigned int j=0; j<n; j++) {
4046  for (unsigned int i=0; i<ldb; i++) {
4047  ScalarType val = urand.number();
4048  B1[i+j*ldb] = FadType(ndot, val);
4049  B2[i+j*ldb] = FadType(ndot, val);
4050  B3[i+j*ldb] = FadType(ndot, val);
4051  for (unsigned int k=0; k<ndot; k++) {
4052  val = urand.number();
4053  B1[i+j*ldb].fastAccessDx(k) = val;
4054  B2[i+j*ldb].fastAccessDx(k) = val;
4055  B3[i+j*ldb].fastAccessDx(k) = val;
4056  }
4057  }
4058  }
4059 
4060  Teuchos::BLAS<int,FadType> teuchos_blas;
4062  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4063 
4064  Teuchos::BLAS<int,FadType> sacado_blas(false);
4066  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4067 
4068  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4069 
4070  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4071  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4073  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4074 
4075  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4076 
4078  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4080  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4082  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4083  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4084  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4085 
4087  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4089  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4091  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4092  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4093  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4094 
4095  for (unsigned int i=0; i<m; i++) {
4096  A[i*lda+i].val() = 1.0;
4097  for (unsigned int k=0; k<ndot; k++)
4098  A[i*lda+i].fastAccessDx(k) = 0.0;
4099  }
4101  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4103  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4105  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4106  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4107  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4108 }
4109 
4110 // Tests all arguments, right side, different lda, ldb
4111 template <class FadType, class ScalarType>
4112 void
4115  unsigned int lda = n+4;
4116  unsigned int ldb = m+5;
4117  VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4118  for (unsigned int j=0; j<n; j++) {
4119  for (unsigned int i=0; i<lda; i++) {
4120  A[i+j*lda] = FadType(ndot, urand.number());
4121  for (unsigned int k=0; k<ndot; k++)
4122  A[i+j*lda].fastAccessDx(k) = urand.number();
4123  }
4124  }
4125  FadType alpha(ndot, urand.number());
4126  for (unsigned int k=0; k<ndot; k++) {
4127  alpha.fastAccessDx(k) = urand.number();
4128  }
4129 
4130  for (unsigned int j=0; j<n; j++) {
4131  for (unsigned int i=0; i<ldb; i++) {
4132  ScalarType val = urand.number();
4133  B1[i+j*ldb] = FadType(ndot, val);
4134  B2[i+j*ldb] = FadType(ndot, val);
4135  B3[i+j*ldb] = FadType(ndot, val);
4136  for (unsigned int k=0; k<ndot; k++) {
4137  val = urand.number();
4138  B1[i+j*ldb].fastAccessDx(k) = val;
4139  B2[i+j*ldb].fastAccessDx(k) = val;
4140  B3[i+j*ldb].fastAccessDx(k) = val;
4141  }
4142  }
4143  }
4144 
4145  Teuchos::BLAS<int,FadType> teuchos_blas;
4147  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4148 
4149  Teuchos::BLAS<int,FadType> sacado_blas(false);
4151  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4152 
4153  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4154 
4155  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4156  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4158  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4159 
4160  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4161 
4163  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4165  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4167  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4168  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4169  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4170 
4172  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4174  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4176  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4177  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4178  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4179 
4180  for (unsigned int i=0; i<n; i++) {
4181  A[i*lda+i].val() = 1.0;
4182  for (unsigned int k=0; k<ndot; k++)
4183  A[i*lda+i].fastAccessDx(k) = 0.0;
4184  }
4186  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4188  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4190  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4191  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4192  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4193 }
4194 
4195 // Tests constant alpha
4196 template <class FadType, class ScalarType>
4197 void
4200  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4201  for (unsigned int j=0; j<m; j++) {
4202  for (unsigned int i=0; i<m; i++) {
4203  A[i+j*m] = FadType(ndot, urand.number());
4204  for (unsigned int k=0; k<ndot; k++)
4205  A[i+j*m].fastAccessDx(k) = urand.number();
4206  }
4207  }
4208  ScalarType alpha = urand.number();
4209 
4210  for (unsigned int j=0; j<n; j++) {
4211  for (unsigned int i=0; i<m; i++) {
4212  ScalarType val = urand.number();
4213  B1[i+j*m] = FadType(ndot, val);
4214  B2[i+j*m] = FadType(ndot, val);
4215  B3[i+j*m] = FadType(ndot, val);
4216  for (unsigned int k=0; k<ndot; k++) {
4217  val = urand.number();
4218  B1[i+j*m].fastAccessDx(k) = val;
4219  B2[i+j*m].fastAccessDx(k) = val;
4220  B3[i+j*m].fastAccessDx(k) = val;
4221  }
4222  }
4223  }
4224 
4225  Teuchos::BLAS<int,FadType> teuchos_blas;
4227  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4228 
4229  Teuchos::BLAS<int,FadType> sacado_blas(false);
4231  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4232 
4233  COMPARE_FAD_VECTORS(B1, B2, m*n);
4234 
4235  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4236  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4238  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4239 
4240  COMPARE_FAD_VECTORS(B1, B3, m*n);
4241 
4243  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4245  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4247  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4248  COMPARE_FAD_VECTORS(B1, B2, m*n);
4249  COMPARE_FAD_VECTORS(B1, B3, m*n);
4250 
4252  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4254  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4256  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4257  COMPARE_FAD_VECTORS(B1, B2, m*n);
4258  COMPARE_FAD_VECTORS(B1, B3, m*n);
4259 
4260  for (unsigned int i=0; i<m; i++) {
4261  A[i*m+i].val() = 1.0;
4262  for (unsigned int k=0; k<ndot; k++)
4263  A[i*m+i].fastAccessDx(k) = 0.0;
4264  }
4266  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4268  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4270  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4271  COMPARE_FAD_VECTORS(B1, B2, m*n);
4272  COMPARE_FAD_VECTORS(B1, B3, m*n);
4273 }
4274 
4275 // Tests constant B
4276 template <class FadType, class ScalarType>
4277 void
4280  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4281  for (unsigned int j=0; j<m; j++) {
4282  for (unsigned int i=0; i<m; i++) {
4283  A[i+j*m] = FadType(ndot, urand.number());
4284  for (unsigned int k=0; k<ndot; k++)
4285  A[i+j*m].fastAccessDx(k) = urand.number();
4286  }
4287  }
4288  FadType alpha(ndot, urand.number());
4289  for (unsigned int k=0; k<ndot; k++) {
4290  alpha.fastAccessDx(k) = urand.number();
4291  }
4292 
4293  for (unsigned int j=0; j<n; j++) {
4294  for (unsigned int i=0; i<m; i++) {
4295  ScalarType val = urand.number();
4296  B1[i+j*m] = val;
4297  B2[i+j*m] = val;
4298  B3[i+j*m] = val;
4299  }
4300  }
4301 
4302  Teuchos::BLAS<int,FadType> teuchos_blas;
4304  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4305 
4306  Teuchos::BLAS<int,FadType> sacado_blas(false);
4308  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4309 
4310  COMPARE_FAD_VECTORS(B1, B2, m*n);
4311 
4312  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4313  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4315  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4316 
4317  COMPARE_FAD_VECTORS(B1, B3, m*n);
4318 
4320  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4322  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4324  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4325  COMPARE_FAD_VECTORS(B1, B2, m*n);
4326  COMPARE_FAD_VECTORS(B1, B3, m*n);
4327 
4329  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4331  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4333  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4334  COMPARE_FAD_VECTORS(B1, B2, m*n);
4335  COMPARE_FAD_VECTORS(B1, B3, m*n);
4336 
4337  for (unsigned int i=0; i<m; i++) {
4338  A[i*m+i].val() = 1.0;
4339  for (unsigned int k=0; k<ndot; k++)
4340  A[i*m+i].fastAccessDx(k) = 0.0;
4341  }
4343  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4345  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4347  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4348  COMPARE_FAD_VECTORS(B1, B2, m*n);
4349  COMPARE_FAD_VECTORS(B1, B3, m*n);
4350 }
4351 
4352 // Tests constant A
4353 template <class FadType, class ScalarType>
4354 void
4357  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
4358  B4(m*n,ndot), B5(m*n,ndot);
4359  std::vector<ScalarType> a(m*m);
4360  for (unsigned int j=0; j<m; j++) {
4361  for (unsigned int i=0; i<m; i++) {
4362  a[i+j*m] = urand.number();
4363  A[i+j*m] = a[i+j*m];
4364  }
4365  }
4366  FadType alpha(ndot, urand.number());
4367  for (unsigned int k=0; k<ndot; k++) {
4368  alpha.fastAccessDx(k) = urand.number();
4369  }
4370 
4371  for (unsigned int j=0; j<n; j++) {
4372  for (unsigned int i=0; i<m; i++) {
4373  ScalarType val = urand.number();
4374  B1[i+j*m] = FadType(ndot, val);
4375  B2[i+j*m] = FadType(ndot, val);
4376  B3[i+j*m] = FadType(ndot, val);
4377  B4[i+j*m] = FadType(ndot, val);
4378  B5[i+j*m] = FadType(ndot, val);
4379  for (unsigned int k=0; k<ndot; k++) {
4380  val = urand.number();
4381  B1[i+j*m].fastAccessDx(k) = val;
4382  B2[i+j*m].fastAccessDx(k) = val;
4383  B3[i+j*m].fastAccessDx(k) = val;
4384  B4[i+j*m].fastAccessDx(k) = val;
4385  B5[i+j*m].fastAccessDx(k) = val;
4386  }
4387  }
4388  }
4389 
4390  Teuchos::BLAS<int,FadType> teuchos_blas;
4392  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4393 
4394  Teuchos::BLAS<int,FadType> sacado_blas(false);
4396  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4397 
4398  COMPARE_FAD_VECTORS(B1, B2, m*n);
4399 
4400  unsigned int sz = m*m + m*n*(1+ndot);
4401  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4403  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4404 
4405  COMPARE_FAD_VECTORS(B1, B3, m*n);
4406 
4408  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4409 
4410  COMPARE_FAD_VECTORS(B1, B4, m*n);
4411 
4413  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4414 
4415  COMPARE_FAD_VECTORS(B1, B5, m*n);
4416 
4418  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4420  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4422  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4424  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4426  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4427  COMPARE_FAD_VECTORS(B1, B2, m*n);
4428  COMPARE_FAD_VECTORS(B1, B3, m*n);
4429  COMPARE_FAD_VECTORS(B1, B4, m*n);
4430  COMPARE_FAD_VECTORS(B1, B5, m*n);
4431 
4433  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4435  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4437  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4439  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4441  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4442  COMPARE_FAD_VECTORS(B1, B2, m*n);
4443  COMPARE_FAD_VECTORS(B1, B3, m*n);
4444  COMPARE_FAD_VECTORS(B1, B4, m*n);
4445  COMPARE_FAD_VECTORS(B1, B5, m*n);
4446 
4447  for (unsigned int i=0; i<m; i++) {
4448  A[i*m+i].val() = 1.0;
4449  for (unsigned int k=0; k<ndot; k++)
4450  A[i*m+i].fastAccessDx(k) = 0.0;
4451  }
4453  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4455  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4457  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4459  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4461  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4462  COMPARE_FAD_VECTORS(B1, B2, m*n);
4463  COMPARE_FAD_VECTORS(B1, B3, m*n);
4464  COMPARE_FAD_VECTORS(B1, B4, m*n);
4465  COMPARE_FAD_VECTORS(B1, B5, m*n);
4466 }
4467 
4468 // Tests all arguments, left side
4469 template <class FadType, class ScalarType>
4470 void
4473  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4474  for (unsigned int j=0; j<m; j++) {
4475  for (unsigned int i=0; i<m; i++) {
4476  //A[i+j*m] = urand.number();
4477  A[i+j*m] = FadType(ndot, urand.number());
4478  for (unsigned int k=0; k<ndot; k++)
4479  A[i+j*m].fastAccessDx(k) = urand.number();
4480  }
4481  }
4482  FadType alpha(ndot, urand.number());
4483  for (unsigned int k=0; k<ndot; k++) {
4484  alpha.fastAccessDx(k) = urand.number();
4485  }
4486  //ScalarType alpha = urand.number();
4487 
4488  for (unsigned int j=0; j<n; j++) {
4489  for (unsigned int i=0; i<m; i++) {
4490  ScalarType val = urand.number();
4491  // B1[i+j*m] = val;
4492  // B2[i+j*m] = val;
4493  // B3[i+j*m] = val;
4494  B1[i+j*m] = FadType(ndot, val);
4495  B2[i+j*m] = FadType(ndot, val);
4496  B3[i+j*m] = FadType(ndot, val);
4497  for (unsigned int k=0; k<ndot; k++) {
4498  val = urand.number();
4499  B1[i+j*m].fastAccessDx(k) = val;
4500  B2[i+j*m].fastAccessDx(k) = val;
4501  B3[i+j*m].fastAccessDx(k) = val;
4502  }
4503  }
4504  }
4505 
4506  Teuchos::BLAS<int,FadType> teuchos_blas;
4508  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4509 
4510  Teuchos::BLAS<int,FadType> sacado_blas(false);
4512  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4513 
4514  COMPARE_FAD_VECTORS(B1, B2, m*n);
4515 
4516  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4517  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4519  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4520 
4521  COMPARE_FAD_VECTORS(B1, B3, m*n);
4522 
4524  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4526  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4528  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4529  COMPARE_FAD_VECTORS(B1, B2, m*n);
4530  COMPARE_FAD_VECTORS(B1, B3, m*n);
4531 
4533  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4535  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4537  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4538  COMPARE_FAD_VECTORS(B1, B2, m*n);
4539  COMPARE_FAD_VECTORS(B1, B3, m*n);
4540 
4541  for (unsigned int i=0; i<m; i++) {
4542  A[i*m+i].val() = 1.0;
4543  for (unsigned int k=0; k<ndot; k++)
4544  A[i*m+i].fastAccessDx(k) = 0.0;
4545  }
4547  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4549  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4551  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4552  COMPARE_FAD_VECTORS(B1, B2, m*n);
4553  COMPARE_FAD_VECTORS(B1, B3, m*n);
4554 }
4555 
4556 // Tests all arguments, right side
4557 template <class FadType, class ScalarType>
4558 void
4561  VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4562  for (unsigned int j=0; j<n; j++) {
4563  for (unsigned int i=0; i<n; i++) {
4564  A[i+j*n] = FadType(ndot, urand.number());
4565  for (unsigned int k=0; k<ndot; k++)
4566  A[i+j*n].fastAccessDx(k) = urand.number();
4567  }
4568  }
4569  FadType alpha(ndot, urand.number());
4570  for (unsigned int k=0; k<ndot; k++) {
4571  alpha.fastAccessDx(k) = urand.number();
4572  }
4573 
4574  for (unsigned int j=0; j<n; j++) {
4575  for (unsigned int i=0; i<m; i++) {
4576  ScalarType val = urand.number();
4577  B1[i+j*m] = FadType(ndot, val);
4578  B2[i+j*m] = FadType(ndot, val);
4579  B3[i+j*m] = FadType(ndot, val);
4580  for (unsigned int k=0; k<ndot; k++) {
4581  val = urand.number();
4582  B1[i+j*m].fastAccessDx(k) = val;
4583  B2[i+j*m].fastAccessDx(k) = val;
4584  B3[i+j*m].fastAccessDx(k) = val;
4585  }
4586  }
4587  }
4588 
4589  Teuchos::BLAS<int,FadType> teuchos_blas;
4591  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4592 
4593  Teuchos::BLAS<int,FadType> sacado_blas(false);
4595  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4596 
4597  COMPARE_FAD_VECTORS(B1, B2, m*n);
4598 
4599  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4600  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4602  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4603 
4604  COMPARE_FAD_VECTORS(B1, B3, m*n);
4605 
4607  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4609  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4611  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4612  COMPARE_FAD_VECTORS(B1, B2, m*n);
4613  COMPARE_FAD_VECTORS(B1, B3, m*n);
4614 
4616  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4618  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4620  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4621  COMPARE_FAD_VECTORS(B1, B2, m*n);
4622  COMPARE_FAD_VECTORS(B1, B3, m*n);
4623 
4624  for (unsigned int i=0; i<n; i++) {
4625  A[i*n+i].val() = 1.0;
4626  for (unsigned int k=0; k<ndot; k++)
4627  A[i*n+i].fastAccessDx(k) = 0.0;
4628  }
4630  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4632  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4634  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4635  COMPARE_FAD_VECTORS(B1, B2, m*n);
4636  COMPARE_FAD_VECTORS(B1, B3, m*n);
4637 }
4638 
4639 // Tests all arguments, left side, different lda, ldb
4640 template <class FadType, class ScalarType>
4641 void
4644  unsigned int lda = m+4;
4645  unsigned int ldb = m+5;
4646  VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4647  for (unsigned int j=0; j<m; j++) {
4648  for (unsigned int i=0; i<lda; i++) {
4649  A[i+j*lda] = FadType(ndot, urand.number());
4650  for (unsigned int k=0; k<ndot; k++)
4651  A[i+j*lda].fastAccessDx(k) = urand.number();
4652  }
4653  }
4654  FadType alpha(ndot, urand.number());
4655  for (unsigned int k=0; k<ndot; k++) {
4656  alpha.fastAccessDx(k) = urand.number();
4657  }
4658 
4659  for (unsigned int j=0; j<n; j++) {
4660  for (unsigned int i=0; i<ldb; i++) {
4661  ScalarType val = urand.number();
4662  B1[i+j*ldb] = FadType(ndot, val);
4663  B2[i+j*ldb] = FadType(ndot, val);
4664  B3[i+j*ldb] = FadType(ndot, val);
4665  for (unsigned int k=0; k<ndot; k++) {
4666  val = urand.number();
4667  B1[i+j*ldb].fastAccessDx(k) = val;
4668  B2[i+j*ldb].fastAccessDx(k) = val;
4669  B3[i+j*ldb].fastAccessDx(k) = val;
4670  }
4671  }
4672  }
4673 
4674  Teuchos::BLAS<int,FadType> teuchos_blas;
4676  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4677 
4678  Teuchos::BLAS<int,FadType> sacado_blas(false);
4680  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4681 
4682  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4683 
4684  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4685  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4687  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4688 
4689  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4690 
4692  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4694  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4696  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4697  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4698  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4699 
4701  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4703  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4705  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4706  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4707  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4708 
4709  for (unsigned int i=0; i<m; i++) {
4710  A[i*lda+i].val() = 1.0;
4711  for (unsigned int k=0; k<ndot; k++)
4712  A[i*lda+i].fastAccessDx(k) = 0.0;
4713  }
4715  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4717  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4719  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4720  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4721  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4722 }
4723 
4724 // Tests all arguments, right side, different lda, ldb
4725 template <class FadType, class ScalarType>
4726 void
4729  unsigned int lda = n+4;
4730  unsigned int ldb = m+5;
4731  VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4732  for (unsigned int j=0; j<n; j++) {
4733  for (unsigned int i=0; i<lda; i++) {
4734  A[i+j*lda] = FadType(ndot, urand.number());
4735  for (unsigned int k=0; k<ndot; k++)
4736  A[i+j*lda].fastAccessDx(k) = urand.number();
4737  }
4738  }
4739  FadType alpha(ndot, urand.number());
4740  for (unsigned int k=0; k<ndot; k++) {
4741  alpha.fastAccessDx(k) = urand.number();
4742  }
4743 
4744  for (unsigned int j=0; j<n; j++) {
4745  for (unsigned int i=0; i<ldb; i++) {
4746  ScalarType val = urand.number();
4747  B1[i+j*ldb] = FadType(ndot, val);
4748  B2[i+j*ldb] = FadType(ndot, val);
4749  B3[i+j*ldb] = FadType(ndot, val);
4750  for (unsigned int k=0; k<ndot; k++) {
4751  val = urand.number();
4752  B1[i+j*ldb].fastAccessDx(k) = val;
4753  B2[i+j*ldb].fastAccessDx(k) = val;
4754  B3[i+j*ldb].fastAccessDx(k) = val;
4755  }
4756  }
4757  }
4758 
4759  Teuchos::BLAS<int,FadType> teuchos_blas;
4761  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4762 
4763  Teuchos::BLAS<int,FadType> sacado_blas(false);
4765  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4766 
4767  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4768 
4769  unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4770  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4772  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4773 
4774  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4775 
4777  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4779  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4781  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4782  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4783  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4784 
4786  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4788  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4790  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4791  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4792  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4793 
4794  for (unsigned int i=0; i<n; i++) {
4795  A[i*lda+i].val() = 1.0;
4796  for (unsigned int k=0; k<ndot; k++)
4797  A[i*lda+i].fastAccessDx(k) = 0.0;
4798  }
4800  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4802  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4804  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4805  COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4806  COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4807 }
4808 
4809 // Tests constant alpha
4810 template <class FadType, class ScalarType>
4811 void
4814  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4815  for (unsigned int j=0; j<m; j++) {
4816  for (unsigned int i=0; i<m; i++) {
4817  A[i+j*m] = FadType(ndot, urand.number());
4818  for (unsigned int k=0; k<ndot; k++)
4819  A[i+j*m].fastAccessDx(k) = urand.number();
4820  }
4821  }
4822  ScalarType alpha = urand.number();
4823 
4824  for (unsigned int j=0; j<n; j++) {
4825  for (unsigned int i=0; i<m; i++) {
4826  ScalarType val = urand.number();
4827  B1[i+j*m] = FadType(ndot, val);
4828  B2[i+j*m] = FadType(ndot, val);
4829  B3[i+j*m] = FadType(ndot, val);
4830  for (unsigned int k=0; k<ndot; k++) {
4831  val = urand.number();
4832  B1[i+j*m].fastAccessDx(k) = val;
4833  B2[i+j*m].fastAccessDx(k) = val;
4834  B3[i+j*m].fastAccessDx(k) = val;
4835  }
4836  }
4837  }
4838 
4839  Teuchos::BLAS<int,FadType> teuchos_blas;
4841  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4842 
4843  Teuchos::BLAS<int,FadType> sacado_blas(false);
4845  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4846 
4847  COMPARE_FAD_VECTORS(B1, B2, m*n);
4848 
4849  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4850  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4852  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4853 
4854  COMPARE_FAD_VECTORS(B1, B3, m*n);
4855 
4857  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4859  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4861  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4862  COMPARE_FAD_VECTORS(B1, B2, m*n);
4863  COMPARE_FAD_VECTORS(B1, B3, m*n);
4864 
4866  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4868  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4870  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4871  COMPARE_FAD_VECTORS(B1, B2, m*n);
4872  COMPARE_FAD_VECTORS(B1, B3, m*n);
4873 
4874  for (unsigned int i=0; i<m; i++) {
4875  A[i*m+i].val() = 1.0;
4876  for (unsigned int k=0; k<ndot; k++)
4877  A[i*m+i].fastAccessDx(k) = 0.0;
4878  }
4880  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4882  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4884  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4885  COMPARE_FAD_VECTORS(B1, B2, m*n);
4886  COMPARE_FAD_VECTORS(B1, B3, m*n);
4887 }
4888 
4889 // Tests constant B
4890 template <class FadType, class ScalarType>
4891 void
4894  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4895  for (unsigned int j=0; j<m; j++) {
4896  for (unsigned int i=0; i<m; i++) {
4897  A[i+j*m] = FadType(ndot, urand.number());
4898  for (unsigned int k=0; k<ndot; k++)
4899  A[i+j*m].fastAccessDx(k) = urand.number();
4900  }
4901  }
4902  FadType alpha(ndot, urand.number());
4903  for (unsigned int k=0; k<ndot; k++) {
4904  alpha.fastAccessDx(k) = urand.number();
4905  }
4906 
4907  for (unsigned int j=0; j<n; j++) {
4908  for (unsigned int i=0; i<m; i++) {
4909  ScalarType val = urand.number();
4910  B1[i+j*m] = val;
4911  B2[i+j*m] = val;
4912  B3[i+j*m] = val;
4913  }
4914  }
4915 
4916  Teuchos::BLAS<int,FadType> teuchos_blas;
4918  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4919 
4920  Teuchos::BLAS<int,FadType> sacado_blas(false);
4922  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4923 
4924  COMPARE_FAD_VECTORS(B1, B2, m*n);
4925 
4926  unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4927  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4929  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4930 
4931  COMPARE_FAD_VECTORS(B1, B3, m*n);
4932 
4934  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4936  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4938  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4939  COMPARE_FAD_VECTORS(B1, B2, m*n);
4940  COMPARE_FAD_VECTORS(B1, B3, m*n);
4941 
4943  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4945  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4947  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4948  COMPARE_FAD_VECTORS(B1, B2, m*n);
4949  COMPARE_FAD_VECTORS(B1, B3, m*n);
4950 
4951  for (unsigned int i=0; i<m; i++) {
4952  A[i*m+i].val() = 1.0;
4953  for (unsigned int k=0; k<ndot; k++)
4954  A[i*m+i].fastAccessDx(k) = 0.0;
4955  }
4957  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4959  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4961  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4962  COMPARE_FAD_VECTORS(B1, B2, m*n);
4963  COMPARE_FAD_VECTORS(B1, B3, m*n);
4964 }
4965 
4966 // Tests constant A
4967 template <class FadType, class ScalarType>
4968 void
4971  VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
4972  B4(m*n,ndot), B5(m*n,ndot);
4973  std::vector<ScalarType> a(m*m);
4974  for (unsigned int j=0; j<m; j++) {
4975  for (unsigned int i=0; i<m; i++) {
4976  a[i+j*m] = urand.number();
4977  A[i+j*m] = a[i+j*m];
4978  }
4979  }
4980  FadType alpha(ndot, urand.number());
4981  for (unsigned int k=0; k<ndot; k++) {
4982  alpha.fastAccessDx(k) = urand.number();
4983  }
4984 
4985  for (unsigned int j=0; j<n; j++) {
4986  for (unsigned int i=0; i<m; i++) {
4987  ScalarType val = urand.number();
4988  B1[i+j*m] = FadType(ndot, val);
4989  B2[i+j*m] = FadType(ndot, val);
4990  B3[i+j*m] = FadType(ndot, val);
4991  B4[i+j*m] = FadType(ndot, val);
4992  B5[i+j*m] = FadType(ndot, val);
4993  for (unsigned int k=0; k<ndot; k++) {
4994  val = urand.number();
4995  B1[i+j*m].fastAccessDx(k) = val;
4996  B2[i+j*m].fastAccessDx(k) = val;
4997  B3[i+j*m].fastAccessDx(k) = val;
4998  B4[i+j*m].fastAccessDx(k) = val;
4999  B5[i+j*m].fastAccessDx(k) = val;
5000  }
5001  }
5002  }
5003 
5004  Teuchos::BLAS<int,FadType> teuchos_blas;
5006  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5007 
5008  Teuchos::BLAS<int,FadType> sacado_blas(false);
5010  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5011 
5012  COMPARE_FAD_VECTORS(B1, B2, m*n);
5013 
5014  unsigned int sz = m*m + m*n*(1+ndot);
5015  Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
5017  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5018 
5019  COMPARE_FAD_VECTORS(B1, B3, m*n);
5020 
5022  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5023 
5024  COMPARE_FAD_VECTORS(B1, B4, m*n);
5025 
5027  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5028 
5029  COMPARE_FAD_VECTORS(B1, B5, m*n);
5030 
5032  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5034  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5036  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5038  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5040  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5041  COMPARE_FAD_VECTORS(B1, B2, m*n);
5042  COMPARE_FAD_VECTORS(B1, B3, m*n);
5043  COMPARE_FAD_VECTORS(B1, B4, m*n);
5044  COMPARE_FAD_VECTORS(B1, B5, m*n);
5045 
5047  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5049  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5051  Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5053  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5055  Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5056  COMPARE_FAD_VECTORS(B1, B2, m*n);
5057  COMPARE_FAD_VECTORS(B1, B3, m*n);
5058  COMPARE_FAD_VECTORS(B1, B4, m*n);
5059  COMPARE_FAD_VECTORS(B1, B5, m*n);
5060 
5061  for (unsigned int i=0; i<m; i++) {
5062  A[i*m+i].val() = 1.0;
5063  for (unsigned int k=0; k<ndot; k++)
5064  A[i*m+i].fastAccessDx(k) = 0.0;
5065  }
5067  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5069  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5071  Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5073  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5075  Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5076  COMPARE_FAD_VECTORS(B1, B2, m*n);
5077  COMPARE_FAD_VECTORS(B1, B3, m*n);
5078  COMPARE_FAD_VECTORS(B1, B4, m*n);
5079  COMPARE_FAD_VECTORS(B1, B5, m*n);
5080 }
5081 
5082 #undef COMPARE_VALUES
5083 #undef COMPARE_FADS
5084 #undef COMPARE_FAD_VECTORS
5085 
5086 #endif // FADBLASUNITTESTS_HPP
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
CPPUNIT_TEST(testSCAL1)
#define COMPARE_FAD_VECTORS(X1, X2, n)
Sacado::Random< ScalarType > urand
Sacado::Fad::DFad< double > FadType
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
CPPUNIT_TEST_SUITE(FadBLASUnitTests)
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
#define COMPARE_FADS(a, b)
Sacado::Random< double > real_urand
expr val()
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Sacado::Fad::Vector< unsigned int, FadType > VectorType
#define A
Definition: Sacado_rad.hpp:572
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
int n