32 #ifndef FADBLASUNITTESTS_HPP
33 #define FADBLASUNITTESTS_HPP
41 #include <cppunit/extensions/HelperMacros.h>
// Assert that b agrees with a to within a mixed absolute/relative tolerance:
//   |a - b| < tol_a + tol_r*|a|
// The expansion reads this->tol_a and this->tol_r, so it must be used inside
// a member function of a class providing those members.
// Both arguments are parenthesized in the difference so that expression
// arguments (e.g. `p + q`) are compared as a whole rather than being split
// by operator precedence.
// Note: a is evaluated twice, and the expansion already ends with ';'.
#define COMPARE_VALUES(a, b) \
  CPPUNIT_ASSERT( std::abs((a)-(b)) < this->tol_a + this->tol_r*std::abs(a) );
46 #define COMPARE_FADS(a, b) \
47 CPPUNIT_ASSERT(a.size() == b.size()); \
48 CPPUNIT_ASSERT(a.hasFastAccess() == b.hasFastAccess()); \
49 COMPARE_VALUES(a.val(), b.val()); \
50 for (int k=0; k<a.size(); k++) { \
51 COMPARE_VALUES(a.dx(k), b.dx(k)); \
52 COMPARE_VALUES(a.fastAccessDx(k), b.fastAccessDx(k)); \
56 #define COMPARE_FAD_VECTORS(X1, X2, n) \
57 CPPUNIT_ASSERT(X1.size() == std::size_t(n)); \
58 CPPUNIT_ASSERT(X2.size() == std::size_t(n)); \
59 for (unsigned int i=0; i<n; i++) { \
60 COMPARE_FADS(X1[i], X2[i]); \
65 template <
class FadType,
class ScalarType>
162 double absolute_tolerance,
double relative_tolerance);
275 template <
class FadType,
class ScalarType>
278 urand(), real_urand(), m(5),
n(6), l(4), ndot(7), tol_a(1.0e-11), tol_r(1.0e-11) {}
280 template <
class FadType,
class ScalarType>
283 double relative_tolerance) :
290 tol_a(absolute_tolerance),
291 tol_r(relative_tolerance) {}
293 template <
class FadType,
class ScalarType>
297 template <
class FadType,
class ScalarType>
302 template <
class FadType,
class ScalarType>
306 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
307 for (
unsigned int i=0; i<m; i++) {
308 ScalarType
val = urand.number();
312 for (
unsigned int k=0; k<ndot; k++) {
313 val = urand.number();
314 x1[i].fastAccessDx(k) =
val;
315 x2[i].fastAccessDx(k) =
val;
316 x3[i].fastAccessDx(k) =
val;
319 FadType alpha(ndot, urand.number());
320 for (
unsigned int k=0; k<ndot; k++) {
321 alpha.fastAccessDx(k) = urand.number();
325 teuchos_blas.
SCAL(m, alpha, &x1[0], 1);
328 sacado_blas.
SCAL(m, alpha, &x2[0], 1);
332 unsigned int sz = m*(1+ndot);
334 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
340 template <
class FadType,
class ScalarType>
344 unsigned int incx = 2;
345 VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
346 for (
unsigned int i=0; i<m*incx; i++) {
347 ScalarType
val = urand.number();
351 for (
unsigned int k=0; k<ndot; k++) {
352 val = urand.number();
353 x1[i].fastAccessDx(k) =
val;
354 x2[i].fastAccessDx(k) =
val;
355 x3[i].fastAccessDx(k) =
val;
358 FadType alpha(ndot, urand.number());
359 for (
unsigned int k=0; k<ndot; k++) {
360 alpha.fastAccessDx(k) = urand.number();
364 teuchos_blas.
SCAL(m, alpha, &x1[0], incx);
367 sacado_blas.
SCAL(m, alpha, &x2[0], incx);
371 unsigned int sz = m*(1+ndot);
373 sacado_blas2.SCAL(m, alpha, &x3[0], incx);
379 template <
class FadType,
class ScalarType>
383 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
384 for (
unsigned int i=0; i<m; i++) {
385 ScalarType
val = urand.number();
389 for (
unsigned int k=0; k<ndot; k++) {
390 val = urand.number();
391 x1[i].fastAccessDx(k) =
val;
392 x2[i].fastAccessDx(k) =
val;
393 x3[i].fastAccessDx(k) =
val;
396 ScalarType alpha = urand.number();
399 teuchos_blas.
SCAL(m, alpha, &x1[0], 1);
402 sacado_blas.
SCAL(m, alpha, &x2[0], 1);
406 unsigned int sz = m*(1+ndot);
408 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
414 template <
class FadType,
class ScalarType>
418 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
419 for (
unsigned int i=0; i<m; i++) {
420 ScalarType
val = urand.number();
426 for (
unsigned int k=0; k<ndot; k++)
427 alpha.fastAccessDx(k) = urand.number();
430 teuchos_blas.
SCAL(m, alpha, &x1[0], 1);
433 sacado_blas.
SCAL(m, alpha, &x2[0], 1);
437 unsigned int sz = m*(1+ndot);
439 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
445 template <
class FadType,
class ScalarType>
449 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
450 for (
unsigned int i=0; i<m; i++) {
451 x[i] =
FadType(ndot, urand.number());
452 ScalarType
val = urand.number();
456 for (
unsigned int k=0; k<ndot; k++) {
457 x[i].fastAccessDx(k) = urand.number();
458 val = urand.number();
459 y1[i].fastAccessDx(k) =
val;
460 y2[i].fastAccessDx(k) =
val;
461 y3[i].fastAccessDx(k) =
val;
466 teuchos_blas.
COPY(m, &x[0], 1, &y1[0], 1);
469 sacado_blas.
COPY(m, &x[0], 1, &y2[0], 1);
473 unsigned int sz = 2*m*(1+ndot);
475 sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
481 template <
class FadType,
class ScalarType>
485 unsigned int incx = 2;
486 unsigned int incy = 3;
487 VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
488 for (
unsigned int i=0; i<m*incx; i++) {
489 x[i] =
FadType(ndot, urand.number());
490 for (
unsigned int k=0; k<ndot; k++) {
491 x[i].fastAccessDx(k) = urand.number();
494 for (
unsigned int i=0; i<m*incy; i++) {
495 ScalarType
val = urand.number();
499 for (
unsigned int k=0; k<ndot; k++) {
500 val = urand.number();
501 y1[i].fastAccessDx(k) =
val;
502 y2[i].fastAccessDx(k) =
val;
503 y3[i].fastAccessDx(k) =
val;
508 teuchos_blas.
COPY(m, &x[0], incx, &y1[0], incy);
511 sacado_blas.
COPY(m, &x[0], incx, &y2[0], incy);
515 unsigned int sz = 2*m*(1+ndot);
517 sacado_blas2.COPY(m, &x[0], incx, &y3[0], incy);
523 template <
class FadType,
class ScalarType>
527 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
528 for (
unsigned int i=0; i<m; i++) {
529 x[i] = urand.number();
531 for (
unsigned int i=0; i<m; i++) {
532 ScalarType
val = urand.number();
536 for (
unsigned int k=0; k<ndot; k++) {
537 val = urand.number();
538 y1[i].fastAccessDx(k) =
val;
539 y2[i].fastAccessDx(k) =
val;
540 y3[i].fastAccessDx(k) =
val;
545 teuchos_blas.
COPY(m, &x[0], 1, &y1[0], 1);
548 sacado_blas.
COPY(m, &x[0], 1, &y2[0], 1);
552 unsigned int sz = 2*m*(1+ndot);
554 sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
560 template <
class FadType,
class ScalarType>
564 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
565 for (
unsigned int i=0; i<m; i++) {
566 x[i] =
FadType(ndot, urand.number());
567 ScalarType
val = urand.number();
571 for (
unsigned int k=0; k<ndot; k++) {
572 x[i].fastAccessDx(k) = urand.number();
577 teuchos_blas.
COPY(m, &x[0], 1, &y1[0], 1);
580 sacado_blas.
COPY(m, &x[0], 1, &y2[0], 1);
584 unsigned int sz = 2*m*(1+ndot);
586 sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
592 template <
class FadType,
class ScalarType>
596 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
597 for (
unsigned int i=0; i<m; i++) {
598 x[i] =
FadType(ndot, urand.number());
599 ScalarType
val = urand.number();
603 for (
unsigned int k=0; k<ndot; k++) {
604 x[i].fastAccessDx(k) = urand.number();
605 val = urand.number();
606 y1[i].fastAccessDx(k) =
val;
607 y2[i].fastAccessDx(k) =
val;
608 y3[i].fastAccessDx(k) =
val;
611 FadType alpha(ndot, urand.number());
612 for (
unsigned int k=0; k<ndot; k++)
613 alpha.fastAccessDx(k) = urand.number();
616 teuchos_blas.
AXPY(m, alpha, &x[0], 1, &y1[0], 1);
619 sacado_blas.
AXPY(m, alpha, &x[0], 1, &y2[0], 1);
623 unsigned int sz = 2*m*(1+ndot);
625 sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
631 template <
class FadType,
class ScalarType>
635 unsigned int incx = 2;
636 unsigned int incy = 3;
637 VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
638 for (
unsigned int i=0; i<m*incx; i++) {
639 x[i] =
FadType(ndot, urand.number());
640 for (
unsigned int k=0; k<ndot; k++) {
641 x[i].fastAccessDx(k) = urand.number();
644 for (
unsigned int i=0; i<m*incy; i++) {
645 ScalarType
val = urand.number();
649 for (
unsigned int k=0; k<ndot; k++) {
650 val = urand.number();
651 y1[i].fastAccessDx(k) =
val;
652 y2[i].fastAccessDx(k) =
val;
653 y3[i].fastAccessDx(k) =
val;
656 FadType alpha(ndot, urand.number());
657 for (
unsigned int k=0; k<ndot; k++)
658 alpha.fastAccessDx(k) = urand.number();
661 teuchos_blas.
AXPY(m, alpha, &x[0], incx, &y1[0], incy);
664 sacado_blas.
AXPY(m, alpha, &x[0], incx, &y2[0], incy);
668 unsigned int sz = 2*m*(1+ndot);
670 sacado_blas2.AXPY(m, alpha, &x[0], incx, &y3[0], incy);
676 template <
class FadType,
class ScalarType>
680 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
681 std::vector<ScalarType> xx(m);
682 for (
unsigned int i=0; i<m; i++) {
683 xx[i] = urand.number();
685 ScalarType
val = urand.number();
690 for (
unsigned int k=0; k<ndot; k++) {
691 val = urand.number();
692 y1[i].fastAccessDx(k) =
val;
693 y2[i].fastAccessDx(k) =
val;
694 y3[i].fastAccessDx(k) =
val;
695 y4[i].fastAccessDx(k) =
val;
698 FadType alpha(ndot, urand.number());
699 for (
unsigned int k=0; k<ndot; k++)
700 alpha.fastAccessDx(k) = urand.number();
703 teuchos_blas.
AXPY(m, alpha, &x[0], 1, &y1[0], 1);
706 sacado_blas.
AXPY(m, alpha, &x[0], 1, &y2[0], 1);
710 unsigned int sz = m*(1+ndot)+m;
712 sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
716 sacado_blas.
AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
722 template <
class FadType,
class ScalarType>
726 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
727 for (
unsigned int i=0; i<m; i++) {
728 x[i] =
FadType(ndot, urand.number());
729 ScalarType
val = urand.number();
733 for (
unsigned int k=0; k<ndot; k++) {
734 x[i].fastAccessDx(k) = urand.number();
737 FadType alpha(ndot, urand.number());
738 for (
unsigned int k=0; k<ndot; k++)
739 alpha.fastAccessDx(k) = urand.number();
742 teuchos_blas.
AXPY(m, alpha, &x[0], 1, &y1[0], 1);
745 sacado_blas.
AXPY(m, alpha, &x[0], 1, &y2[0], 1);
749 unsigned int sz = 2*m*(1+ndot);
751 sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
757 template <
class FadType,
class ScalarType>
762 for (
unsigned int i=0; i<m; i++) {
763 X[i] =
FadType(ndot, real_urand.number());
764 Y[i] =
FadType(ndot, real_urand.number());
765 for (
unsigned int k=0; k<ndot; k++) {
766 X[i].fastAccessDx(k) = real_urand.number();
767 Y[i].fastAccessDx(k) = real_urand.number();
772 FadType z1 = teuchos_blas.
DOT(m, &X[0], 1, &Y[0], 1);
775 FadType z2 = sacado_blas.
DOT(m, &X[0], 1, &Y[0], 1);
779 unsigned int sz = 2*m*(1+ndot);
781 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
787 template <
class FadType,
class ScalarType>
791 unsigned int incx = 2;
792 unsigned int incy = 3;
794 for (
unsigned int i=0; i<m*incx; i++) {
795 X[i] =
FadType(ndot, real_urand.number());
796 for (
unsigned int k=0; k<ndot; k++) {
797 X[i].fastAccessDx(k) = real_urand.number();
800 for (
unsigned int i=0; i<m*incy; i++) {
801 Y[i] =
FadType(ndot, real_urand.number());
802 for (
unsigned int k=0; k<ndot; k++) {
803 Y[i].fastAccessDx(k) = real_urand.number();
808 FadType z1 = teuchos_blas.
DOT(m, &X[0], incx, &Y[0], incy);
811 FadType z2 = sacado_blas.
DOT(m, &X[0], incx, &Y[0], incy);
815 unsigned int sz = 2*m*(1+ndot);
817 FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
823 template <
class FadType,
class ScalarType>
828 std::vector<ScalarType> x(m);
829 for (
unsigned int i=0; i<m; i++) {
830 x[i] = urand.number();
832 Y[i] =
FadType(ndot, real_urand.number());
833 for (
unsigned int k=0; k<ndot; k++) {
834 Y[i].fastAccessDx(k) = real_urand.number();
839 FadType z1 = teuchos_blas.
DOT(m, &X[0], 1, &Y[0], 1);
842 FadType z2 = sacado_blas.
DOT(m, &X[0], 1, &Y[0], 1);
846 unsigned int sz = 2*m*(1+ndot);
848 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
852 FadType z4 = sacado_blas.
DOT(m, &x[0], 1, &Y[0], 1);
858 template <
class FadType,
class ScalarType>
863 std::vector<ScalarType> y(m);
864 for (
unsigned int i=0; i<m; i++) {
865 X[i] =
FadType(ndot, real_urand.number());
866 y[i] = urand.number();
868 for (
unsigned int k=0; k<ndot; k++) {
869 X[i].fastAccessDx(k) = real_urand.number();
874 FadType z1 = teuchos_blas.
DOT(m, &X[0], 1, &Y[0], 1);
877 FadType z2 = sacado_blas.
DOT(m, &X[0], 1, &Y[0], 1);
881 unsigned int sz = 2*m*(1+ndot);
883 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
887 FadType z4 = sacado_blas.
DOT(m, &X[0], 1, &y[0], 1);
893 template <
class FadType,
class ScalarType>
898 for (
unsigned int i=0; i<m; i++) {
899 X[i] =
FadType(ndot, real_urand.number());
900 for (
unsigned int k=0; k<ndot; k++) {
901 X[i].fastAccessDx(k) = real_urand.number();
907 teuchos_blas.
NRM2(m, &X[0], 1);
911 sacado_blas.
NRM2(m, &X[0], 1);
915 unsigned int sz = m*(1+ndot);
918 sacado_blas2.NRM2(m, &X[0], 1);
924 template <
class FadType,
class ScalarType>
928 unsigned int incx = 2;
930 for (
unsigned int i=0; i<m*incx; i++) {
931 X[i] =
FadType(ndot, real_urand.number());
932 for (
unsigned int k=0; k<ndot; k++) {
933 X[i].fastAccessDx(k) = real_urand.number();
939 teuchos_blas.
NRM2(m, &X[0], incx);
943 sacado_blas.
NRM2(m, &X[0], incx);
947 unsigned int sz = m*(1+ndot);
950 sacado_blas2.NRM2(m, &X[0], incx);
956 template <
class FadType,
class ScalarType>
960 VectorType A(m*
n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
961 for (
unsigned int j=0; j<n; j++) {
962 for (
unsigned int i=0; i<m; i++) {
963 A[i+j*m] =
FadType(ndot, urand.number());
964 for (
unsigned int k=0; k<ndot; k++)
967 B[j] =
FadType(ndot, urand.number());
968 for (
unsigned int k=0; k<ndot; k++)
971 FadType alpha(ndot, urand.number());
972 FadType beta(ndot, urand.number());
973 for (
unsigned int k=0; k<ndot; k++) {
974 alpha.fastAccessDx(k) = urand.number();
975 beta.fastAccessDx(k) = urand.number();
978 for (
unsigned int i=0; i<m; i++) {
979 ScalarType
val = urand.number();
983 for (
unsigned int k=0; k<ndot; k++) {
984 val = urand.number();
985 C1[i].fastAccessDx(k) =
val;
986 C2[i].fastAccessDx(k) =
val;
987 C3[i].fastAccessDx(k) =
val;
1001 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1010 template <
class FadType,
class ScalarType>
1014 unsigned int lda = m+3;
1015 unsigned int incb = 2;
1016 unsigned int incc = 3;
1017 VectorType A(lda*
n,ndot),
B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot),
1019 for (
unsigned int j=0; j<n; j++) {
1020 for (
unsigned int i=0; i<lda; i++) {
1021 A[i+j*lda] =
FadType(ndot, urand.number());
1022 for (
unsigned int k=0; k<ndot; k++)
1026 for (
unsigned int j=0; j<n*incb; j++) {
1027 B[j] =
FadType(ndot, urand.number());
1028 for (
unsigned int k=0; k<ndot; k++)
1031 FadType alpha(ndot, urand.number());
1032 FadType beta(ndot, urand.number());
1033 for (
unsigned int k=0; k<ndot; k++) {
1034 alpha.fastAccessDx(k) = urand.number();
1035 beta.fastAccessDx(k) = urand.number();
1038 for (
unsigned int i=0; i<m*incc; i++) {
1039 ScalarType
val = urand.number();
1043 for (
unsigned int k=0; k<ndot; k++) {
1044 val = urand.number();
1045 C1[i].fastAccessDx(k) =
val;
1046 C2[i].fastAccessDx(k) =
val;
1047 C3[i].fastAccessDx(k) =
val;
1053 beta, &C1[0], incc);
1057 beta, &C2[0], incc);
1061 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1064 beta, &C3[0], incc);
1070 template <
class FadType,
class ScalarType>
1074 VectorType A(m*
n,ndot),
B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
1075 for (
unsigned int j=0; j<n; j++) {
1076 for (
unsigned int i=0; i<m; i++) {
1077 A[i+j*m] =
FadType(ndot, urand.number());
1078 for (
unsigned int k=0; k<ndot; k++)
1082 for (
unsigned int j=0; j<m; j++) {
1083 B[j] =
FadType(ndot, urand.number());
1084 for (
unsigned int k=0; k<ndot; k++)
1087 FadType alpha(ndot, urand.number());
1088 FadType beta(ndot, urand.number());
1089 for (
unsigned int k=0; k<ndot; k++) {
1090 alpha.fastAccessDx(k) = urand.number();
1091 beta.fastAccessDx(k) = urand.number();
1094 for (
unsigned int i=0; i<n; i++) {
1095 ScalarType
val = urand.number();
1099 for (
unsigned int k=0; k<ndot; k++) {
1100 val = urand.number();
1101 C1[i].fastAccessDx(k) =
val;
1102 C2[i].fastAccessDx(k) =
val;
1103 C3[i].fastAccessDx(k) =
val;
1117 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1126 template <
class FadType,
class ScalarType>
1130 unsigned int lda = m+3;
1131 unsigned int incb = 2;
1132 unsigned int incc = 3;
1133 VectorType A(lda*
n,ndot),
B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot),
1135 for (
unsigned int j=0; j<n; j++) {
1136 for (
unsigned int i=0; i<lda; i++) {
1137 A[i+j*lda] =
FadType(ndot, urand.number());
1138 for (
unsigned int k=0; k<ndot; k++)
1142 for (
unsigned int j=0; j<m*incb; j++) {
1143 B[j] =
FadType(ndot, urand.number());
1144 for (
unsigned int k=0; k<ndot; k++)
1147 FadType alpha(ndot, urand.number());
1148 FadType beta(ndot, urand.number());
1149 for (
unsigned int k=0; k<ndot; k++) {
1150 alpha.fastAccessDx(k) = urand.number();
1151 beta.fastAccessDx(k) = urand.number();
1154 for (
unsigned int i=0; i<n*incc; i++) {
1155 ScalarType
val = urand.number();
1159 for (
unsigned int k=0; k<ndot; k++) {
1160 val = urand.number();
1161 C1[i].fastAccessDx(k) =
val;
1162 C2[i].fastAccessDx(k) =
val;
1163 C3[i].fastAccessDx(k) =
val;
1169 beta, &C1[0], incc);
1173 beta, &C2[0], incc);
1177 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1180 beta, &C3[0], incc);
1186 template <
class FadType,
class ScalarType>
1190 VectorType A(m*
n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1191 for (
unsigned int j=0; j<n; j++) {
1192 for (
unsigned int i=0; i<m; i++) {
1193 A[i+j*m] =
FadType(ndot, urand.number());
1194 for (
unsigned int k=0; k<ndot; k++)
1197 B[j] =
FadType(ndot, urand.number());
1198 for (
unsigned int k=0; k<ndot; k++)
1201 FadType alpha(ndot, urand.number());
1202 FadType beta(ndot, urand.number());
1203 for (
unsigned int k=0; k<ndot; k++) {
1204 alpha.fastAccessDx(k) = urand.number();
1205 beta.fastAccessDx(k) = urand.number();
1208 for (
unsigned int i=0; i<m; i++) {
1209 ScalarType
val = urand.number();
1225 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1234 template <
class FadType,
class ScalarType>
1238 VectorType A(m*
n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1239 for (
unsigned int j=0; j<n; j++) {
1240 for (
unsigned int i=0; i<m; i++) {
1241 A[i+j*m] =
FadType(ndot, urand.number());
1242 for (
unsigned int k=0; k<ndot; k++)
1245 B[j] =
FadType(ndot, urand.number());
1246 for (
unsigned int k=0; k<ndot; k++)
1249 ScalarType alpha = urand.number();
1250 ScalarType beta = urand.number();
1252 for (
unsigned int i=0; i<m; i++) {
1253 ScalarType
val = urand.number();
1257 for (
unsigned int k=0; k<ndot; k++) {
1258 val = urand.number();
1259 C1[i].fastAccessDx(k) =
val;
1260 C2[i].fastAccessDx(k) =
val;
1261 C3[i].fastAccessDx(k) =
val;
1275 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1284 template <
class FadType,
class ScalarType>
1288 VectorType A(m*
n,ndot),
B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1290 std::vector<ScalarType> b(n);
1291 for (
unsigned int j=0; j<n; j++) {
1292 for (
unsigned int i=0; i<m; i++) {
1293 A[i+j*m] =
FadType(ndot, urand.number());
1294 for (
unsigned int k=0; k<ndot; k++)
1297 b[j] = urand.number();
1300 FadType alpha(ndot, urand.number());
1301 FadType beta(ndot, urand.number());
1302 for (
unsigned int k=0; k<ndot; k++) {
1303 alpha.fastAccessDx(k) = urand.number();
1304 beta.fastAccessDx(k) = urand.number();
1307 for (
unsigned int i=0; i<m; i++) {
1308 ScalarType
val = urand.number();
1313 for (
unsigned int k=0; k<ndot; k++) {
1314 val = urand.number();
1315 C1[i].fastAccessDx(k) =
val;
1316 C2[i].fastAccessDx(k) =
val;
1317 C3[i].fastAccessDx(k) =
val;
1318 C4[i].fastAccessDx(k) =
val;
1332 unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
1346 template <
class FadType,
class ScalarType>
1350 VectorType A(m*
n,0),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1352 std::vector<ScalarType>
a(m*n);
1353 for (
unsigned int j=0; j<n; j++) {
1354 for (
unsigned int i=0; i<m; i++) {
1355 a[i+j*m] = urand.number();
1356 A[i+j*m] = a[i+j*m];
1358 B[j] =
FadType(ndot, urand.number());
1359 for (
unsigned int k=0; k<ndot; k++)
1362 FadType alpha(ndot, urand.number());
1363 FadType beta(ndot, urand.number());
1364 for (
unsigned int k=0; k<ndot; k++) {
1365 alpha.fastAccessDx(k) = urand.number();
1366 beta.fastAccessDx(k) = urand.number();
1369 for (
unsigned int i=0; i<m; i++) {
1370 ScalarType
val = urand.number();
1375 for (
unsigned int k=0; k<ndot; k++) {
1376 val = urand.number();
1377 C1[i].fastAccessDx(k) =
val;
1378 C2[i].fastAccessDx(k) =
val;
1379 C3[i].fastAccessDx(k) =
val;
1380 C4[i].fastAccessDx(k) =
val;
1394 unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1408 template <
class FadType,
class ScalarType>
1412 VectorType A(m*
n,0),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1414 std::vector<ScalarType>
a(m*n), b(n);
1415 for (
unsigned int j=0; j<n; j++) {
1416 for (
unsigned int i=0; i<m; i++) {
1417 a[i+j*m] = urand.number();
1418 A[i+j*m] =
a[i+j*m];
1420 b[j] = urand.number();
1423 FadType alpha(ndot, urand.number());
1424 FadType beta(ndot, urand.number());
1425 for (
unsigned int k=0; k<ndot; k++) {
1426 alpha.fastAccessDx(k) = urand.number();
1427 beta.fastAccessDx(k) = urand.number();
1430 for (
unsigned int i=0; i<m; i++) {
1431 ScalarType
val = urand.number();
1436 for (
unsigned int k=0; k<ndot; k++) {
1437 val = urand.number();
1438 C1[i].fastAccessDx(k) =
val;
1439 C2[i].fastAccessDx(k) =
val;
1440 C3[i].fastAccessDx(k) =
val;
1441 C4[i].fastAccessDx(k) =
val;
1455 unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1469 template <
class FadType,
class ScalarType>
1474 for (
unsigned int j=0; j<n; j++) {
1475 for (
unsigned int i=0; i<n; i++) {
1476 A[i+j*n] =
FadType(ndot, urand.number());
1477 for (
unsigned int k=0; k<ndot; k++)
1480 ScalarType
val = urand.number();
1484 for (
unsigned int k=0; k<ndot; k++) {
1485 val = urand.number();
1486 x1[j].fastAccessDx(k) =
val;
1487 x2[j].fastAccessDx(k) =
val;
1488 x3[j].fastAccessDx(k) =
val;
1502 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1527 for (
unsigned int i=0; i<n; i++) {
1528 A[i*n+i].val() = 1.0;
1529 for (
unsigned int k=0; k<ndot; k++)
1543 template <
class FadType,
class ScalarType>
1547 unsigned int lda =
n+3;
1548 unsigned int incx = 2;
1549 VectorType A(lda*
n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
1550 for (
unsigned int j=0; j<n; j++) {
1551 for (
unsigned int i=0; i<lda; i++) {
1552 A[i+j*lda] =
FadType(ndot, urand.number());
1553 for (
unsigned int k=0; k<ndot; k++)
1557 for (
unsigned int j=0; j<n*incx; j++) {
1558 ScalarType
val = urand.number();
1562 for (
unsigned int k=0; k<ndot; k++) {
1563 val = urand.number();
1564 x1[j].fastAccessDx(k) =
val;
1565 x2[j].fastAccessDx(k) =
val;
1566 x3[j].fastAccessDx(k) =
val;
1580 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1605 for (
unsigned int i=0; i<n; i++) {
1606 A[i*lda+i].val() = 1.0;
1607 for (
unsigned int k=0; k<ndot; k++)
1621 template <
class FadType,
class ScalarType>
1625 VectorType A(
n*
n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot),
1627 std::vector<ScalarType>
a(n*n);
1628 for (
unsigned int j=0; j<n; j++) {
1629 for (
unsigned int i=0; i<n; i++) {
1630 a[i+j*n] = urand.number();
1631 A[i+j*n] = a[i+j*n];
1633 ScalarType
val = urand.number();
1639 for (
unsigned int k=0; k<ndot; k++) {
1640 val = urand.number();
1641 x1[j].fastAccessDx(k) =
val;
1642 x2[j].fastAccessDx(k) =
val;
1643 x3[j].fastAccessDx(k) =
val;
1644 x4[j].fastAccessDx(k) =
val;
1645 x5[j].fastAccessDx(k) =
val;
1659 unsigned int sz = n*n+n*(1+ndot);
1706 for (
unsigned int i=0; i<n; i++) {
1707 A[i*n+i].val() = 1.0;
1708 for (
unsigned int k=0; k<ndot; k++)
1728 template <
class FadType,
class ScalarType>
1733 for (
unsigned int j=0; j<n; j++) {
1734 for (
unsigned int i=0; i<n; i++) {
1735 A[i+j*n] =
FadType(ndot, urand.number());
1736 for (
unsigned int k=0; k<ndot; k++)
1739 ScalarType
val = urand.number();
1755 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1780 for (
unsigned int i=0; i<n; i++) {
1781 A[i*n+i].val() = 1.0;
1782 for (
unsigned int k=0; k<ndot; k++)
1796 template <
class FadType,
class ScalarType>
1805 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1806 for (
unsigned int j=0; j<n; j++) {
1807 for (
unsigned int i=0; i<m; i++) {
1808 ScalarType
val = urand.number();
1809 A1[i+j*m] =
FadType(ndot, val);
1810 A2[i+j*m] =
FadType(ndot, val);
1811 A3[i+j*m] =
FadType(ndot, val);
1812 for (
unsigned int k=0; k<ndot; k++) {
1813 val = urand.number();
1814 A1[i+j*m].fastAccessDx(k) =
val;
1815 A2[i+j*m].fastAccessDx(k) =
val;
1816 A3[i+j*m].fastAccessDx(k) =
val;
1820 for (
unsigned int i=0; i<m; i++) {
1821 x[i] =
FadType(ndot, urand.number());
1822 for (
unsigned int k=0; k<ndot; k++)
1825 for (
unsigned int i=0; i<n; i++) {
1826 y[i] =
FadType(ndot, urand.number());
1827 for (
unsigned int k=0; k<ndot; k++)
1830 FadType alpha(ndot, urand.number());
1831 for (
unsigned int k=0; k<ndot; k++) {
1832 alpha.fastAccessDx(k) = urand.number();
1836 teuchos_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1839 sacado_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1843 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1845 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1851 template <
class FadType,
class ScalarType>
1860 unsigned int lda = m+3;
1861 unsigned int incx = 2;
1862 unsigned int incy = 3;
1863 VectorType A1(lda*
n,ndot), A2(lda*n,ndot), A3(lda*n,ndot), x(m*incx,ndot),
1865 for (
unsigned int j=0; j<n; j++) {
1866 for (
unsigned int i=0; i<lda; i++) {
1867 ScalarType
val = urand.number();
1868 A1[i+j*lda] =
FadType(ndot, val);
1869 A2[i+j*lda] =
FadType(ndot, val);
1870 A3[i+j*lda] =
FadType(ndot, val);
1871 for (
unsigned int k=0; k<ndot; k++) {
1872 val = urand.number();
1873 A1[i+j*lda].fastAccessDx(k) =
val;
1874 A2[i+j*lda].fastAccessDx(k) =
val;
1875 A3[i+j*lda].fastAccessDx(k) =
val;
1879 for (
unsigned int i=0; i<m*incx; i++) {
1880 x[i] =
FadType(ndot, urand.number());
1881 for (
unsigned int k=0; k<ndot; k++)
1884 for (
unsigned int i=0; i<n*incy; i++) {
1885 y[i] =
FadType(ndot, urand.number());
1886 for (
unsigned int k=0; k<ndot; k++)
1889 FadType alpha(ndot, urand.number());
1890 for (
unsigned int k=0; k<ndot; k++) {
1891 alpha.fastAccessDx(k) = urand.number();
1895 teuchos_blas.
GER(m, n, alpha, &x[0], incx, &y[0], incy, &A1[0], lda);
1898 sacado_blas.
GER(m, n, alpha, &x[0], incx, &y[0], incy, &A2[0], lda);
1902 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1904 sacado_blas2.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A3[0], lda);
1910 template <
class FadType,
class ScalarType>
1919 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1920 for (
unsigned int j=0; j<n; j++) {
1921 for (
unsigned int i=0; i<m; i++) {
1922 ScalarType
val = urand.number();
1923 A1[i+j*m] =
FadType(ndot, val);
1924 A2[i+j*m] =
FadType(ndot, val);
1925 A3[i+j*m] =
FadType(ndot, val);
1926 for (
unsigned int k=0; k<ndot; k++) {
1927 val = urand.number();
1928 A1[i+j*m].fastAccessDx(k) =
val;
1929 A2[i+j*m].fastAccessDx(k) =
val;
1930 A3[i+j*m].fastAccessDx(k) =
val;
1934 for (
unsigned int i=0; i<m; i++) {
1935 x[i] =
FadType(ndot, urand.number());
1936 for (
unsigned int k=0; k<ndot; k++)
1939 for (
unsigned int i=0; i<n; i++) {
1940 y[i] =
FadType(ndot, urand.number());
1941 for (
unsigned int k=0; k<ndot; k++)
1944 ScalarType alpha = urand.number();
1947 teuchos_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1950 sacado_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1954 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1956 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1962 template <
class FadType,
class ScalarType>
1971 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1972 A5(m*n,ndot), x(m,ndot), y(n,ndot);
1973 std::vector<ScalarType> xx(m);
1974 for (
unsigned int j=0; j<n; j++) {
1975 for (
unsigned int i=0; i<m; i++) {
1976 ScalarType
val = urand.number();
1977 A1[i+j*m] =
FadType(ndot, val);
1978 A2[i+j*m] =
FadType(ndot, val);
1979 A3[i+j*m] =
FadType(ndot, val);
1980 A4[i+j*m] =
FadType(ndot, val);
1981 A5[i+j*m] =
FadType(ndot, val);
1982 for (
unsigned int k=0; k<ndot; k++) {
1983 val = urand.number();
1984 A1[i+j*m].fastAccessDx(k) =
val;
1985 A2[i+j*m].fastAccessDx(k) =
val;
1986 A3[i+j*m].fastAccessDx(k) =
val;
1987 A4[i+j*m].fastAccessDx(k) =
val;
1988 A5[i+j*m].fastAccessDx(k) =
val;
1992 for (
unsigned int i=0; i<m; i++) {
1993 xx[i] = urand.number();
1996 for (
unsigned int i=0; i<n; i++) {
1997 y[i] =
FadType(ndot, urand.number());
1998 for (
unsigned int k=0; k<ndot; k++)
2001 FadType alpha(ndot, urand.number());
2002 for (
unsigned int k=0; k<ndot; k++) {
2003 alpha.fastAccessDx(k) = urand.number();
2007 teuchos_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2010 sacado_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2014 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
2016 sacado_blas2.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2020 sacado_blas.
GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A4[0], m);
2024 sacado_blas2.
GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A5[0], m);
2030 template <
class FadType,
class ScalarType>
2039 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2040 A5(m*n,ndot), x(m,ndot), y(n,ndot);
2041 std::vector<ScalarType> yy(n);
2042 for (
unsigned int j=0; j<n; j++) {
2043 for (
unsigned int i=0; i<m; i++) {
2044 ScalarType
val = urand.number();
2045 A1[i+j*m] =
FadType(ndot, val);
2046 A2[i+j*m] =
FadType(ndot, val);
2047 A3[i+j*m] =
FadType(ndot, val);
2048 A4[i+j*m] =
FadType(ndot, val);
2049 A5[i+j*m] =
FadType(ndot, val);
2050 for (
unsigned int k=0; k<ndot; k++) {
2051 val = urand.number();
2052 A1[i+j*m].fastAccessDx(k) =
val;
2053 A2[i+j*m].fastAccessDx(k) =
val;
2054 A3[i+j*m].fastAccessDx(k) =
val;
2055 A4[i+j*m].fastAccessDx(k) =
val;
2056 A5[i+j*m].fastAccessDx(k) =
val;
2060 for (
unsigned int i=0; i<m; i++) {
2061 x[i] =
FadType(ndot, urand.number());
2062 for (
unsigned int k=0; k<ndot; k++)
2065 for (
unsigned int i=0; i<n; i++) {
2066 yy[i] = urand.number();
2069 FadType alpha(ndot, urand.number());
2070 for (
unsigned int k=0; k<ndot; k++) {
2071 alpha.fastAccessDx(k) = urand.number();
2075 teuchos_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2078 sacado_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2082 unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
2084 sacado_blas2.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2088 sacado_blas.
GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A4[0], m);
2092 sacado_blas2.
GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A5[0], m);
2098 template <
class FadType,
class ScalarType>
2107 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2108 A5(m*n,ndot), x(m,ndot), y(n,ndot);
2109 std::vector<ScalarType> xx(n), yy(n);
2110 for (
unsigned int j=0; j<n; j++) {
2111 for (
unsigned int i=0; i<m; i++) {
2112 ScalarType
val = urand.number();
2113 A1[i+j*m] =
FadType(ndot, val);
2114 A2[i+j*m] =
FadType(ndot, val);
2115 A3[i+j*m] =
FadType(ndot, val);
2116 A4[i+j*m] =
FadType(ndot, val);
2117 A5[i+j*m] =
FadType(ndot, val);
2118 for (
unsigned int k=0; k<ndot; k++) {
2119 val = urand.number();
2120 A1[i+j*m].fastAccessDx(k) =
val;
2121 A2[i+j*m].fastAccessDx(k) =
val;
2122 A3[i+j*m].fastAccessDx(k) =
val;
2123 A4[i+j*m].fastAccessDx(k) =
val;
2124 A5[i+j*m].fastAccessDx(k) =
val;
2128 for (
unsigned int i=0; i<m; i++) {
2129 xx[i] = urand.number();
2132 for (
unsigned int i=0; i<n; i++) {
2133 yy[i] = urand.number();
2136 FadType alpha(ndot, urand.number());
2137 for (
unsigned int k=0; k<ndot; k++) {
2138 alpha.fastAccessDx(k) = urand.number();
2142 teuchos_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2145 sacado_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2149 unsigned int sz = m*n*(1+ndot) + m + n;
2151 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2155 sacado_blas.
GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
2159 sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
2165 template <
class FadType,
class ScalarType>
2174 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
2175 for (
unsigned int j=0; j<n; j++) {
2176 for (
unsigned int i=0; i<m; i++) {
2177 ScalarType
val = urand.number();
2183 for (
unsigned int i=0; i<m; i++) {
2184 x[i] =
FadType(ndot, urand.number());
2185 for (
unsigned int k=0; k<ndot; k++)
2188 for (
unsigned int i=0; i<n; i++) {
2189 y[i] =
FadType(ndot, urand.number());
2190 for (
unsigned int k=0; k<ndot; k++)
2193 FadType alpha(ndot, urand.number());
2194 for (
unsigned int k=0; k<ndot; k++) {
2195 alpha.fastAccessDx(k) = urand.number();
2199 teuchos_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2202 sacado_blas.
GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2206 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
2208 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2214 template <
class FadType,
class ScalarType>
2218 VectorType A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2219 for (
unsigned int j=0; j<l; j++) {
2220 for (
unsigned int i=0; i<m; i++) {
2221 A[i+j*m] =
FadType(ndot, urand.number());
2222 for (
unsigned int k=0; k<ndot; k++)
2226 for (
unsigned int j=0; j<n; j++) {
2227 for (
unsigned int i=0; i<l; i++) {
2228 B[i+j*l] =
FadType(ndot, urand.number());
2229 for (
unsigned int k=0; k<ndot; k++)
2233 FadType alpha(ndot, urand.number());
2234 FadType beta(ndot, urand.number());
2235 for (
unsigned int k=0; k<ndot; k++) {
2236 alpha.fastAccessDx(k) = urand.number();
2237 beta.fastAccessDx(k) = urand.number();
2240 for (
unsigned int j=0; j<n; j++) {
2241 for (
unsigned int i=0; i<m; i++) {
2242 ScalarType
val = urand.number();
2243 C1[i+j*m] =
FadType(ndot, val);
2244 C2[i+j*m] =
FadType(ndot, val);
2245 C3[i+j*m] =
FadType(ndot, val);
2246 for (
unsigned int k=0; k<ndot; k++) {
2247 val = urand.number();
2248 C1[i+j*m].fastAccessDx(k) =
val;
2249 C2[i+j*m].fastAccessDx(k) =
val;
2250 C3[i+j*m].fastAccessDx(k) =
val;
2257 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2261 &
A[0], m, &
B[0], l, beta, &C2[0], m);
2265 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2268 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2274 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2276 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2278 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2285 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2287 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2289 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2296 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2298 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2300 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2307 template <
class FadType,
class ScalarType>
2311 unsigned int lda = m+4;
2312 unsigned int ldb = l+4;
2313 unsigned int ldc = m+5;
2314 VectorType A(lda*l,ndot),
B(ldb*
n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2316 for (
unsigned int j=0; j<l; j++) {
2317 for (
unsigned int i=0; i<lda; i++) {
2318 A[i+j*lda] =
FadType(ndot, urand.number());
2319 for (
unsigned int k=0; k<ndot; k++)
2323 for (
unsigned int j=0; j<n; j++) {
2324 for (
unsigned int i=0; i<ldb; i++) {
2325 B[i+j*ldb] =
FadType(ndot, urand.number());
2326 for (
unsigned int k=0; k<ndot; k++)
2330 FadType alpha(ndot, urand.number());
2331 FadType beta(ndot, urand.number());
2332 for (
unsigned int k=0; k<ndot; k++) {
2333 alpha.fastAccessDx(k) = urand.number();
2334 beta.fastAccessDx(k) = urand.number();
2337 for (
unsigned int j=0; j<n; j++) {
2338 for (
unsigned int i=0; i<ldc; i++) {
2339 ScalarType
val = urand.number();
2340 C1[i+j*ldc] =
FadType(ndot, val);
2341 C2[i+j*ldc] =
FadType(ndot, val);
2342 C3[i+j*ldc] =
FadType(ndot, val);
2343 for (
unsigned int k=0; k<ndot; k++) {
2344 val = urand.number();
2345 C1[i+j*ldc].fastAccessDx(k) =
val;
2346 C2[i+j*ldc].fastAccessDx(k) =
val;
2347 C3[i+j*ldc].fastAccessDx(k) =
val;
2354 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2358 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
2362 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2365 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
2371 template <
class FadType,
class ScalarType>
2375 unsigned int lda = l+3;
2376 unsigned int ldb = l+4;
2377 unsigned int ldc = m+5;
2378 VectorType A(lda*m,ndot),
B(ldb*
n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2380 for (
unsigned int j=0; j<m; j++) {
2381 for (
unsigned int i=0; i<lda; i++) {
2382 A[i+j*lda] =
FadType(ndot, urand.number());
2383 for (
unsigned int k=0; k<ndot; k++)
2387 for (
unsigned int j=0; j<n; j++) {
2388 for (
unsigned int i=0; i<ldb; i++) {
2389 B[i+j*ldb] =
FadType(ndot, urand.number());
2390 for (
unsigned int k=0; k<ndot; k++)
2394 FadType alpha(ndot, urand.number());
2395 FadType beta(ndot, urand.number());
2396 for (
unsigned int k=0; k<ndot; k++) {
2397 alpha.fastAccessDx(k) = urand.number();
2398 beta.fastAccessDx(k) = urand.number();
2401 for (
unsigned int j=0; j<n; j++) {
2402 for (
unsigned int i=0; i<ldc; i++) {
2403 ScalarType
val = urand.number();
2404 C1[i+j*ldc] =
FadType(ndot, val);
2405 C2[i+j*ldc] =
FadType(ndot, val);
2406 C3[i+j*ldc] =
FadType(ndot, val);
2407 for (
unsigned int k=0; k<ndot; k++) {
2408 val = urand.number();
2409 C1[i+j*ldc].fastAccessDx(k) =
val;
2410 C2[i+j*ldc].fastAccessDx(k) =
val;
2411 C3[i+j*ldc].fastAccessDx(k) =
val;
2418 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2422 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
2426 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2429 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
2435 template <
class FadType,
class ScalarType>
2439 unsigned int lda = m+4;
2440 unsigned int ldb =
n+4;
2441 unsigned int ldc = m+5;
2442 VectorType A(lda*l,ndot),
B(ldb*l,ndot), C1(ldc*
n,ndot), C2(ldc*n,ndot),
2444 for (
unsigned int j=0; j<l; j++) {
2445 for (
unsigned int i=0; i<lda; i++) {
2446 A[i+j*lda] =
FadType(ndot, urand.number());
2447 for (
unsigned int k=0; k<ndot; k++)
2451 for (
unsigned int j=0; j<l; j++) {
2452 for (
unsigned int i=0; i<ldb; i++) {
2453 B[i+j*ldb] =
FadType(ndot, urand.number());
2454 for (
unsigned int k=0; k<ndot; k++)
2458 FadType alpha(ndot, urand.number());
2459 FadType beta(ndot, urand.number());
2460 for (
unsigned int k=0; k<ndot; k++) {
2461 alpha.fastAccessDx(k) = urand.number();
2462 beta.fastAccessDx(k) = urand.number();
2465 for (
unsigned int j=0; j<n; j++) {
2466 for (
unsigned int i=0; i<ldc; i++) {
2467 ScalarType
val = urand.number();
2468 C1[i+j*ldc] =
FadType(ndot, val);
2469 C2[i+j*ldc] =
FadType(ndot, val);
2470 C3[i+j*ldc] =
FadType(ndot, val);
2471 for (
unsigned int k=0; k<ndot; k++) {
2472 val = urand.number();
2473 C1[i+j*ldc].fastAccessDx(k) =
val;
2474 C2[i+j*ldc].fastAccessDx(k) =
val;
2475 C3[i+j*ldc].fastAccessDx(k) =
val;
2482 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2486 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
2490 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2493 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
2499 template <
class FadType,
class ScalarType>
2503 unsigned int lda = l+3;
2504 unsigned int ldb =
n+4;
2505 unsigned int ldc = m+5;
2506 VectorType A(lda*m,ndot),
B(ldb*l,ndot), C1(ldc*
n,ndot), C2(ldc*n,ndot),
2508 for (
unsigned int j=0; j<m; j++) {
2509 for (
unsigned int i=0; i<lda; i++) {
2510 A[i+j*lda] =
FadType(ndot, urand.number());
2511 for (
unsigned int k=0; k<ndot; k++)
2515 for (
unsigned int j=0; j<l; j++) {
2516 for (
unsigned int i=0; i<ldb; i++) {
2517 B[i+j*ldb] =
FadType(ndot, urand.number());
2518 for (
unsigned int k=0; k<ndot; k++)
2522 FadType alpha(ndot, urand.number());
2523 FadType beta(ndot, urand.number());
2524 for (
unsigned int k=0; k<ndot; k++) {
2525 alpha.fastAccessDx(k) = urand.number();
2526 beta.fastAccessDx(k) = urand.number();
2529 for (
unsigned int j=0; j<n; j++) {
2530 for (
unsigned int i=0; i<ldc; i++) {
2531 ScalarType
val = urand.number();
2532 C1[i+j*ldc] =
FadType(ndot, val);
2533 C2[i+j*ldc] =
FadType(ndot, val);
2534 C3[i+j*ldc] =
FadType(ndot, val);
2535 for (
unsigned int k=0; k<ndot; k++) {
2536 val = urand.number();
2537 C1[i+j*ldc].fastAccessDx(k) =
val;
2538 C2[i+j*ldc].fastAccessDx(k) =
val;
2539 C3[i+j*ldc].fastAccessDx(k) =
val;
2546 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2550 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
2554 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2557 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
2563 template <
class FadType,
class ScalarType>
2567 VectorType A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2568 for (
unsigned int j=0; j<l; j++) {
2569 for (
unsigned int i=0; i<m; i++) {
2570 A[i+j*m] =
FadType(ndot, urand.number());
2571 for (
unsigned int k=0; k<ndot; k++)
2575 for (
unsigned int j=0; j<n; j++) {
2576 for (
unsigned int i=0; i<l; i++) {
2577 B[i+j*l] =
FadType(ndot, urand.number());
2578 for (
unsigned int k=0; k<ndot; k++)
2582 FadType alpha(ndot, urand.number());
2583 FadType beta(ndot, urand.number());
2584 for (
unsigned int k=0; k<ndot; k++) {
2585 alpha.fastAccessDx(k) = urand.number();
2586 beta.fastAccessDx(k) = urand.number();
2589 for (
unsigned int j=0; j<n; j++) {
2590 for (
unsigned int i=0; i<m; i++) {
2591 ScalarType
val = urand.number();
2600 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2604 &
A[0], m, &
B[0], l, beta, &C2[0], m);
2608 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2611 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2617 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2619 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2621 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2628 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2630 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2632 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2639 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2641 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2643 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2650 template <
class FadType,
class ScalarType>
2654 VectorType A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2655 for (
unsigned int j=0; j<l; j++) {
2656 for (
unsigned int i=0; i<m; i++) {
2657 A[i+j*m] =
FadType(ndot, urand.number());
2658 for (
unsigned int k=0; k<ndot; k++)
2662 for (
unsigned int j=0; j<n; j++) {
2663 for (
unsigned int i=0; i<l; i++) {
2664 B[i+j*l] =
FadType(ndot, urand.number());
2665 for (
unsigned int k=0; k<ndot; k++)
2669 ScalarType alpha = urand.number();
2670 ScalarType beta = urand.number();
2672 for (
unsigned int j=0; j<n; j++) {
2673 for (
unsigned int i=0; i<m; i++) {
2674 ScalarType
val = urand.number();
2675 C1[i+j*m] =
FadType(ndot, val);
2676 C2[i+j*m] =
FadType(ndot, val);
2677 C3[i+j*m] =
FadType(ndot, val);
2678 for (
unsigned int k=0; k<ndot; k++) {
2679 val = urand.number();
2680 C1[i+j*m].fastAccessDx(k) =
val;
2681 C2[i+j*m].fastAccessDx(k) =
val;
2682 C3[i+j*m].fastAccessDx(k) =
val;
2689 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2693 &
A[0], m, &
B[0], l, beta, &C2[0], m);
2697 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2700 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2706 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2708 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2710 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2717 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2719 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2721 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2728 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2730 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2732 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2739 template <
class FadType,
class ScalarType>
2743 VectorType A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2744 C4(m*n,ndot), C5(m*n,ndot);
2745 std::vector<ScalarType>
a(m*l);
2746 for (
unsigned int j=0; j<l; j++) {
2747 for (
unsigned int i=0; i<m; i++) {
2748 a[i+j*m] = urand.number();
2749 A[i+j*m] = a[i+j*m];
2752 for (
unsigned int j=0; j<n; j++) {
2753 for (
unsigned int i=0; i<l; i++) {
2754 B[i+j*l] =
FadType(ndot, urand.number());
2755 for (
unsigned int k=0; k<ndot; k++)
2759 FadType alpha(ndot, urand.number());
2760 FadType beta(ndot, urand.number());
2761 for (
unsigned int k=0; k<ndot; k++) {
2762 alpha.fastAccessDx(k) = urand.number();
2763 beta.fastAccessDx(k) = urand.number();
2766 for (
unsigned int j=0; j<n; j++) {
2767 for (
unsigned int i=0; i<m; i++) {
2768 ScalarType
val = urand.number();
2769 C1[i+j*m] =
FadType(ndot, val);
2770 C2[i+j*m] =
FadType(ndot, val);
2771 C3[i+j*m] =
FadType(ndot, val);
2772 C4[i+j*m] =
FadType(ndot, val);
2773 C5[i+j*m] =
FadType(ndot, val);
2774 for (
unsigned int k=0; k<ndot; k++) {
2775 val = urand.number();
2776 C1[i+j*m].fastAccessDx(k) =
val;
2777 C2[i+j*m].fastAccessDx(k) =
val;
2778 C3[i+j*m].fastAccessDx(k) =
val;
2779 C4[i+j*m].fastAccessDx(k) =
val;
2780 C5[i+j*m].fastAccessDx(k) =
val;
2787 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2791 &
A[0], m, &
B[0], l, beta, &C2[0], m);
2795 unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
2798 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2803 &a[0], m, &
B[0], l, beta, &C4[0], m);
2808 &a[0], m, &
B[0], l, beta, &C5[0], m);
2814 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2816 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2818 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2820 &a[0], l, &
B[0], l, beta, &C4[0], m);
2822 &a[0], l, &
B[0], l, beta, &C5[0], m);
2831 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2833 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2835 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2837 &a[0], m, &
B[0], n, beta, &C4[0], m);
2839 &a[0], m, &
B[0], n, beta, &C5[0], m);
2848 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2850 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2852 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2854 &a[0], l, &
B[0], n, beta, &C4[0], m);
2856 &a[0], l, &
B[0], n, beta, &C5[0], m);
2865 template <
class FadType,
class ScalarType>
2869 VectorType A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2870 C4(m*n,ndot), C5(m*n,ndot);
2871 std::vector<ScalarType> b(l*n);
2872 for (
unsigned int j=0; j<l; j++) {
2873 for (
unsigned int i=0; i<m; i++) {
2874 A[i+j*m] =
FadType(ndot, urand.number());
2875 for (
unsigned int k=0; k<ndot; k++)
2879 for (
unsigned int j=0; j<n; j++) {
2880 for (
unsigned int i=0; i<l; i++) {
2881 b[i+j*l] = urand.number();
2882 B[i+j*l] = b[i+j*l];
2885 FadType alpha(ndot, urand.number());
2886 FadType beta(ndot, urand.number());
2887 for (
unsigned int k=0; k<ndot; k++) {
2888 alpha.fastAccessDx(k) = urand.number();
2889 beta.fastAccessDx(k) = urand.number();
2892 for (
unsigned int j=0; j<n; j++) {
2893 for (
unsigned int i=0; i<m; i++) {
2894 ScalarType
val = urand.number();
2895 C1[i+j*m] =
FadType(ndot, val);
2896 C2[i+j*m] =
FadType(ndot, val);
2897 C3[i+j*m] =
FadType(ndot, val);
2898 C4[i+j*m] =
FadType(ndot, val);
2899 C5[i+j*m] =
FadType(ndot, val);
2900 for (
unsigned int k=0; k<ndot; k++) {
2901 val = urand.number();
2902 C1[i+j*m].fastAccessDx(k) =
val;
2903 C2[i+j*m].fastAccessDx(k) =
val;
2904 C3[i+j*m].fastAccessDx(k) =
val;
2905 C4[i+j*m].fastAccessDx(k) =
val;
2906 C5[i+j*m].fastAccessDx(k) =
val;
2913 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2917 &
A[0], m, &
B[0], l, beta, &C2[0], m);
2921 unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
2924 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2929 &
A[0], m, &b[0], l, beta, &C4[0], m);
2934 &
A[0], m, &b[0], l, beta, &C5[0], m);
2940 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2942 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2944 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2946 &
A[0], l, &b[0], l, beta, &C4[0], m);
2948 &
A[0], l, &b[0], l, beta, &C5[0], m);
2957 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2959 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2961 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2963 &
A[0], m, &b[0], n, beta, &C4[0], m);
2965 &
A[0], m, &b[0], n, beta, &C5[0], m);
2974 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2976 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2978 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2980 &
A[0], l, &b[0], n, beta, &C4[0], m);
2982 &
A[0], l, &b[0], n, beta, &C5[0], m);
2991 template <
class FadType,
class ScalarType>
2995 VectorType A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2996 C4(m*n,ndot), C5(m*n,ndot);
2997 std::vector<ScalarType>
a(m*l), b(l*n);
2998 for (
unsigned int j=0; j<l; j++) {
2999 for (
unsigned int i=0; i<m; i++) {
3000 a[i+j*m] = urand.number();
3001 A[i+j*m] =
a[i+j*m];
3004 for (
unsigned int j=0; j<n; j++) {
3005 for (
unsigned int i=0; i<l; i++) {
3006 b[i+j*l] = urand.number();
3007 B[i+j*l] = b[i+j*l];
3010 FadType alpha(ndot, urand.number());
3011 FadType beta(ndot, urand.number());
3012 for (
unsigned int k=0; k<ndot; k++) {
3013 alpha.fastAccessDx(k) = urand.number();
3014 beta.fastAccessDx(k) = urand.number();
3017 for (
unsigned int j=0; j<n; j++) {
3018 for (
unsigned int i=0; i<m; i++) {
3019 ScalarType
val = urand.number();
3020 C1[i+j*m] =
FadType(ndot, val);
3021 C2[i+j*m] =
FadType(ndot, val);
3022 C3[i+j*m] =
FadType(ndot, val);
3023 C4[i+j*m] =
FadType(ndot, val);
3024 C5[i+j*m] =
FadType(ndot, val);
3025 for (
unsigned int k=0; k<ndot; k++) {
3026 val = urand.number();
3027 C1[i+j*m].fastAccessDx(k) =
val;
3028 C2[i+j*m].fastAccessDx(k) =
val;
3029 C3[i+j*m].fastAccessDx(k) =
val;
3030 C4[i+j*m].fastAccessDx(k) =
val;
3031 C5[i+j*m].fastAccessDx(k) =
val;
3038 &
A[0], m, &
B[0], l, beta, &C1[0], m);
3042 &
A[0], m, &
B[0], l, beta, &C2[0], m);
3046 unsigned int sz = m*l + l*n + m*n*(1+ndot);
3049 &
A[0], m, &
B[0], l, beta, &C3[0], m);
3054 &
a[0], m, &b[0], l, beta, &C4[0], m);
3059 &
a[0], m, &b[0], l, beta, &C5[0], m);
3065 &
A[0], l, &
B[0], l, beta, &C1[0], m);
3067 &
A[0], l, &
B[0], l, beta, &C2[0], m);
3069 &
A[0], l, &
B[0], l, beta, &C3[0], m);
3071 &
a[0], l, &b[0], l, beta, &C4[0], m);
3073 &
a[0], l, &b[0], l, beta, &C5[0], m);
3082 &
A[0], m, &
B[0], n, beta, &C1[0], m);
3084 &
A[0], m, &
B[0], n, beta, &C2[0], m);
3086 &
A[0], m, &
B[0], n, beta, &C3[0], m);
3088 &
a[0], m, &b[0], n, beta, &C4[0], m);
3090 &
a[0], m, &b[0], n, beta, &C5[0], m);
3099 &
A[0], l, &
B[0], n, beta, &C1[0], m);
3101 &
A[0], l, &
B[0], n, beta, &C2[0], m);
3103 &
A[0], l, &
B[0], n, beta, &C3[0], m);
3105 &
a[0], l, &b[0], n, beta, &C4[0], m);
3107 &
a[0], l, &b[0], n, beta, &C5[0], m);
3116 template <
class FadType,
class ScalarType>
3125 VectorType A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3126 for (
unsigned int j=0; j<m; j++) {
3127 for (
unsigned int i=0; i<m; i++) {
3128 A[i+j*m] =
FadType(ndot, urand.number());
3129 for (
unsigned int k=0; k<ndot; k++)
3133 for (
unsigned int j=0; j<n; j++) {
3134 for (
unsigned int i=0; i<m; i++) {
3135 B[i+j*m] =
FadType(ndot, urand.number());
3136 for (
unsigned int k=0; k<ndot; k++)
3140 FadType alpha(ndot, urand.number());
3141 FadType beta(ndot, urand.number());
3142 for (
unsigned int k=0; k<ndot; k++) {
3143 alpha.fastAccessDx(k) = urand.number();
3144 beta.fastAccessDx(k) = urand.number();
3147 for (
unsigned int j=0; j<n; j++) {
3148 for (
unsigned int i=0; i<m; i++) {
3149 ScalarType
val = urand.number();
3150 C1[i+j*m] =
FadType(ndot, val);
3151 C2[i+j*m] =
FadType(ndot, val);
3152 C3[i+j*m] =
FadType(ndot, val);
3153 for (
unsigned int k=0; k<ndot; k++) {
3154 val = urand.number();
3155 C1[i+j*m].fastAccessDx(k) =
val;
3156 C2[i+j*m].fastAccessDx(k) =
val;
3157 C3[i+j*m].fastAccessDx(k) =
val;
3164 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3168 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3172 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3175 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3181 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3183 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3185 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3192 template <
class FadType,
class ScalarType>
3201 VectorType A(
n*
n,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3202 for (
unsigned int j=0; j<n; j++) {
3203 for (
unsigned int i=0; i<n; i++) {
3204 A[i+j*n] =
FadType(ndot, urand.number());
3205 for (
unsigned int k=0; k<ndot; k++)
3209 for (
unsigned int j=0; j<n; j++) {
3210 for (
unsigned int i=0; i<m; i++) {
3211 B[i+j*m] =
FadType(ndot, urand.number());
3212 for (
unsigned int k=0; k<ndot; k++)
3216 FadType alpha(ndot, urand.number());
3217 FadType beta(ndot, urand.number());
3218 for (
unsigned int k=0; k<ndot; k++) {
3219 alpha.fastAccessDx(k) = urand.number();
3220 beta.fastAccessDx(k) = urand.number();
3223 for (
unsigned int j=0; j<n; j++) {
3224 for (
unsigned int i=0; i<m; i++) {
3225 ScalarType
val = urand.number();
3226 C1[i+j*m] =
FadType(ndot, val);
3227 C2[i+j*m] =
FadType(ndot, val);
3228 C3[i+j*m] =
FadType(ndot, val);
3229 for (
unsigned int k=0; k<ndot; k++) {
3230 val = urand.number();
3231 C1[i+j*m].fastAccessDx(k) =
val;
3232 C2[i+j*m].fastAccessDx(k) =
val;
3233 C3[i+j*m].fastAccessDx(k) =
val;
3240 &
A[0], n, &
B[0], m, beta, &C1[0], m);
3244 &
A[0], n, &
B[0], m, beta, &C2[0], m);
3248 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3251 &
A[0], n, &
B[0], m, beta, &C3[0], m);
3257 &
A[0], n, &
B[0], m, beta, &C1[0], m);
3259 &
A[0], n, &
B[0], m, beta, &C2[0], m);
3261 &
A[0], n, &
B[0], m, beta, &C3[0], m);
3268 template <
class FadType,
class ScalarType>
3277 unsigned int lda = m+4;
3278 unsigned int ldb = m+5;
3279 unsigned int ldc = m+6;
3280 VectorType A(lda*m,ndot),
B(ldb*
n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3282 for (
unsigned int j=0; j<m; j++) {
3283 for (
unsigned int i=0; i<lda; i++) {
3284 A[i+j*lda] =
FadType(ndot, urand.number());
3285 for (
unsigned int k=0; k<ndot; k++)
3289 for (
unsigned int j=0; j<n; j++) {
3290 for (
unsigned int i=0; i<ldb; i++) {
3291 B[i+j*ldb] =
FadType(ndot, urand.number());
3292 for (
unsigned int k=0; k<ndot; k++)
3296 FadType alpha(ndot, urand.number());
3297 FadType beta(ndot, urand.number());
3298 for (
unsigned int k=0; k<ndot; k++) {
3299 alpha.fastAccessDx(k) = urand.number();
3300 beta.fastAccessDx(k) = urand.number();
3303 for (
unsigned int j=0; j<n; j++) {
3304 for (
unsigned int i=0; i<ldc; i++) {
3305 ScalarType
val = urand.number();
3306 C1[i+j*ldc] =
FadType(ndot, val);
3307 C2[i+j*ldc] =
FadType(ndot, val);
3308 C3[i+j*ldc] =
FadType(ndot, val);
3309 for (
unsigned int k=0; k<ndot; k++) {
3310 val = urand.number();
3311 C1[i+j*ldc].fastAccessDx(k) =
val;
3312 C2[i+j*ldc].fastAccessDx(k) =
val;
3313 C3[i+j*ldc].fastAccessDx(k) =
val;
3320 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3324 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3328 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3331 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
3337 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3339 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3341 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
3348 template <
class FadType,
class ScalarType>
3357 unsigned int lda =
n+4;
3358 unsigned int ldb = m+5;
3359 unsigned int ldc = m+6;
3360 VectorType A(lda*
n,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3362 for (
unsigned int j=0; j<n; j++) {
3363 for (
unsigned int i=0; i<lda; i++) {
3364 A[i+j*lda] =
FadType(ndot, urand.number());
3365 for (
unsigned int k=0; k<ndot; k++)
3369 for (
unsigned int j=0; j<n; j++) {
3370 for (
unsigned int i=0; i<ldb; i++) {
3371 B[i+j*ldb] =
FadType(ndot, urand.number());
3372 for (
unsigned int k=0; k<ndot; k++)
3376 FadType alpha(ndot, urand.number());
3377 FadType beta(ndot, urand.number());
3378 for (
unsigned int k=0; k<ndot; k++) {
3379 alpha.fastAccessDx(k) = urand.number();
3380 beta.fastAccessDx(k) = urand.number();
3383 for (
unsigned int j=0; j<n; j++) {
3384 for (
unsigned int i=0; i<ldc; i++) {
3385 ScalarType
val = urand.number();
3386 C1[i+j*ldc] =
FadType(ndot, val);
3387 C2[i+j*ldc] =
FadType(ndot, val);
3388 C3[i+j*ldc] =
FadType(ndot, val);
3389 for (
unsigned int k=0; k<ndot; k++) {
3390 val = urand.number();
3391 C1[i+j*ldc].fastAccessDx(k) =
val;
3392 C2[i+j*ldc].fastAccessDx(k) =
val;
3393 C3[i+j*ldc].fastAccessDx(k) =
val;
3400 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3404 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3408 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3411 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
3417 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3419 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3421 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
3428 template <
class FadType,
class ScalarType>
3437 VectorType A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3438 for (
unsigned int j=0; j<m; j++) {
3439 for (
unsigned int i=0; i<m; i++) {
3440 A[i+j*m] =
FadType(ndot, urand.number());
3441 for (
unsigned int k=0; k<ndot; k++)
3445 for (
unsigned int j=0; j<n; j++) {
3446 for (
unsigned int i=0; i<m; i++) {
3447 B[i+j*m] =
FadType(ndot, urand.number());
3448 for (
unsigned int k=0; k<ndot; k++)
3452 FadType alpha(ndot, urand.number());
3453 FadType beta(ndot, urand.number());
3454 for (
unsigned int k=0; k<ndot; k++) {
3455 alpha.fastAccessDx(k) = urand.number();
3456 beta.fastAccessDx(k) = urand.number();
3459 for (
unsigned int j=0; j<n; j++) {
3460 for (
unsigned int i=0; i<m; i++) {
3461 ScalarType
val = urand.number();
3470 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3474 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3478 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3481 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3487 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3489 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3491 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3498 template <
class FadType,
class ScalarType>
3507 VectorType A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3508 for (
unsigned int j=0; j<m; j++) {
3509 for (
unsigned int i=0; i<m; i++) {
3510 A[i+j*m] =
FadType(ndot, urand.number());
3511 for (
unsigned int k=0; k<ndot; k++)
3515 for (
unsigned int j=0; j<n; j++) {
3516 for (
unsigned int i=0; i<m; i++) {
3517 B[i+j*m] =
FadType(ndot, urand.number());
3518 for (
unsigned int k=0; k<ndot; k++)
3522 ScalarType alpha = urand.number();
3523 ScalarType beta = urand.number();
3525 for (
unsigned int j=0; j<n; j++) {
3526 for (
unsigned int i=0; i<m; i++) {
3527 ScalarType
val = urand.number();
3528 C1[i+j*m] =
FadType(ndot, val);
3529 C2[i+j*m] =
FadType(ndot, val);
3530 C3[i+j*m] =
FadType(ndot, val);
3531 for (
unsigned int k=0; k<ndot; k++) {
3532 val = urand.number();
3533 C1[i+j*m].fastAccessDx(k) =
val;
3534 C2[i+j*m].fastAccessDx(k) =
val;
3535 C3[i+j*m].fastAccessDx(k) =
val;
3542 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3546 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3550 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3553 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3559 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3561 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3563 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3570 template <
class FadType,
class ScalarType>
3579 VectorType A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3580 C4(m*n,ndot), C5(m*n,ndot);
3581 std::vector<ScalarType>
a(m*m);
3582 for (
unsigned int j=0; j<m; j++) {
3583 for (
unsigned int i=0; i<m; i++) {
3584 a[i+j*m] = urand.number();
3585 A[i+j*m] = a[i+j*m];
3588 for (
unsigned int j=0; j<n; j++) {
3589 for (
unsigned int i=0; i<m; i++) {
3590 B[i+j*m] =
FadType(ndot, urand.number());
3591 for (
unsigned int k=0; k<ndot; k++)
3595 FadType alpha(ndot, urand.number());
3596 FadType beta(ndot, urand.number());
3597 for (
unsigned int k=0; k<ndot; k++) {
3598 alpha.fastAccessDx(k) = urand.number();
3599 beta.fastAccessDx(k) = urand.number();
3602 for (
unsigned int j=0; j<n; j++) {
3603 for (
unsigned int i=0; i<m; i++) {
3604 ScalarType
val = urand.number();
3605 C1[i+j*m] =
FadType(ndot, val);
3606 C2[i+j*m] =
FadType(ndot, val);
3607 C3[i+j*m] =
FadType(ndot, val);
3608 C4[i+j*m] =
FadType(ndot, val);
3609 C5[i+j*m] =
FadType(ndot, val);
3610 for (
unsigned int k=0; k<ndot; k++) {
3611 val = urand.number();
3612 C1[i+j*m].fastAccessDx(k) =
val;
3613 C2[i+j*m].fastAccessDx(k) =
val;
3614 C3[i+j*m].fastAccessDx(k) =
val;
3615 C4[i+j*m].fastAccessDx(k) =
val;
3616 C5[i+j*m].fastAccessDx(k) =
val;
3623 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3627 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3631 unsigned int sz = m*m + 2*m*n*(1+ndot);
3634 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3639 &a[0], m, &
B[0], m, beta, &C4[0], m);
3644 &a[0], m, &
B[0], m, beta, &C5[0], m);
3650 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3652 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3654 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3656 &a[0], m, &
B[0], m, beta, &C4[0], m);
3658 &a[0], m, &
B[0], m, beta, &C5[0], m);
3667 template <
class FadType,
class ScalarType>
3676 VectorType A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3677 C4(m*n,ndot), C5(m*n,ndot);
3678 std::vector<ScalarType> b(m*n);
3679 for (
unsigned int j=0; j<m; j++) {
3680 for (
unsigned int i=0; i<m; i++) {
3681 A[i+j*m] =
FadType(ndot, urand.number());
3682 for (
unsigned int k=0; k<ndot; k++)
3686 for (
unsigned int j=0; j<n; j++) {
3687 for (
unsigned int i=0; i<m; i++) {
3688 b[i+j*m] = urand.number();
3689 B[i+j*m] = b[i+j*m];
3692 FadType alpha(ndot, urand.number());
3693 FadType beta(ndot, urand.number());
3694 for (
unsigned int k=0; k<ndot; k++) {
3695 alpha.fastAccessDx(k) = urand.number();
3696 beta.fastAccessDx(k) = urand.number();
3699 for (
unsigned int j=0; j<n; j++) {
3700 for (
unsigned int i=0; i<m; i++) {
3701 ScalarType
val = urand.number();
3702 C1[i+j*m] =
FadType(ndot, val);
3703 C2[i+j*m] =
FadType(ndot, val);
3704 C3[i+j*m] =
FadType(ndot, val);
3705 C4[i+j*m] =
FadType(ndot, val);
3706 C5[i+j*m] =
FadType(ndot, val);
3707 for (
unsigned int k=0; k<ndot; k++) {
3708 val = urand.number();
3709 C1[i+j*m].fastAccessDx(k) =
val;
3710 C2[i+j*m].fastAccessDx(k) =
val;
3711 C3[i+j*m].fastAccessDx(k) =
val;
3712 C4[i+j*m].fastAccessDx(k) =
val;
3713 C5[i+j*m].fastAccessDx(k) =
val;
3720 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3724 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3728 unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
3731 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3736 &
A[0], m, &b[0], m, beta, &C4[0], m);
3741 &
A[0], m, &b[0], m, beta, &C5[0], m);
3747 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3749 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3751 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3753 &
A[0], m, &b[0], m, beta, &C4[0], m);
3755 &
A[0], m, &b[0], m, beta, &C5[0], m);
3764 template <
class FadType,
class ScalarType>
3773 VectorType A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3774 C4(m*n,ndot), C5(m*n,ndot);
3775 std::vector<ScalarType>
a(m*m), b(m*n);
3776 for (
unsigned int j=0; j<m; j++) {
3777 for (
unsigned int i=0; i<m; i++) {
3778 a[i+j*m] = urand.number();
3779 A[i+j*m] =
a[i+j*m];
3782 for (
unsigned int j=0; j<n; j++) {
3783 for (
unsigned int i=0; i<m; i++) {
3784 b[i+j*m] = urand.number();
3785 B[i+j*m] = b[i+j*m];
3788 FadType alpha(ndot, urand.number());
3789 FadType beta(ndot, urand.number());
3790 for (
unsigned int k=0; k<ndot; k++) {
3791 alpha.fastAccessDx(k) = urand.number();
3792 beta.fastAccessDx(k) = urand.number();
3795 for (
unsigned int j=0; j<n; j++) {
3796 for (
unsigned int i=0; i<m; i++) {
3797 ScalarType
val = urand.number();
3798 C1[i+j*m] =
FadType(ndot, val);
3799 C2[i+j*m] =
FadType(ndot, val);
3800 C3[i+j*m] =
FadType(ndot, val);
3801 C4[i+j*m] =
FadType(ndot, val);
3802 C5[i+j*m] =
FadType(ndot, val);
3803 for (
unsigned int k=0; k<ndot; k++) {
3804 val = urand.number();
3805 C1[i+j*m].fastAccessDx(k) =
val;
3806 C2[i+j*m].fastAccessDx(k) =
val;
3807 C3[i+j*m].fastAccessDx(k) =
val;
3808 C4[i+j*m].fastAccessDx(k) =
val;
3809 C5[i+j*m].fastAccessDx(k) =
val;
3816 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3820 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3824 unsigned int sz = m*m + m*n*(2+ndot);
3827 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3832 &
a[0], m, &b[0], m, beta, &C4[0], m);
3837 &
a[0], m, &b[0], m, beta, &C5[0], m);
3843 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3845 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3847 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3849 &
a[0], m, &b[0], m, beta, &C4[0], m);
3851 &
a[0], m, &b[0], m, beta, &C5[0], m);
// Fragment of a templated test routine. The function signature, the local
// declarations of A/B1/B2/B3, the calls under test, and the closing braces
// fall on lines that are not part of this excerpt.
// Visible setup: A is populated at indices i+j*m (i, j < m), alpha is a
// FadType whose ndot fastAccessDx slots are randomized, and B1/B2/B3 receive
// identical random derivative entries at indices i+j*m (i < m, j < n).
// NOTE(review): the statements that give B1 and B2 their values are elided;
// presumably they mirror the B3 assignment -- confirm against the full file.
3860 template <
class FadType,
class ScalarType>
// Construct every A entry as FadType(ndot, <random number>).
3865 for (
unsigned int j=0; j<m; j++) {
3866 for (
unsigned int i=0; i<m; i++) {
3867 A[i+j*m] =
FadType(ndot, urand.number());
// Loop over k < ndot follows; its body is not shown in this excerpt.
3868 for (
unsigned int k=0; k<ndot; k++)
// alpha starts from a random value; each fastAccessDx(k) then gets its own
// random draw.
3872 FadType alpha(ndot, urand.number());
3873 for (
unsigned int k=0; k<ndot; k++) {
3874 alpha.fastAccessDx(k) = urand.number();
// Walk B at i+j*m; a single draw `val` seeds the FadType stored in B3.
3877 for (
unsigned int j=0; j<n; j++) {
3878 for (
unsigned int i=0; i<m; i++) {
3879 ScalarType
val = urand.number();
3882 B3[i+j*m] =
FadType(ndot, val);
// Every derivative slot gets a fresh draw, written identically to B1, B2, B3.
3883 for (
unsigned int k=0; k<ndot; k++) {
3884 val = urand.number();
3885 B1[i+j*m].fastAccessDx(k) =
val;
3886 B2[i+j*m].fastAccessDx(k) =
val;
3887 B3[i+j*m].fastAccessDx(k) =
val;
// sz adds an m*m term and an m*n term, each scaled by (1+ndot); where sz is
// consumed is not visible here.
3902 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
// Overwrite the value of each diagonal entry A[i*m+i] with 1.0.
3927 for (
unsigned int i=0; i<m; i++) {
3928 A[i*m+i].val() = 1.0;
// Loop over k < ndot follows; its body is not shown in this excerpt.
3929 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine; its signature, local declarations,
// the calls under test, and closing braces are on lines missing from this
// excerpt.
// Visible setup: A is filled at indices i+j*n (i, j < n), while B1/B2/B3 are
// addressed at i+j*m (i < m, j < n); alpha is a FadType with randomized
// derivative slots.
// NOTE(review): the value assignments for B1 and B2 are elided; presumably
// they match B3 -- confirm against the full file.
3943 template <
class FadType,
class ScalarType>
// Each A entry becomes FadType(ndot, <random number>).
3948 for (
unsigned int j=0; j<n; j++) {
3949 for (
unsigned int i=0; i<n; i++) {
3950 A[i+j*n] =
FadType(ndot, urand.number());
// A loop over k < ndot starts here; the loop body is not shown.
3951 for (
unsigned int k=0; k<ndot; k++)
// alpha: random value, then one random draw per fastAccessDx(k).
3955 FadType alpha(ndot, urand.number());
3956 for (
unsigned int k=0; k<ndot; k++) {
3957 alpha.fastAccessDx(k) = urand.number();
// Fill B at i+j*m: one draw `val` is used to construct the B3 entry.
3960 for (
unsigned int j=0; j<n; j++) {
3961 for (
unsigned int i=0; i<m; i++) {
3962 ScalarType
val = urand.number();
3965 B3[i+j*m] =
FadType(ndot, val);
// `val` is redrawn per derivative slot and stored in all three arrays, so
// B1, B2, and B3 carry the same derivative entries.
3966 for (
unsigned int k=0; k<ndot; k++) {
3967 val = urand.number();
3968 B1[i+j*m].fastAccessDx(k) =
val;
3969 B2[i+j*m].fastAccessDx(k) =
val;
3970 B3[i+j*m].fastAccessDx(k) =
val;
// sz adds an n*n term and an m*n term, each scaled by (1+ndot); its use is
// not visible here.
3985 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
// Set the value of each diagonal entry A[i*n+i] to 1.0.
4010 for (
unsigned int i=0; i<n; i++) {
4011 A[i*n+i].val() = 1.0;
// A loop over k < ndot starts here; the loop body is not shown.
4012 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine; the signature, array declarations,
// calls under test, and closing braces are not part of this excerpt.
// Visible setup: strides lda = m+4 and ldb = m+5 are used to index A
// (i+j*lda, j < m) and B1/B2/B3 (i+j*ldb, j < n); the row index runs over the
// whole stride (i < lda / i < ldb), so every slot up to the stride is filled.
// NOTE(review): the value assignments for B1 and B2 are elided; presumably
// they match B3 -- confirm against the full file.
4026 template <
class FadType,
class ScalarType>
// Strides used for indexing A and the B arrays below.
4030 unsigned int lda = m+4;
4031 unsigned int ldb = m+5;
// Construct each A[i+j*lda] as FadType(ndot, <random number>).
4033 for (
unsigned int j=0; j<m; j++) {
4034 for (
unsigned int i=0; i<lda; i++) {
4035 A[i+j*lda] =
FadType(ndot, urand.number());
// Loop over k < ndot follows; body not shown in this excerpt.
4036 for (
unsigned int k=0; k<ndot; k++)
// alpha: random value plus a random draw for each fastAccessDx(k).
4040 FadType alpha(ndot, urand.number());
4041 for (
unsigned int k=0; k<ndot; k++) {
4042 alpha.fastAccessDx(k) = urand.number();
// Fill the B arrays at i+j*ldb; `val` seeds the FadType stored in B3.
4045 for (
unsigned int j=0; j<n; j++) {
4046 for (
unsigned int i=0; i<ldb; i++) {
4047 ScalarType
val = urand.number();
4050 B3[i+j*ldb] =
FadType(ndot, val);
// One fresh draw per derivative slot, copied into B1, B2, and B3 alike.
4051 for (
unsigned int k=0; k<ndot; k++) {
4052 val = urand.number();
4053 B1[i+j*ldb].fastAccessDx(k) =
val;
4054 B2[i+j*ldb].fastAccessDx(k) =
val;
4055 B3[i+j*ldb].fastAccessDx(k) =
val;
// sz is computed from m and n (not from lda/ldb); its use is not visible here.
4070 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
// Set the value of each diagonal entry A[i*lda+i] (i < m) to 1.0.
4095 for (
unsigned int i=0; i<m; i++) {
4096 A[i*lda+i].val() = 1.0;
// Loop over k < ndot follows; body not shown in this excerpt.
4097 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine; the signature, array declarations,
// calls under test, and closing braces are not part of this excerpt.
// Visible setup: strides lda = n+4 and ldb = m+5; A is filled at i+j*lda
// (i < lda, j < n) and B1/B2/B3 are addressed at i+j*ldb (i < ldb, j < n).
// NOTE(review): the value assignments for B1 and B2 are elided; presumably
// they match B3 -- confirm against the full file.
4111 template <
class FadType,
class ScalarType>
// Strides used for indexing A and the B arrays below.
4115 unsigned int lda =
n+4;
4116 unsigned int ldb = m+5;
// Construct each A[i+j*lda] as FadType(ndot, <random number>).
4118 for (
unsigned int j=0; j<n; j++) {
4119 for (
unsigned int i=0; i<lda; i++) {
4120 A[i+j*lda] =
FadType(ndot, urand.number());
// Loop over k < ndot follows; body not shown in this excerpt.
4121 for (
unsigned int k=0; k<ndot; k++)
// alpha: random value plus a random draw for each fastAccessDx(k).
4125 FadType alpha(ndot, urand.number());
4126 for (
unsigned int k=0; k<ndot; k++) {
4127 alpha.fastAccessDx(k) = urand.number();
// Fill the B arrays at i+j*ldb; `val` seeds the FadType stored in B3.
4130 for (
unsigned int j=0; j<n; j++) {
4131 for (
unsigned int i=0; i<ldb; i++) {
4132 ScalarType
val = urand.number();
4135 B3[i+j*ldb] =
FadType(ndot, val);
// One fresh draw per derivative slot, copied into B1, B2, and B3 alike.
4136 for (
unsigned int k=0; k<ndot; k++) {
4137 val = urand.number();
4138 B1[i+j*ldb].fastAccessDx(k) =
val;
4139 B2[i+j*ldb].fastAccessDx(k) =
val;
4140 B3[i+j*ldb].fastAccessDx(k) =
val;
// sz is computed from m and n (not from lda/ldb); its use is not visible here.
4155 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
// Set the value of each diagonal entry A[i*lda+i] (i < n) to 1.0.
4180 for (
unsigned int i=0; i<n; i++) {
4181 A[i*lda+i].val() = 1.0;
// Loop over k < ndot follows; body not shown in this excerpt.
4182 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine; the signature, array declarations,
// calls under test, and closing braces are not part of this excerpt.
// Visible setup: A is filled at i+j*m (i, j < m) with FadType entries, alpha
// is a plain ScalarType (no derivative slots are set on it), and B1/B2/B3 get
// identical random derivative entries at i+j*m (i < m, j < n).
// NOTE(review): the value assignments for B1 and B2 are elided; presumably
// they match B3 -- confirm against the full file.
4196 template <
class FadType,
class ScalarType>
// Construct each A entry as FadType(ndot, <random number>).
4201 for (
unsigned int j=0; j<m; j++) {
4202 for (
unsigned int i=0; i<m; i++) {
4203 A[i+j*m] =
FadType(ndot, urand.number());
// Loop over k < ndot follows; body not shown in this excerpt.
4204 for (
unsigned int k=0; k<ndot; k++)
// Here alpha is a bare scalar drawn from urand rather than a FadType.
4208 ScalarType alpha = urand.number();
// Fill B at i+j*m; `val` seeds the FadType stored in B3.
4210 for (
unsigned int j=0; j<n; j++) {
4211 for (
unsigned int i=0; i<m; i++) {
4212 ScalarType
val = urand.number();
4215 B3[i+j*m] =
FadType(ndot, val);
// One fresh draw per derivative slot, copied into B1, B2, and B3 alike.
4216 for (
unsigned int k=0; k<ndot; k++) {
4217 val = urand.number();
4218 B1[i+j*m].fastAccessDx(k) =
val;
4219 B2[i+j*m].fastAccessDx(k) =
val;
4220 B3[i+j*m].fastAccessDx(k) =
val;
// sz adds an m*m term and an m*n term, each scaled by (1+ndot); its use is
// not visible here.
4235 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
// Set the value of each diagonal entry A[i*m+i] to 1.0.
4260 for (
unsigned int i=0; i<m; i++) {
4261 A[i*m+i].val() = 1.0;
// Loop over k < ndot follows; body not shown in this excerpt.
4262 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine; the signature, array declarations,
// calls under test, and closing braces are not part of this excerpt.
// Visible setup: A is filled at i+j*m (i, j < m) and alpha is a FadType with
// randomized derivative slots. Only the random draw inside the second
// (j < n, i < m) loop nest is visible; what it initializes is elided.
4276 template <
class FadType,
class ScalarType>
// Construct each A entry as FadType(ndot, <random number>).
4281 for (
unsigned int j=0; j<m; j++) {
4282 for (
unsigned int i=0; i<m; i++) {
4283 A[i+j*m] =
FadType(ndot, urand.number());
// Loop over k < ndot follows; body not shown in this excerpt.
4284 for (
unsigned int k=0; k<ndot; k++)
// alpha: random value plus a random draw for each fastAccessDx(k).
4288 FadType alpha(ndot, urand.number());
4289 for (
unsigned int k=0; k<ndot; k++) {
4290 alpha.fastAccessDx(k) = urand.number();
// One random draw per (i, j) pair; the statements consuming `val` are not
// shown. NOTE(review): presumably this fills the B arrays -- confirm.
4293 for (
unsigned int j=0; j<n; j++) {
4294 for (
unsigned int i=0; i<m; i++) {
4295 ScalarType
val = urand.number();
// sz adds an m*m term and an m*n term, each scaled by (1+ndot); its use is
// not visible here.
4312 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
// Set the value of each diagonal entry A[i*m+i] to 1.0.
4337 for (
unsigned int i=0; i<m; i++) {
4338 A[i*m+i].val() = 1.0;
// Loop over k < ndot follows; body not shown in this excerpt.
4339 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine; the signature, the start of the
// declaration that ends with B4/B5, the calls under test, and the closing
// braces are not part of this excerpt.
// Visible setup: a scalar array `a` of m*m random numbers is mirrored into A
// by plain assignment (no derivative slots are touched on A here), alpha is a
// FadType with randomized derivative slots, and B1..B5 receive identical
// random derivative entries at i+j*m (i < m, j < n).
// NOTE(review): the value assignments for B1 and B2 are elided; presumably
// they match B3/B4/B5 -- confirm against the full file.
4353 template <
class FadType,
class ScalarType>
4358 B4(m*n,ndot), B5(m*n,ndot);
// Scalar copy of the matrix; each A[i+j*m] is assigned from a[i+j*m].
4359 std::vector<ScalarType>
a(m*m);
4360 for (
unsigned int j=0; j<m; j++) {
4361 for (
unsigned int i=0; i<m; i++) {
4362 a[i+j*m] = urand.number();
4363 A[i+j*m] = a[i+j*m];
// alpha: random value plus a random draw for each fastAccessDx(k).
4366 FadType alpha(ndot, urand.number());
4367 for (
unsigned int k=0; k<ndot; k++) {
4368 alpha.fastAccessDx(k) = urand.number();
// Fill B at i+j*m; the same `val` constructs the B3, B4, and B5 entries.
4371 for (
unsigned int j=0; j<n; j++) {
4372 for (
unsigned int i=0; i<m; i++) {
4373 ScalarType
val = urand.number();
4376 B3[i+j*m] =
FadType(ndot, val);
4377 B4[i+j*m] =
FadType(ndot, val);
4378 B5[i+j*m] =
FadType(ndot, val);
// One fresh draw per derivative slot, copied into all five B arrays.
4379 for (
unsigned int k=0; k<ndot; k++) {
4380 val = urand.number();
4381 B1[i+j*m].fastAccessDx(k) =
val;
4382 B2[i+j*m].fastAccessDx(k) =
val;
4383 B3[i+j*m].fastAccessDx(k) =
val;
4384 B4[i+j*m].fastAccessDx(k) =
val;
4385 B5[i+j*m].fastAccessDx(k) =
val;
// Unlike the m*n term, the m*m term here is not scaled by (1+ndot); where sz
// is consumed is not visible here.
4400 unsigned int sz = m*m + m*n*(1+ndot);
// Set the value of each diagonal entry A[i*m+i] to 1.0.
4447 for (
unsigned int i=0; i<m; i++) {
4448 A[i*m+i].val() = 1.0;
// Loop over k < ndot follows; body not shown in this excerpt.
4449 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine. Its signature, the declarations of
// A/B1/B2/B3, the calls under test, and the closing braces lie on lines that
// are absent from this excerpt.
// Visible setup: A is populated at i+j*m (i, j < m); alpha is a FadType with
// randomized fastAccessDx slots; B1/B2/B3 share identical random derivative
// entries at i+j*m (i < m, j < n).
// NOTE(review): the statements assigning values to B1 and B2 are elided;
// presumably they mirror B3 -- confirm against the full file.
4469 template <
class FadType,
class ScalarType>
// Every A entry is built as FadType(ndot, <random number>).
4474 for (
unsigned int j=0; j<m; j++) {
4475 for (
unsigned int i=0; i<m; i++) {
4477 A[i+j*m] =
FadType(ndot, urand.number());
// Start of a loop over k < ndot; the loop body is not shown.
4478 for (
unsigned int k=0; k<ndot; k++)
// alpha gets a random value, then a random number in each derivative slot.
4482 FadType alpha(ndot, urand.number());
4483 for (
unsigned int k=0; k<ndot; k++) {
4484 alpha.fastAccessDx(k) = urand.number();
// Traverse B at i+j*m; `val` is the value used to construct the B3 entry.
4488 for (
unsigned int j=0; j<n; j++) {
4489 for (
unsigned int i=0; i<m; i++) {
4490 ScalarType
val = urand.number();
4496 B3[i+j*m] =
FadType(ndot, val);
// `val` is redrawn for each slot k and written to B1, B2, and B3 alike.
4497 for (
unsigned int k=0; k<ndot; k++) {
4498 val = urand.number();
4499 B1[i+j*m].fastAccessDx(k) =
val;
4500 B2[i+j*m].fastAccessDx(k) =
val;
4501 B3[i+j*m].fastAccessDx(k) =
val;
// sz = (m*m + m*n) entries, each counted (1+ndot) times; its consumer is not
// visible here.
4516 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
// Force the value of each diagonal entry A[i*m+i] to 1.0.
4541 for (
unsigned int i=0; i<m; i++) {
4542 A[i*m+i].val() = 1.0;
// Start of a loop over k < ndot; the loop body is not shown.
4543 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine. Its signature, local declarations,
// the calls under test, and the closing braces lie on lines that are absent
// from this excerpt.
// Visible setup: A is populated at i+j*n (i, j < n); B1/B2/B3 are addressed
// at i+j*m (i < m, j < n); alpha is a FadType with randomized derivative
// slots.
// NOTE(review): the statements assigning values to B1 and B2 are elided;
// presumably they mirror B3 -- confirm against the full file.
4557 template <
class FadType,
class ScalarType>
// Every A entry is built as FadType(ndot, <random number>).
4562 for (
unsigned int j=0; j<n; j++) {
4563 for (
unsigned int i=0; i<n; i++) {
4564 A[i+j*n] =
FadType(ndot, urand.number());
// Start of a loop over k < ndot; the loop body is not shown.
4565 for (
unsigned int k=0; k<ndot; k++)
// alpha gets a random value, then a random number in each derivative slot.
4569 FadType alpha(ndot, urand.number());
4570 for (
unsigned int k=0; k<ndot; k++) {
4571 alpha.fastAccessDx(k) = urand.number();
// Traverse B at i+j*m; `val` is the value used to construct the B3 entry.
4574 for (
unsigned int j=0; j<n; j++) {
4575 for (
unsigned int i=0; i<m; i++) {
4576 ScalarType
val = urand.number();
4579 B3[i+j*m] =
FadType(ndot, val);
// `val` is redrawn for each slot k and written to B1, B2, and B3 alike.
4580 for (
unsigned int k=0; k<ndot; k++) {
4581 val = urand.number();
4582 B1[i+j*m].fastAccessDx(k) =
val;
4583 B2[i+j*m].fastAccessDx(k) =
val;
4584 B3[i+j*m].fastAccessDx(k) =
val;
// sz = (n*n + m*n) entries, each counted (1+ndot) times; its consumer is not
// visible here.
4599 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
// Force the value of each diagonal entry A[i*n+i] to 1.0.
4624 for (
unsigned int i=0; i<n; i++) {
4625 A[i*n+i].val() = 1.0;
// Start of a loop over k < ndot; the loop body is not shown.
4626 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine. Its signature, array declarations,
// the calls under test, and the closing braces lie on lines that are absent
// from this excerpt.
// Visible setup: lda = m+4 and ldb = m+5 act as strides; A is filled at
// i+j*lda (i < lda, j < m) and B1/B2/B3 are addressed at i+j*ldb
// (i < ldb, j < n), so every slot up to the stride is initialized.
// NOTE(review): the statements assigning values to B1 and B2 are elided;
// presumably they mirror B3 -- confirm against the full file.
4640 template <
class FadType,
class ScalarType>
// Strides for the A and B index arithmetic below.
4644 unsigned int lda = m+4;
4645 unsigned int ldb = m+5;
// Every A[i+j*lda] is built as FadType(ndot, <random number>).
4647 for (
unsigned int j=0; j<m; j++) {
4648 for (
unsigned int i=0; i<lda; i++) {
4649 A[i+j*lda] =
FadType(ndot, urand.number());
// Start of a loop over k < ndot; the loop body is not shown.
4650 for (
unsigned int k=0; k<ndot; k++)
// alpha gets a random value, then a random number in each derivative slot.
4654 FadType alpha(ndot, urand.number());
4655 for (
unsigned int k=0; k<ndot; k++) {
4656 alpha.fastAccessDx(k) = urand.number();
// Traverse B at i+j*ldb; `val` is the value used to construct the B3 entry.
4659 for (
unsigned int j=0; j<n; j++) {
4660 for (
unsigned int i=0; i<ldb; i++) {
4661 ScalarType
val = urand.number();
4664 B3[i+j*ldb] =
FadType(ndot, val);
// `val` is redrawn for each slot k and written to B1, B2, and B3 alike.
4665 for (
unsigned int k=0; k<ndot; k++) {
4666 val = urand.number();
4667 B1[i+j*ldb].fastAccessDx(k) =
val;
4668 B2[i+j*ldb].fastAccessDx(k) =
val;
4669 B3[i+j*ldb].fastAccessDx(k) =
val;
// sz depends on m and n only (the strides do not enter); its consumer is not
// visible here.
4684 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
// Force the value of each diagonal entry A[i*lda+i] (i < m) to 1.0.
4709 for (
unsigned int i=0; i<m; i++) {
4710 A[i*lda+i].val() = 1.0;
// Start of a loop over k < ndot; the loop body is not shown.
4711 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine. Its signature, array declarations,
// the calls under test, and the closing braces lie on lines that are absent
// from this excerpt.
// Visible setup: lda = n+4 and ldb = m+5 act as strides; A is filled at
// i+j*lda (i < lda, j < n) and B1/B2/B3 are addressed at i+j*ldb
// (i < ldb, j < n).
// NOTE(review): the statements assigning values to B1 and B2 are elided;
// presumably they mirror B3 -- confirm against the full file.
4725 template <
class FadType,
class ScalarType>
// Strides for the A and B index arithmetic below.
4729 unsigned int lda =
n+4;
4730 unsigned int ldb = m+5;
// Every A[i+j*lda] is built as FadType(ndot, <random number>).
4732 for (
unsigned int j=0; j<n; j++) {
4733 for (
unsigned int i=0; i<lda; i++) {
4734 A[i+j*lda] =
FadType(ndot, urand.number());
// Start of a loop over k < ndot; the loop body is not shown.
4735 for (
unsigned int k=0; k<ndot; k++)
// alpha gets a random value, then a random number in each derivative slot.
4739 FadType alpha(ndot, urand.number());
4740 for (
unsigned int k=0; k<ndot; k++) {
4741 alpha.fastAccessDx(k) = urand.number();
// Traverse B at i+j*ldb; `val` is the value used to construct the B3 entry.
4744 for (
unsigned int j=0; j<n; j++) {
4745 for (
unsigned int i=0; i<ldb; i++) {
4746 ScalarType
val = urand.number();
4749 B3[i+j*ldb] =
FadType(ndot, val);
// `val` is redrawn for each slot k and written to B1, B2, and B3 alike.
4750 for (
unsigned int k=0; k<ndot; k++) {
4751 val = urand.number();
4752 B1[i+j*ldb].fastAccessDx(k) =
val;
4753 B2[i+j*ldb].fastAccessDx(k) =
val;
4754 B3[i+j*ldb].fastAccessDx(k) =
val;
// sz depends on m and n only (the strides do not enter); its consumer is not
// visible here.
4769 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
// Force the value of each diagonal entry A[i*lda+i] (i < n) to 1.0.
4794 for (
unsigned int i=0; i<n; i++) {
4795 A[i*lda+i].val() = 1.0;
// Start of a loop over k < ndot; the loop body is not shown.
4796 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine. Its signature, array declarations,
// the calls under test, and the closing braces lie on lines that are absent
// from this excerpt.
// Visible setup: A holds FadType entries at i+j*m (i, j < m); alpha is a
// plain ScalarType with no derivative slots set; B1/B2/B3 share identical
// random derivative entries at i+j*m (i < m, j < n).
// NOTE(review): the statements assigning values to B1 and B2 are elided;
// presumably they mirror B3 -- confirm against the full file.
4810 template <
class FadType,
class ScalarType>
// Every A entry is built as FadType(ndot, <random number>).
4815 for (
unsigned int j=0; j<m; j++) {
4816 for (
unsigned int i=0; i<m; i++) {
4817 A[i+j*m] =
FadType(ndot, urand.number());
// Start of a loop over k < ndot; the loop body is not shown.
4818 for (
unsigned int k=0; k<ndot; k++)
// alpha is a bare scalar drawn from urand in this routine, not a FadType.
4822 ScalarType alpha = urand.number();
// Traverse B at i+j*m; `val` is the value used to construct the B3 entry.
4824 for (
unsigned int j=0; j<n; j++) {
4825 for (
unsigned int i=0; i<m; i++) {
4826 ScalarType
val = urand.number();
4829 B3[i+j*m] =
FadType(ndot, val);
// `val` is redrawn for each slot k and written to B1, B2, and B3 alike.
4830 for (
unsigned int k=0; k<ndot; k++) {
4831 val = urand.number();
4832 B1[i+j*m].fastAccessDx(k) =
val;
4833 B2[i+j*m].fastAccessDx(k) =
val;
4834 B3[i+j*m].fastAccessDx(k) =
val;
// sz = (m*m + m*n) entries, each counted (1+ndot) times; its consumer is not
// visible here.
4849 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
// Force the value of each diagonal entry A[i*m+i] to 1.0.
4874 for (
unsigned int i=0; i<m; i++) {
4875 A[i*m+i].val() = 1.0;
// Start of a loop over k < ndot; the loop body is not shown.
4876 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine. Its signature, array declarations,
// the calls under test, and the closing braces lie on lines that are absent
// from this excerpt.
// Visible setup: A holds FadType entries at i+j*m (i, j < m) and alpha is a
// FadType with randomized derivative slots. In the second loop nest
// (j < n, i < m) only the random draw is visible; its consumers are elided.
4890 template <
class FadType,
class ScalarType>
// Every A entry is built as FadType(ndot, <random number>).
4895 for (
unsigned int j=0; j<m; j++) {
4896 for (
unsigned int i=0; i<m; i++) {
4897 A[i+j*m] =
FadType(ndot, urand.number());
// Start of a loop over k < ndot; the loop body is not shown.
4898 for (
unsigned int k=0; k<ndot; k++)
// alpha gets a random value, then a random number in each derivative slot.
4902 FadType alpha(ndot, urand.number());
4903 for (
unsigned int k=0; k<ndot; k++) {
4904 alpha.fastAccessDx(k) = urand.number();
// A random `val` is drawn per (i, j); the statements that use it are not
// shown. NOTE(review): presumably it initializes the B arrays -- confirm.
4907 for (
unsigned int j=0; j<n; j++) {
4908 for (
unsigned int i=0; i<m; i++) {
4909 ScalarType
val = urand.number();
// sz = (m*m + m*n) entries, each counted (1+ndot) times; its consumer is not
// visible here.
4926 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
// Force the value of each diagonal entry A[i*m+i] to 1.0.
4951 for (
unsigned int i=0; i<m; i++) {
4952 A[i*m+i].val() = 1.0;
// Start of a loop over k < ndot; the loop body is not shown.
4953 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a templated test routine. Its signature, the beginning of the
// declaration that ends with B4/B5, the calls under test, and the closing
// braces lie on lines that are absent from this excerpt.
// Visible setup: `a` holds m*m random scalars and each A[i+j*m] is assigned
// directly from it (no derivative slots are written on A here); alpha is a
// FadType with randomized derivative slots; B1..B5 share identical random
// derivative entries at i+j*m (i < m, j < n).
// NOTE(review): the statements assigning values to B1 and B2 are elided;
// presumably they mirror B3/B4/B5 -- confirm against the full file.
4967 template <
class FadType,
class ScalarType>
4972 B4(m*n,ndot), B5(m*n,ndot);
// Scalar mirror of A: draw into a[i+j*m], then assign that scalar to A[i+j*m].
4973 std::vector<ScalarType>
a(m*m);
4974 for (
unsigned int j=0; j<m; j++) {
4975 for (
unsigned int i=0; i<m; i++) {
4976 a[i+j*m] = urand.number();
4977 A[i+j*m] = a[i+j*m];
// alpha gets a random value, then a random number in each derivative slot.
4980 FadType alpha(ndot, urand.number());
4981 for (
unsigned int k=0; k<ndot; k++) {
4982 alpha.fastAccessDx(k) = urand.number();
// Traverse B at i+j*m; the same `val` constructs the B3, B4, and B5 entries.
4985 for (
unsigned int j=0; j<n; j++) {
4986 for (
unsigned int i=0; i<m; i++) {
4987 ScalarType
val = urand.number();
4990 B3[i+j*m] =
FadType(ndot, val);
4991 B4[i+j*m] =
FadType(ndot, val);
4992 B5[i+j*m] =
FadType(ndot, val);
// `val` is redrawn for each slot k and written to all five B arrays.
4993 for (
unsigned int k=0; k<ndot; k++) {
4994 val = urand.number();
4995 B1[i+j*m].fastAccessDx(k) =
val;
4996 B2[i+j*m].fastAccessDx(k) =
val;
4997 B3[i+j*m].fastAccessDx(k) =
val;
4998 B4[i+j*m].fastAccessDx(k) =
val;
4999 B5[i+j*m].fastAccessDx(k) =
val;
// The m*m term is not scaled by (1+ndot) here, unlike the m*n term; the
// consumer of sz is not visible in this excerpt.
5014 unsigned int sz = m*m + m*n*(1+ndot);
// Force the value of each diagonal entry A[i*m+i] to 1.0.
5061 for (
unsigned int i=0; i<m; i++) {
5062 A[i*m+i].val() = 1.0;
// Start of a loop over k < ndot; the loop body is not shown.
5063 for (
unsigned int k=0; k<ndot; k++)
5082 #undef COMPARE_VALUES
5084 #undef COMPARE_FAD_VECTORS
5086 #endif // FADBLASUNITTESTS_HPP
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
#define COMPARE_FAD_VECTORS(X1, X2, n)
Sacado::Random< ScalarType > urand
Sacado::Fad::DFad< double > FadType
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
CPPUNIT_TEST_SUITE(FadBLASUnitTests)
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
#define COMPARE_FADS(a, b)
Sacado::Random< double > real_urand
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Sacado::Fad::Vector< unsigned int, FadType > VectorType
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const