10 #ifndef FADBLASUNITTESTS_HPP
11 #define FADBLASUNITTESTS_HPP
// Compares two indexable containers X1 and X2 that are each expected to hold
// exactly n entries: both sizes are first checked against std::size_t(n) with
// ASSERT_TRUE, then entries 0..n-1 are compared pairwise with COMPARE_FADS.
// The arguments are pasted textually, so n is re-evaluated on every loop
// iteration and the loop index is named i.
23 #define COMPARE_FAD_VECTORS(X1, X2, n) \
24 ASSERT_TRUE(X1.size() == std::size_t(n)); \
25 ASSERT_TRUE(X2.size() == std::size_t(n)); \
26 for (unsigned int i=0; i<n; i++) { \
27 COMPARE_FADS(X1[i], X2[i]); \
32 template <
class FadType>
71 typedef decltype(this->fad)
FadType;
75 auto ndot = this->ndot_;
77 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
78 for (
unsigned int i=0;
i<m;
i++) {
79 ScalarType
val = this->urand.number();
83 for (
unsigned int k=0; k<ndot; k++) {
84 val = this->urand.number();
85 x1[
i].fastAccessDx(k) =
val;
86 x2[
i].fastAccessDx(k) =
val;
87 x3[
i].fastAccessDx(k) =
val;
90 FadType alpha(ndot, this->urand.number());
91 for (
unsigned int k=0; k<ndot; k++) {
92 alpha.fastAccessDx(k) = this->urand.number();
96 teuchos_blas.
SCAL(m, alpha, &x1[0], 1);
99 sacado_blas.
SCAL(m, alpha, &x2[0], 1);
103 unsigned int sz = m*(1+ndot);
105 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
112 typedef decltype(this->fad)
FadType;
116 auto ndot = this->ndot_;
118 unsigned int incx = 2;
119 VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
120 for (
unsigned int i=0;
i<m*incx;
i++) {
121 ScalarType
val = this->urand.number();
125 for (
unsigned int k=0; k<ndot; k++) {
126 val = this->urand.number();
127 x1[
i].fastAccessDx(k) =
val;
128 x2[
i].fastAccessDx(k) =
val;
129 x3[
i].fastAccessDx(k) =
val;
132 FadType alpha(ndot, this->urand.number());
133 for (
unsigned int k=0; k<ndot; k++) {
134 alpha.fastAccessDx(k) = this->urand.number();
138 teuchos_blas.
SCAL(m, alpha, &x1[0], incx);
141 sacado_blas.
SCAL(m, alpha, &x2[0], incx);
145 unsigned int sz = m*(1+ndot);
147 sacado_blas2.SCAL(m, alpha, &x3[0], incx);
154 typedef decltype(this->fad)
FadType;
158 auto ndot = this->ndot_;
160 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
161 for (
unsigned int i=0;
i<m;
i++) {
162 ScalarType
val = this->urand.number();
166 for (
unsigned int k=0; k<ndot; k++) {
167 val = this->urand.number();
168 x1[
i].fastAccessDx(k) =
val;
169 x2[
i].fastAccessDx(k) =
val;
170 x3[
i].fastAccessDx(k) =
val;
173 ScalarType alpha = this->urand.number();
176 teuchos_blas.
SCAL(m, alpha, &x1[0], 1);
179 sacado_blas.
SCAL(m, alpha, &x2[0], 1);
183 unsigned int sz = m*(1+ndot);
185 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
192 typedef decltype(this->fad)
FadType;
196 auto ndot = this->ndot_;
198 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
199 for (
unsigned int i=0;
i<m;
i++) {
200 ScalarType
val = this->urand.number();
205 FadType alpha =
FadType(ndot, this->urand.number());
206 for (
unsigned int k=0; k<ndot; k++)
207 alpha.fastAccessDx(k) = this->urand.number();
210 teuchos_blas.
SCAL(m, alpha, &x1[0], 1);
213 sacado_blas.
SCAL(m, alpha, &x2[0], 1);
217 unsigned int sz = m*(1+ndot);
219 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
226 typedef decltype(this->fad)
FadType;
230 auto ndot = this->ndot_;
232 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
233 for (
unsigned int i=0;
i<m;
i++) {
234 x[
i] =
FadType(ndot, this->urand.number());
235 ScalarType
val = this->urand.number();
239 for (
unsigned int k=0; k<ndot; k++) {
240 x[
i].fastAccessDx(k) = this->urand.number();
241 val = this->urand.number();
242 y1[
i].fastAccessDx(k) =
val;
243 y2[
i].fastAccessDx(k) =
val;
244 y3[
i].fastAccessDx(k) =
val;
249 teuchos_blas.
COPY(m, &
x[0], 1, &y1[0], 1);
252 sacado_blas.
COPY(m, &
x[0], 1, &y2[0], 1);
256 unsigned int sz = 2*m*(1+ndot);
258 sacado_blas2.COPY(m, &
x[0], 1, &y3[0], 1);
265 typedef decltype(this->fad)
FadType;
269 auto ndot = this->ndot_;
271 unsigned int incx = 2;
272 unsigned int incy = 3;
273 VectorType
x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
274 for (
unsigned int i=0;
i<m*incx;
i++) {
275 x[
i] =
FadType(ndot, this->urand.number());
276 for (
unsigned int k=0; k<ndot; k++) {
277 x[
i].fastAccessDx(k) = this->urand.number();
280 for (
unsigned int i=0;
i<m*incy;
i++) {
281 ScalarType
val = this->urand.number();
285 for (
unsigned int k=0; k<ndot; k++) {
286 val = this->urand.number();
287 y1[
i].fastAccessDx(k) =
val;
288 y2[
i].fastAccessDx(k) =
val;
289 y3[
i].fastAccessDx(k) =
val;
294 teuchos_blas.
COPY(m, &
x[0], incx, &y1[0], incy);
297 sacado_blas.
COPY(m, &
x[0], incx, &y2[0], incy);
301 unsigned int sz = 2*m*(1+ndot);
303 sacado_blas2.COPY(m, &
x[0], incx, &y3[0], incy);
310 typedef decltype(this->fad)
FadType;
314 auto ndot = this->ndot_;
316 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
317 for (
unsigned int i=0;
i<m;
i++) {
318 x[
i] = this->urand.number();
320 for (
unsigned int i=0;
i<m;
i++) {
321 ScalarType
val = this->urand.number();
325 for (
unsigned int k=0; k<ndot; k++) {
326 val = this->urand.number();
327 y1[
i].fastAccessDx(k) =
val;
328 y2[
i].fastAccessDx(k) =
val;
329 y3[
i].fastAccessDx(k) =
val;
334 teuchos_blas.
COPY(m, &
x[0], 1, &y1[0], 1);
337 sacado_blas.
COPY(m, &
x[0], 1, &y2[0], 1);
341 unsigned int sz = 2*m*(1+ndot);
343 sacado_blas2.COPY(m, &
x[0], 1, &y3[0], 1);
350 typedef decltype(this->fad)
FadType;
354 auto ndot = this->ndot_;
356 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
357 for (
unsigned int i=0;
i<m;
i++) {
358 x[
i] =
FadType(ndot, this->urand.number());
359 ScalarType
val = this->urand.number();
363 for (
unsigned int k=0; k<ndot; k++) {
364 x[
i].fastAccessDx(k) = this->urand.number();
369 teuchos_blas.
COPY(m, &
x[0], 1, &y1[0], 1);
372 sacado_blas.
COPY(m, &
x[0], 1, &y2[0], 1);
376 unsigned int sz = 2*m*(1+ndot);
378 sacado_blas2.COPY(m, &
x[0], 1, &y3[0], 1);
385 typedef decltype(this->fad)
FadType;
389 auto ndot = this->ndot_;
391 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
392 for (
unsigned int i=0;
i<m;
i++) {
393 x[
i] =
FadType(ndot, this->urand.number());
394 ScalarType
val = this->urand.number();
398 for (
unsigned int k=0; k<ndot; k++) {
399 x[
i].fastAccessDx(k) = this->urand.number();
400 val = this->urand.number();
401 y1[
i].fastAccessDx(k) =
val;
402 y2[
i].fastAccessDx(k) =
val;
403 y3[
i].fastAccessDx(k) =
val;
406 FadType alpha(ndot, this->urand.number());
407 for (
unsigned int k=0; k<ndot; k++)
408 alpha.fastAccessDx(k) = this->urand.number();
411 teuchos_blas.
AXPY(m, alpha, &
x[0], 1, &y1[0], 1);
414 sacado_blas.
AXPY(m, alpha, &
x[0], 1, &y2[0], 1);
418 unsigned int sz = 2*m*(1+ndot);
420 sacado_blas2.AXPY(m, alpha, &
x[0], 1, &y3[0], 1);
427 typedef decltype(this->fad)
FadType;
431 auto ndot = this->ndot_;
433 unsigned int incx = 2;
434 unsigned int incy = 3;
435 VectorType
x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
436 for (
unsigned int i=0;
i<m*incx;
i++) {
437 x[
i] =
FadType(ndot, this->urand.number());
438 for (
unsigned int k=0; k<ndot; k++) {
439 x[
i].fastAccessDx(k) = this->urand.number();
442 for (
unsigned int i=0;
i<m*incy;
i++) {
443 ScalarType
val = this->urand.number();
447 for (
unsigned int k=0; k<ndot; k++) {
448 val = this->urand.number();
449 y1[
i].fastAccessDx(k) =
val;
450 y2[
i].fastAccessDx(k) =
val;
451 y3[
i].fastAccessDx(k) =
val;
454 FadType alpha(ndot, this->urand.number());
455 for (
unsigned int k=0; k<ndot; k++)
456 alpha.fastAccessDx(k) = this->urand.number();
459 teuchos_blas.
AXPY(m, alpha, &
x[0], incx, &y1[0], incy);
462 sacado_blas.
AXPY(m, alpha, &
x[0], incx, &y2[0], incy);
466 unsigned int sz = 2*m*(1+ndot);
468 sacado_blas2.AXPY(m, alpha, &
x[0], incx, &y3[0], incy);
475 typedef decltype(this->fad)
FadType;
479 auto ndot = this->ndot_;
481 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
482 std::vector<ScalarType> xx(m);
483 for (
unsigned int i=0;
i<m;
i++) {
484 xx[
i] = this->urand.number();
486 ScalarType
val = this->urand.number();
491 for (
unsigned int k=0; k<ndot; k++) {
492 val = this->urand.number();
493 y1[
i].fastAccessDx(k) =
val;
494 y2[
i].fastAccessDx(k) =
val;
495 y3[
i].fastAccessDx(k) =
val;
496 y4[
i].fastAccessDx(k) =
val;
499 FadType alpha(ndot, this->urand.number());
500 for (
unsigned int k=0; k<ndot; k++)
501 alpha.fastAccessDx(k) = this->urand.number();
504 teuchos_blas.
AXPY(m, alpha, &
x[0], 1, &y1[0], 1);
507 sacado_blas.
AXPY(m, alpha, &
x[0], 1, &y2[0], 1);
511 unsigned int sz = m*(1+ndot)+m;
513 sacado_blas2.AXPY(m, alpha, &
x[0], 1, &y3[0], 1);
517 sacado_blas.
AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
524 typedef decltype(this->fad)
FadType;
528 auto ndot = this->ndot_;
530 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
531 for (
unsigned int i=0;
i<m;
i++) {
532 x[
i] =
FadType(ndot, this->urand.number());
533 ScalarType
val = this->urand.number();
537 for (
unsigned int k=0; k<ndot; k++) {
538 x[
i].fastAccessDx(k) = this->urand.number();
541 FadType alpha(ndot, this->urand.number());
542 for (
unsigned int k=0; k<ndot; k++)
543 alpha.fastAccessDx(k) = this->urand.number();
546 teuchos_blas.
AXPY(m, alpha, &
x[0], 1, &y1[0], 1);
549 sacado_blas.
AXPY(m, alpha, &
x[0], 1, &y2[0], 1);
553 unsigned int sz = 2*m*(1+ndot);
555 sacado_blas2.AXPY(m, alpha, &
x[0], 1, &y3[0], 1);
562 typedef decltype(this->fad)
FadType;
565 auto ndot = this->ndot_;
567 VectorType X(m,ndot), Y(m,ndot);
568 for (
unsigned int i=0;
i<m;
i++) {
569 X[
i] =
FadType(ndot, this->real_urand.number());
570 Y[
i] =
FadType(ndot, this->real_urand.number());
571 for (
unsigned int k=0; k<ndot; k++) {
572 X[
i].fastAccessDx(k) = this->real_urand.number();
573 Y[
i].fastAccessDx(k) = this->real_urand.number();
578 FadType z1 = teuchos_blas.
DOT(m, &X[0], 1, &Y[0], 1);
581 FadType z2 = sacado_blas.
DOT(m, &X[0], 1, &Y[0], 1);
585 unsigned int sz = 2*m*(1+ndot);
587 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
594 typedef decltype(this->fad)
FadType;
597 auto ndot = this->ndot_;
599 unsigned int incx = 2;
600 unsigned int incy = 3;
601 VectorType X(m*incx,ndot), Y(m*incy,ndot);
602 for (
unsigned int i=0;
i<m*incx;
i++) {
603 X[
i] =
FadType(ndot, this->real_urand.number());
604 for (
unsigned int k=0; k<ndot; k++) {
605 X[
i].fastAccessDx(k) = this->real_urand.number();
608 for (
unsigned int i=0;
i<m*incy;
i++) {
609 Y[
i] =
FadType(ndot, this->real_urand.number());
610 for (
unsigned int k=0; k<ndot; k++) {
611 Y[
i].fastAccessDx(k) = this->real_urand.number();
616 FadType z1 = teuchos_blas.
DOT(m, &X[0], incx, &Y[0], incy);
619 FadType z2 = sacado_blas.
DOT(m, &X[0], incx, &Y[0], incy);
623 unsigned int sz = 2*m*(1+ndot);
625 FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
632 typedef decltype(this->fad)
FadType;
636 auto ndot = this->ndot_;
638 VectorType X(m,0), Y(m,ndot);
639 std::vector<ScalarType>
x(m);
640 for (
unsigned int i=0;
i<m;
i++) {
641 x[
i] = this->urand.number();
643 Y[
i] =
FadType(ndot, this->real_urand.number());
644 for (
unsigned int k=0; k<ndot; k++) {
645 Y[
i].fastAccessDx(k) = this->real_urand.number();
650 FadType z1 = teuchos_blas.
DOT(m, &X[0], 1, &Y[0], 1);
653 FadType z2 = sacado_blas.
DOT(m, &X[0], 1, &Y[0], 1);
657 unsigned int sz = 2*m*(1+ndot);
659 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
663 FadType z4 = sacado_blas.
DOT(m, &x[0], 1, &Y[0], 1);
670 typedef decltype(this->fad)
FadType;
674 auto ndot = this->ndot_;
676 VectorType X(m,ndot), Y(m,0);
677 std::vector<ScalarType>
y(m);
678 for (
unsigned int i=0;
i<m;
i++) {
679 X[
i] =
FadType(ndot, this->real_urand.number());
680 y[
i] = this->urand.number();
682 for (
unsigned int k=0; k<ndot; k++) {
683 X[
i].fastAccessDx(k) = this->real_urand.number();
688 FadType z1 = teuchos_blas.
DOT(m, &X[0], 1, &Y[0], 1);
691 FadType z2 = sacado_blas.
DOT(m, &X[0], 1, &Y[0], 1);
695 unsigned int sz = 2*m*(1+ndot);
697 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
701 FadType z4 = sacado_blas.
DOT(m, &X[0], 1, &y[0], 1);
708 typedef decltype(this->fad)
FadType;
711 auto ndot = this->ndot_;
713 VectorType X(m,ndot);
714 for (
unsigned int i=0;
i<m;
i++) {
715 X[
i] =
FadType(ndot, this->real_urand.number());
716 for (
unsigned int k=0; k<ndot; k++) {
717 X[
i].fastAccessDx(k) = this->real_urand.number();
723 teuchos_blas.
NRM2(m, &X[0], 1);
727 sacado_blas.
NRM2(m, &X[0], 1);
731 unsigned int sz = m*(1+ndot);
734 sacado_blas2.NRM2(m, &X[0], 1);
741 typedef decltype(this->fad)
FadType;
744 auto ndot = this->ndot_;
746 unsigned int incx = 2;
747 VectorType X(m*incx,ndot);
748 for (
unsigned int i=0;
i<m*incx;
i++) {
749 X[
i] =
FadType(ndot, this->real_urand.number());
750 for (
unsigned int k=0; k<ndot; k++) {
751 X[
i].fastAccessDx(k) = this->real_urand.number();
757 teuchos_blas.
NRM2(m, &X[0], incx);
761 sacado_blas.
NRM2(m, &X[0], incx);
765 unsigned int sz = m*(1+ndot);
768 sacado_blas2.NRM2(m, &X[0], incx);
775 typedef decltype(this->fad)
FadType;
780 auto ndot = this->ndot_;
782 VectorType
A(m*
n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
783 for (
unsigned int j=0; j<n; j++) {
784 for (
unsigned int i=0;
i<m;
i++) {
785 A[
i+j*m] =
FadType(ndot, this->urand.number());
786 for (
unsigned int k=0; k<ndot; k++)
789 B[j] =
FadType(ndot, this->urand.number());
790 for (
unsigned int k=0; k<ndot; k++)
793 FadType alpha(ndot, this->urand.number());
794 FadType beta(ndot, this->urand.number());
795 for (
unsigned int k=0; k<ndot; k++) {
796 alpha.fastAccessDx(k) = this->urand.number();
797 beta.fastAccessDx(k) = this->urand.number();
800 for (
unsigned int i=0;
i<m;
i++) {
801 ScalarType
val = this->urand.number();
805 for (
unsigned int k=0; k<ndot; k++) {
806 val = this->urand.number();
807 C1[
i].fastAccessDx(k) =
val;
808 C2[
i].fastAccessDx(k) =
val;
809 C3[
i].fastAccessDx(k) =
val;
823 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
833 typedef decltype(this->fad)
FadType;
838 auto ndot = this->ndot_;
840 unsigned int lda = m+3;
841 unsigned int incb = 2;
842 unsigned int incc = 3;
843 VectorType
A(lda*
n,ndot),
B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot),
845 for (
unsigned int j=0; j<n; j++) {
846 for (
unsigned int i=0;
i<lda;
i++) {
847 A[
i+j*lda] =
FadType(ndot, this->urand.number());
848 for (
unsigned int k=0; k<ndot; k++)
852 for (
unsigned int j=0; j<n*incb; j++) {
853 B[j] =
FadType(ndot, this->urand.number());
854 for (
unsigned int k=0; k<ndot; k++)
857 FadType alpha(ndot, this->urand.number());
858 FadType beta(ndot, this->urand.number());
859 for (
unsigned int k=0; k<ndot; k++) {
860 alpha.fastAccessDx(k) = this->urand.number();
861 beta.fastAccessDx(k) = this->urand.number();
864 for (
unsigned int i=0;
i<m*incc;
i++) {
865 ScalarType
val = this->urand.number();
869 for (
unsigned int k=0; k<ndot; k++) {
870 val = this->urand.number();
871 C1[
i].fastAccessDx(k) =
val;
872 C2[
i].fastAccessDx(k) =
val;
873 C3[
i].fastAccessDx(k) =
val;
887 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
897 typedef decltype(this->fad)
FadType;
902 auto ndot = this->ndot_;
904 VectorType
A(m*
n,ndot),
B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
905 for (
unsigned int j=0; j<n; j++) {
906 for (
unsigned int i=0;
i<m;
i++) {
907 A[
i+j*m] =
FadType(ndot, this->urand.number());
908 for (
unsigned int k=0; k<ndot; k++)
912 for (
unsigned int j=0; j<m; j++) {
913 B[j] =
FadType(ndot, this->urand.number());
914 for (
unsigned int k=0; k<ndot; k++)
917 FadType alpha(ndot, this->urand.number());
918 FadType beta(ndot, this->urand.number());
919 for (
unsigned int k=0; k<ndot; k++) {
920 alpha.fastAccessDx(k) = this->urand.number();
921 beta.fastAccessDx(k) = this->urand.number();
924 for (
unsigned int i=0;
i<n;
i++) {
925 ScalarType
val = this->urand.number();
929 for (
unsigned int k=0; k<ndot; k++) {
930 val = this->urand.number();
931 C1[
i].fastAccessDx(k) =
val;
932 C2[
i].fastAccessDx(k) =
val;
933 C3[
i].fastAccessDx(k) =
val;
947 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
957 typedef decltype(this->fad)
FadType;
962 auto ndot = this->ndot_;
964 unsigned int lda = m+3;
965 unsigned int incb = 2;
966 unsigned int incc = 3;
967 VectorType
A(lda*
n,ndot),
B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot),
969 for (
unsigned int j=0; j<n; j++) {
970 for (
unsigned int i=0;
i<lda;
i++) {
971 A[
i+j*lda] =
FadType(ndot, this->urand.number());
972 for (
unsigned int k=0; k<ndot; k++)
976 for (
unsigned int j=0; j<m*incb; j++) {
977 B[j] =
FadType(ndot, this->urand.number());
978 for (
unsigned int k=0; k<ndot; k++)
981 FadType alpha(ndot, this->urand.number());
982 FadType beta(ndot, this->urand.number());
983 for (
unsigned int k=0; k<ndot; k++) {
984 alpha.fastAccessDx(k) = this->urand.number();
985 beta.fastAccessDx(k) = this->urand.number();
988 for (
unsigned int i=0;
i<n*incc;
i++) {
989 ScalarType
val = this->urand.number();
993 for (
unsigned int k=0; k<ndot; k++) {
994 val = this->urand.number();
995 C1[
i].fastAccessDx(k) =
val;
996 C2[
i].fastAccessDx(k) =
val;
997 C3[
i].fastAccessDx(k) =
val;
1003 beta, &C1[0], incc);
1007 beta, &C2[0], incc);
1011 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1014 beta, &C3[0], incc);
1021 typedef decltype(this->fad)
FadType;
1026 auto ndot = this->ndot_;
1028 VectorType
A(m*
n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1029 for (
unsigned int j=0; j<n; j++) {
1030 for (
unsigned int i=0;
i<m;
i++) {
1031 A[
i+j*m] =
FadType(ndot, this->urand.number());
1032 for (
unsigned int k=0; k<ndot; k++)
1035 B[j] =
FadType(ndot, this->urand.number());
1036 for (
unsigned int k=0; k<ndot; k++)
1039 FadType alpha(ndot, this->urand.number());
1040 FadType beta(ndot, this->urand.number());
1041 for (
unsigned int k=0; k<ndot; k++) {
1042 alpha.fastAccessDx(k) = this->urand.number();
1043 beta.fastAccessDx(k) = this->urand.number();
1046 for (
unsigned int i=0;
i<m;
i++) {
1047 ScalarType
val = this->urand.number();
1063 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1073 typedef decltype(this->fad)
FadType;
1078 auto ndot = this->ndot_;
1080 VectorType
A(m*
n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1081 for (
unsigned int j=0; j<n; j++) {
1082 for (
unsigned int i=0;
i<m;
i++) {
1083 A[
i+j*m] =
FadType(ndot, this->urand.number());
1084 for (
unsigned int k=0; k<ndot; k++)
1087 B[j] =
FadType(ndot, this->urand.number());
1088 for (
unsigned int k=0; k<ndot; k++)
1091 ScalarType alpha = this->urand.number();
1092 ScalarType beta = this->urand.number();
1094 for (
unsigned int i=0;
i<m;
i++) {
1095 ScalarType
val = this->urand.number();
1099 for (
unsigned int k=0; k<ndot; k++) {
1100 val = this->urand.number();
1101 C1[
i].fastAccessDx(k) =
val;
1102 C2[
i].fastAccessDx(k) =
val;
1103 C3[
i].fastAccessDx(k) =
val;
1117 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1127 typedef decltype(this->fad)
FadType;
1132 auto ndot = this->ndot_;
1134 VectorType
A(m*
n,ndot),
B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1136 std::vector<ScalarType> b(n);
1137 for (
unsigned int j=0; j<n; j++) {
1138 for (
unsigned int i=0;
i<m;
i++) {
1139 A[
i+j*m] =
FadType(ndot, this->urand.number());
1140 for (
unsigned int k=0; k<ndot; k++)
1143 b[j] = this->urand.number();
1146 FadType alpha(ndot, this->urand.number());
1147 FadType beta(ndot, this->urand.number());
1148 for (
unsigned int k=0; k<ndot; k++) {
1149 alpha.fastAccessDx(k) = this->urand.number();
1150 beta.fastAccessDx(k) = this->urand.number();
1153 for (
unsigned int i=0;
i<m;
i++) {
1154 ScalarType
val = this->urand.number();
1159 for (
unsigned int k=0; k<ndot; k++) {
1160 val = this->urand.number();
1161 C1[
i].fastAccessDx(k) =
val;
1162 C2[
i].fastAccessDx(k) =
val;
1163 C3[
i].fastAccessDx(k) =
val;
1164 C4[
i].fastAccessDx(k) =
val;
1178 unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
1193 typedef decltype(this->fad)
FadType;
1198 auto ndot = this->ndot_;
1200 VectorType
A(m*
n,0),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1202 std::vector<ScalarType>
a(m*n);
1203 for (
unsigned int j=0; j<n; j++) {
1204 for (
unsigned int i=0;
i<m;
i++) {
1205 a[
i+j*m] = this->urand.number();
1206 A[
i+j*m] = a[
i+j*m];
1208 B[j] =
FadType(ndot, this->urand.number());
1209 for (
unsigned int k=0; k<ndot; k++)
1212 FadType alpha(ndot, this->urand.number());
1213 FadType beta(ndot, this->urand.number());
1214 for (
unsigned int k=0; k<ndot; k++) {
1215 alpha.fastAccessDx(k) = this->urand.number();
1216 beta.fastAccessDx(k) = this->urand.number();
1219 for (
unsigned int i=0;
i<m;
i++) {
1220 ScalarType
val = this->urand.number();
1225 for (
unsigned int k=0; k<ndot; k++) {
1226 val = this->urand.number();
1227 C1[
i].fastAccessDx(k) =
val;
1228 C2[
i].fastAccessDx(k) =
val;
1229 C3[
i].fastAccessDx(k) =
val;
1230 C4[
i].fastAccessDx(k) =
val;
// Entry count for this test's operands: A is built with zero derivative
// components (m*n values only), while B and C carry 1+ndot entries per
// element.  The previous expression had a stray '*' ("m*n* + n*(1+ndot)"),
// which made "+ n" a unary plus and evaluated to m*n*n*(1+ndot) + m*(1+ndot).
// NOTE(review): presumably sz is the workspace size given to sacado_blas2 --
// confirm the reduced size is sufficient for every FadType instantiated.
1244 unsigned int sz = m*n + n*(1+ndot) + m*(1+ndot);
1259 typedef decltype(this->fad)
FadType;
1264 auto ndot = this->ndot_;
1266 VectorType
A(m*
n,0),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1268 std::vector<ScalarType>
a(m*n), b(n);
1269 for (
unsigned int j=0; j<n; j++) {
1270 for (
unsigned int i=0;
i<m;
i++) {
1271 a[
i+j*m] = this->urand.number();
1272 A[
i+j*m] =
a[
i+j*m];
1274 b[j] = this->urand.number();
1277 FadType alpha(ndot, this->urand.number());
1278 FadType beta(ndot, this->urand.number());
1279 for (
unsigned int k=0; k<ndot; k++) {
1280 alpha.fastAccessDx(k) = this->urand.number();
1281 beta.fastAccessDx(k) = this->urand.number();
1284 for (
unsigned int i=0;
i<m;
i++) {
1285 ScalarType
val = this->urand.number();
1290 for (
unsigned int k=0; k<ndot; k++) {
1291 val = this->urand.number();
1292 C1[
i].fastAccessDx(k) =
val;
1293 C2[
i].fastAccessDx(k) =
val;
1294 C3[
i].fastAccessDx(k) =
val;
1295 C4[
i].fastAccessDx(k) =
val;
// Entry count for this test's operands: A is built with zero derivative
// components (m*n values only); B and C are declared with ndot derivative
// slots, so each is counted with 1+ndot entries per element.  The previous
// expression had a stray '*' ("m*n* + n*(1+ndot)"), which made "+ n" a unary
// plus and evaluated to m*n*n*(1+ndot) + m*(1+ndot).
// NOTE(review): presumably sz is the workspace size given to sacado_blas2 --
// confirm the reduced size is sufficient for every FadType instantiated.
1309 unsigned int sz = m*n + n*(1+ndot) + m*(1+ndot);
1324 typedef decltype(this->fad)
FadType;
1328 auto ndot = this->ndot_;
1330 VectorType
A(
n*
n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1331 for (
unsigned int j=0; j<n; j++) {
1332 for (
unsigned int i=0;
i<n;
i++) {
1333 A[
i+j*n] =
FadType(ndot, this->urand.number());
1334 for (
unsigned int k=0; k<ndot; k++)
1337 ScalarType
val = this->urand.number();
1341 for (
unsigned int k=0; k<ndot; k++) {
1342 val = this->urand.number();
1343 x1[j].fastAccessDx(k) =
val;
1344 x2[j].fastAccessDx(k) =
val;
1345 x3[j].fastAccessDx(k) =
val;
1359 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1384 for (
unsigned int i=0;
i<n;
i++) {
1385 A[
i*n+
i].val() = 1.0;
1386 for (
unsigned int k=0; k<ndot; k++)
1401 typedef decltype(this->fad)
FadType;
1405 auto ndot = this->ndot_;
1407 unsigned int lda =
n+3;
1408 unsigned int incx = 2;
1409 VectorType
A(lda*
n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
1410 for (
unsigned int j=0; j<n; j++) {
1411 for (
unsigned int i=0;
i<lda;
i++) {
1412 A[
i+j*lda] =
FadType(ndot, this->urand.number());
1413 for (
unsigned int k=0; k<ndot; k++)
1417 for (
unsigned int j=0; j<n*incx; j++) {
1418 ScalarType
val = this->urand.number();
1422 for (
unsigned int k=0; k<ndot; k++) {
1423 val = this->urand.number();
1424 x1[j].fastAccessDx(k) =
val;
1425 x2[j].fastAccessDx(k) =
val;
1426 x3[j].fastAccessDx(k) =
val;
1440 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1465 for (
unsigned int i=0;
i<n;
i++) {
1466 A[
i*lda+
i].val() = 1.0;
1467 for (
unsigned int k=0; k<ndot; k++)
1482 typedef decltype(this->fad)
FadType;
1486 auto ndot = this->ndot_;
1488 VectorType
A(
n*
n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot),
1490 std::vector<ScalarType>
a(n*n);
1491 for (
unsigned int j=0; j<n; j++) {
1492 for (
unsigned int i=0;
i<n;
i++) {
1493 a[
i+j*n] = this->urand.number();
1494 A[
i+j*n] = a[
i+j*n];
1496 ScalarType
val = this->urand.number();
1502 for (
unsigned int k=0; k<ndot; k++) {
1503 val = this->urand.number();
1504 x1[j].fastAccessDx(k) =
val;
1505 x2[j].fastAccessDx(k) =
val;
1506 x3[j].fastAccessDx(k) =
val;
1507 x4[j].fastAccessDx(k) =
val;
1508 x5[j].fastAccessDx(k) =
val;
1522 unsigned int sz = n*n+n*(1+ndot);
1569 for (
unsigned int i=0;
i<n;
i++) {
1570 A[
i*n+
i].val() = 1.0;
1571 for (
unsigned int k=0; k<ndot; k++)
1592 typedef decltype(this->fad)
FadType;
1596 auto ndot = this->ndot_;
1598 VectorType
A(
n*
n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1599 for (
unsigned int j=0; j<n; j++) {
1600 for (
unsigned int i=0;
i<n;
i++) {
1601 A[
i+j*n] =
FadType(ndot, this->urand.number());
1602 for (
unsigned int k=0; k<ndot; k++)
1605 ScalarType
val = this->urand.number();
1621 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1646 for (
unsigned int i=0;
i<n;
i++) {
1647 A[
i*n+
i].val() = 1.0;
1648 for (
unsigned int k=0; k<ndot; k++)
1663 typedef decltype(this->fad)
FadType;
1668 auto ndot = this->ndot_;
1675 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot),
x(m,ndot),
y(n,ndot);
1676 for (
unsigned int j=0; j<n; j++) {
1677 for (
unsigned int i=0;
i<m;
i++) {
1678 ScalarType
val = this->urand.number();
1682 for (
unsigned int k=0; k<ndot; k++) {
1683 val = this->urand.number();
1684 A1[
i+j*m].fastAccessDx(k) =
val;
1685 A2[
i+j*m].fastAccessDx(k) =
val;
1686 A3[
i+j*m].fastAccessDx(k) =
val;
1690 for (
unsigned int i=0;
i<m;
i++) {
1691 x[
i] =
FadType(ndot, this->urand.number());
1692 for (
unsigned int k=0; k<ndot; k++)
1695 for (
unsigned int i=0;
i<n;
i++) {
1696 y[
i] =
FadType(ndot, this->urand.number());
1697 for (
unsigned int k=0; k<ndot; k++)
1700 FadType alpha(ndot, this->urand.number());
1701 for (
unsigned int k=0; k<ndot; k++) {
1702 alpha.fastAccessDx(k) = this->urand.number();
1706 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
1709 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
1713 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1715 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
1722 typedef decltype(this->fad)
FadType;
1727 auto ndot = this->ndot_;
1734 unsigned int lda = m+3;
1735 unsigned int incx = 2;
1736 unsigned int incy = 3;
1737 VectorType A1(lda*
n,ndot), A2(lda*n,ndot), A3(lda*n,ndot),
x(m*incx,ndot),
1739 for (
unsigned int j=0; j<n; j++) {
1740 for (
unsigned int i=0;
i<lda;
i++) {
1741 ScalarType
val = this->urand.number();
1745 for (
unsigned int k=0; k<ndot; k++) {
1746 val = this->urand.number();
1747 A1[
i+j*lda].fastAccessDx(k) =
val;
1748 A2[
i+j*lda].fastAccessDx(k) =
val;
1749 A3[
i+j*lda].fastAccessDx(k) =
val;
1753 for (
unsigned int i=0;
i<m*incx;
i++) {
1754 x[
i] =
FadType(ndot, this->urand.number());
1755 for (
unsigned int k=0; k<ndot; k++)
1758 for (
unsigned int i=0;
i<n*incy;
i++) {
1759 y[
i] =
FadType(ndot, this->urand.number());
1760 for (
unsigned int k=0; k<ndot; k++)
1763 FadType alpha(ndot, this->urand.number());
1764 for (
unsigned int k=0; k<ndot; k++) {
1765 alpha.fastAccessDx(k) = this->urand.number();
1769 teuchos_blas.
GER(m, n, alpha, &
x[0], incx, &y[0], incy, &A1[0], lda);
1772 sacado_blas.
GER(m, n, alpha, &
x[0], incx, &y[0], incy, &A2[0], lda);
1776 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1778 sacado_blas2.GER(m, n, alpha, &
x[0], incx, &y[0], incy, &A3[0], lda);
1785 typedef decltype(this->fad)
FadType;
1790 auto ndot = this->ndot_;
1797 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot),
x(m,ndot),
y(n,ndot);
1798 for (
unsigned int j=0; j<n; j++) {
1799 for (
unsigned int i=0;
i<m;
i++) {
1800 ScalarType
val = this->urand.number();
1804 for (
unsigned int k=0; k<ndot; k++) {
1805 val = this->urand.number();
1806 A1[
i+j*m].fastAccessDx(k) =
val;
1807 A2[
i+j*m].fastAccessDx(k) =
val;
1808 A3[
i+j*m].fastAccessDx(k) =
val;
1812 for (
unsigned int i=0;
i<m;
i++) {
1813 x[
i] =
FadType(ndot, this->urand.number());
1814 for (
unsigned int k=0; k<ndot; k++)
1817 for (
unsigned int i=0;
i<n;
i++) {
1818 y[
i] =
FadType(ndot, this->urand.number());
1819 for (
unsigned int k=0; k<ndot; k++)
1822 ScalarType alpha = this->urand.number();
1825 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
1828 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
1832 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1834 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
1841 typedef decltype(this->fad)
FadType;
1846 auto ndot = this->ndot_;
1853 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1854 A5(m*n,ndot),
x(m,ndot),
y(n,ndot);
1855 std::vector<ScalarType> xx(m);
1856 for (
unsigned int j=0; j<n; j++) {
1857 for (
unsigned int i=0;
i<m;
i++) {
1858 ScalarType
val = this->urand.number();
1864 for (
unsigned int k=0; k<ndot; k++) {
1865 val = this->urand.number();
1866 A1[
i+j*m].fastAccessDx(k) =
val;
1867 A2[
i+j*m].fastAccessDx(k) =
val;
1868 A3[
i+j*m].fastAccessDx(k) =
val;
1869 A4[
i+j*m].fastAccessDx(k) =
val;
1870 A5[
i+j*m].fastAccessDx(k) =
val;
1874 for (
unsigned int i=0;
i<m;
i++) {
1875 xx[
i] = this->urand.number();
1878 for (
unsigned int i=0;
i<n;
i++) {
1879 y[
i] =
FadType(ndot, this->urand.number());
1880 for (
unsigned int k=0; k<ndot; k++)
1883 FadType alpha(ndot, this->urand.number());
1884 for (
unsigned int k=0; k<ndot; k++) {
1885 alpha.fastAccessDx(k) = this->urand.number();
1889 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
1892 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
1896 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
1898 sacado_blas2.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
1902 sacado_blas.
GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A4[0], m);
1906 sacado_blas2.
GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A5[0], m);
1913 typedef decltype(this->fad)
FadType;
1918 auto ndot = this->ndot_;
1925 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1926 A5(m*n,ndot),
x(m,ndot),
y(n,ndot);
1927 std::vector<ScalarType> yy(n);
1928 for (
unsigned int j=0; j<n; j++) {
1929 for (
unsigned int i=0;
i<m;
i++) {
1930 ScalarType
val = this->urand.number();
1936 for (
unsigned int k=0; k<ndot; k++) {
1937 val = this->urand.number();
1938 A1[
i+j*m].fastAccessDx(k) =
val;
1939 A2[
i+j*m].fastAccessDx(k) =
val;
1940 A3[
i+j*m].fastAccessDx(k) =
val;
1941 A4[
i+j*m].fastAccessDx(k) =
val;
1942 A5[
i+j*m].fastAccessDx(k) =
val;
1946 for (
unsigned int i=0;
i<m;
i++) {
1947 x[
i] =
FadType(ndot, this->urand.number());
1948 for (
unsigned int k=0; k<ndot; k++)
1951 for (
unsigned int i=0;
i<n;
i++) {
1952 yy[
i] = this->urand.number();
1955 FadType alpha(ndot, this->urand.number());
1956 for (
unsigned int k=0; k<ndot; k++) {
1957 alpha.fastAccessDx(k) = this->urand.number();
1961 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
1964 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
1968 unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
1970 sacado_blas2.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
1974 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &yy[0], 1, &A4[0], m);
1978 sacado_blas2.
GER(m, n, alpha, &
x[0], 1, &yy[0], 1, &A5[0], m);
1985 typedef decltype(this->fad)
FadType;
1990 auto ndot = this->ndot_;
1997 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1998 A5(m*n,ndot),
x(m,ndot),
y(n,ndot);
// Scalar-valued copies of the GER operands.  xx is filled for i < m and passed
// as the length-m x vector of GER(m, n, ...), so it must hold m entries (it
// was sized n, which overruns the buffer whenever m > n).  yy is the length-n
// y vector.
1999 std::vector<ScalarType> xx(m), yy(n);
2000 for (
unsigned int j=0; j<n; j++) {
2001 for (
unsigned int i=0;
i<m;
i++) {
2002 ScalarType
val = this->urand.number();
2008 for (
unsigned int k=0; k<ndot; k++) {
2009 val = this->urand.number();
2010 A1[
i+j*m].fastAccessDx(k) =
val;
2011 A2[
i+j*m].fastAccessDx(k) =
val;
2012 A3[
i+j*m].fastAccessDx(k) =
val;
2013 A4[
i+j*m].fastAccessDx(k) =
val;
2014 A5[
i+j*m].fastAccessDx(k) =
val;
2018 for (
unsigned int i=0;
i<m;
i++) {
2019 xx[
i] = this->urand.number();
2022 for (
unsigned int i=0;
i<n;
i++) {
2023 yy[
i] = this->urand.number();
2026 FadType alpha(ndot, this->urand.number());
2027 for (
unsigned int k=0; k<ndot; k++) {
2028 alpha.fastAccessDx(k) = this->urand.number();
2032 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
2035 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
2039 unsigned int sz = m*n*(1+ndot) + m + n;
2041 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
2045 sacado_blas.
GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
2049 sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
// Fragment of a GER test body with Fad-valued x, y and alpha. The test's
// opening line and several interior lines (including loop bodies and closing
// braces) are not visible in this listing.
2056 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2061 auto ndot = this->ndot_;
// Three m*n matrices (one per GER call) plus x (m entries) and y (n entries).
2068 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot),
x(m,ndot),
y(n,ndot);
2069 for (
unsigned int j=0; j<n; j++) {
2070 for (
unsigned int i=0;
i<m;
i++) {
2071 ScalarType
val = this->urand.number();
// x and y receive random values; the derivative-fill loop bodies are not
// visible here.
2077 for (
unsigned int i=0;
i<m;
i++) {
2078 x[
i] =
FadType(ndot, this->urand.number());
2079 for (
unsigned int k=0; k<ndot; k++)
2082 for (
unsigned int i=0;
i<n;
i++) {
2083 y[
i] =
FadType(ndot, this->urand.number());
2084 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha with random value and random derivative components.
2087 FadType alpha(ndot, this->urand.number());
2088 for (
unsigned int k=0; k<ndot; k++) {
2089 alpha.fastAccessDx(k) = this->urand.number();
// The same GER call is issued through three BLAS objects, each writing its
// own matrix copy.
2093 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
2096 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
// NOTE(review): sz is not consumed on any visible line; presumably it sizes
// the workspace of sacado_blas2 - confirm against the full file.
2100 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
2102 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
// Fragment of a test body that builds Fad matrices A (m*l), B (l*n) and three
// m*n result matrices C1..C3, then passes them to four groups of BLAS calls.
// The test's opening line, the first line of every call, and several interior
// lines are not visible in this listing.
2109 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2115 auto ndot = this->ndot_;
2117 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// A: entry (i,j) stored at i+j*m; derivative-fill loop body not visible.
2118 for (
unsigned int j=0; j<l; j++) {
2119 for (
unsigned int i=0;
i<m;
i++) {
2120 A[
i+j*m] =
FadType(ndot, this->urand.number());
2121 for (
unsigned int k=0; k<ndot; k++)
// B: entry (i,j) stored at i+j*l.
2125 for (
unsigned int j=0; j<n; j++) {
2126 for (
unsigned int i=0;
i<l;
i++) {
2127 B[
i+j*l] =
FadType(ndot, this->urand.number());
2128 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
2132 FadType alpha(ndot, this->urand.number());
2133 FadType beta(ndot, this->urand.number());
2134 for (
unsigned int k=0; k<ndot; k++) {
2135 alpha.fastAccessDx(k) = this->urand.number();
2136 beta.fastAccessDx(k) = this->urand.number();
// C1..C3 receive identical derivative values so the three calls start from
// the same input.
2139 for (
unsigned int j=0; j<n; j++) {
2140 for (
unsigned int i=0;
i<m;
i++) {
2141 ScalarType
val = this->urand.number();
2145 for (
unsigned int k=0; k<ndot; k++) {
2146 val = this->urand.number();
2147 C1[
i+j*m].fastAccessDx(k) =
val;
2148 C2[
i+j*m].fastAccessDx(k) =
val;
2149 C3[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
// NOTE(review): the (A, lda, B, ldb, beta, C, ldc) pattern suggests GEMM and
// the changing lda/ldb values (m or l, l or n) suggest transpose variants -
// confirm against the full file.
2156 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2160 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
2164 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2167 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2173 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2175 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2177 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2184 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2186 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2188 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2195 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2197 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2199 &
A[0], l, &
B[0], n, beta, &C3[0], m);
// Fragment of a test body using leading dimensions larger than the row
// counts (lda = m+4, ldb = l+4, ldc = m+5). The test's opening line, the
// first line of every call, and several interior lines are not visible.
2207 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2213 auto ndot = this->ndot_;
2215 unsigned int lda = m+4;
2216 unsigned int ldb = l+4;
2217 unsigned int ldc = m+5;
// The declaration continues on a line not visible here (it ends in a comma).
2218 VectorType
A(lda*l,ndot),
B(ldb*
n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// The fill loops run over all lda/ldb/ldc rows of each column, so the padding
// rows are initialised too.
2220 for (
unsigned int j=0; j<l; j++) {
2221 for (
unsigned int i=0;
i<lda;
i++) {
2222 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2223 for (
unsigned int k=0; k<ndot; k++)
2227 for (
unsigned int j=0; j<n; j++) {
2228 for (
unsigned int i=0;
i<ldb;
i++) {
2229 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2230 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
2234 FadType alpha(ndot, this->urand.number());
2235 FadType beta(ndot, this->urand.number());
2236 for (
unsigned int k=0; k<ndot; k++) {
2237 alpha.fastAccessDx(k) = this->urand.number();
2238 beta.fastAccessDx(k) = this->urand.number();
// C1..C3 receive identical derivative values.
2241 for (
unsigned int j=0; j<n; j++) {
2242 for (
unsigned int i=0;
i<ldc;
i++) {
2243 ScalarType
val = this->urand.number();
2247 for (
unsigned int k=0; k<ndot; k++) {
2248 val = this->urand.number();
2249 C1[
i+j*ldc].fastAccessDx(k) =
val;
2250 C2[
i+j*ldc].fastAccessDx(k) =
val;
2251 C3[
i+j*ldc].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
// NOTE(review): presumably GEMM - confirm against the full file.
2258 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2262 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is built from m, l, n rather than the padded leading dimensions.
// NOTE(review): its consumer is not visible; presumably a workspace size.
2266 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2269 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// Fragment of a test body using padded leading dimensions with A stored as
// lda*m entries (lda = l+3, ldb = l+4, ldc = m+5). The test's opening line,
// the first line of every call, and several interior lines are not visible.
// NOTE(review): the lda = l+3 / m-column layout suggests A is used
// transposed - confirm against the full file.
2276 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2282 auto ndot = this->ndot_;
2284 unsigned int lda = l+3;
2285 unsigned int ldb = l+4;
2286 unsigned int ldc = m+5;
// The declaration continues on a line not visible here (it ends in a comma).
2287 VectorType
A(lda*m,ndot),
B(ldb*
n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// The fill loops cover all lda/ldb/ldc rows of every column.
2289 for (
unsigned int j=0; j<m; j++) {
2290 for (
unsigned int i=0;
i<lda;
i++) {
2291 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2292 for (
unsigned int k=0; k<ndot; k++)
2296 for (
unsigned int j=0; j<n; j++) {
2297 for (
unsigned int i=0;
i<ldb;
i++) {
2298 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2299 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
2303 FadType alpha(ndot, this->urand.number());
2304 FadType beta(ndot, this->urand.number());
2305 for (
unsigned int k=0; k<ndot; k++) {
2306 alpha.fastAccessDx(k) = this->urand.number();
2307 beta.fastAccessDx(k) = this->urand.number();
// C1..C3 receive identical derivative values.
2310 for (
unsigned int j=0; j<n; j++) {
2311 for (
unsigned int i=0;
i<ldc;
i++) {
2312 ScalarType
val = this->urand.number();
2316 for (
unsigned int k=0; k<ndot; k++) {
2317 val = this->urand.number();
2318 C1[
i+j*ldc].fastAccessDx(k) =
val;
2319 C2[
i+j*ldc].fastAccessDx(k) =
val;
2320 C3[
i+j*ldc].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
2327 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2331 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
2335 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2338 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// Fragment of a test body using padded leading dimensions with B stored as
// ldb*l entries (lda = m+4, ldb = n+4, ldc = m+5). The test's opening line,
// the first line of every call, and several interior lines are not visible.
// NOTE(review): the ldb = n+4 / l-column layout suggests B is used
// transposed - confirm against the full file.
2345 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2351 auto ndot = this->ndot_;
2353 unsigned int lda = m+4;
2354 unsigned int ldb =
n+4;
2355 unsigned int ldc = m+5;
// The declaration continues on a line not visible here (it ends in a comma).
2356 VectorType
A(lda*l,ndot),
B(ldb*l,ndot), C1(ldc*
n,ndot), C2(ldc*n,ndot),
// The fill loops cover all lda/ldb/ldc rows of every column.
2358 for (
unsigned int j=0; j<l; j++) {
2359 for (
unsigned int i=0;
i<lda;
i++) {
2360 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2361 for (
unsigned int k=0; k<ndot; k++)
2365 for (
unsigned int j=0; j<l; j++) {
2366 for (
unsigned int i=0;
i<ldb;
i++) {
2367 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2368 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
2372 FadType alpha(ndot, this->urand.number());
2373 FadType beta(ndot, this->urand.number());
2374 for (
unsigned int k=0; k<ndot; k++) {
2375 alpha.fastAccessDx(k) = this->urand.number();
2376 beta.fastAccessDx(k) = this->urand.number();
// C1..C3 receive identical derivative values.
2379 for (
unsigned int j=0; j<n; j++) {
2380 for (
unsigned int i=0;
i<ldc;
i++) {
2381 ScalarType
val = this->urand.number();
2385 for (
unsigned int k=0; k<ndot; k++) {
2386 val = this->urand.number();
2387 C1[
i+j*ldc].fastAccessDx(k) =
val;
2388 C2[
i+j*ldc].fastAccessDx(k) =
val;
2389 C3[
i+j*ldc].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
2396 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2400 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
2404 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2407 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// Fragment of a test body using padded leading dimensions with A stored as
// lda*m entries and B as ldb*l entries (lda = l+3, ldb = n+4, ldc = m+5).
// The test's opening line, the first line of every call, and several interior
// lines are not visible.
// NOTE(review): this layout suggests both A and B are used transposed -
// confirm against the full file.
2414 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2420 auto ndot = this->ndot_;
2422 unsigned int lda = l+3;
2423 unsigned int ldb =
n+4;
2424 unsigned int ldc = m+5;
// The declaration continues on a line not visible here (it ends in a comma).
2425 VectorType
A(lda*m,ndot),
B(ldb*l,ndot), C1(ldc*
n,ndot), C2(ldc*n,ndot),
// The fill loops cover all lda/ldb/ldc rows of every column.
2427 for (
unsigned int j=0; j<m; j++) {
2428 for (
unsigned int i=0;
i<lda;
i++) {
2429 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2430 for (
unsigned int k=0; k<ndot; k++)
2434 for (
unsigned int j=0; j<l; j++) {
2435 for (
unsigned int i=0;
i<ldb;
i++) {
2436 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2437 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
2441 FadType alpha(ndot, this->urand.number());
2442 FadType beta(ndot, this->urand.number());
2443 for (
unsigned int k=0; k<ndot; k++) {
2444 alpha.fastAccessDx(k) = this->urand.number();
2445 beta.fastAccessDx(k) = this->urand.number();
// C1..C3 receive identical derivative values.
2448 for (
unsigned int j=0; j<n; j++) {
2449 for (
unsigned int i=0;
i<ldc;
i++) {
2450 ScalarType
val = this->urand.number();
2454 for (
unsigned int k=0; k<ndot; k++) {
2455 val = this->urand.number();
2456 C1[
i+j*ldc].fastAccessDx(k) =
val;
2457 C2[
i+j*ldc].fastAccessDx(k) =
val;
2458 C3[
i+j*ldc].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
2465 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2469 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
2473 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2476 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// Fragment of a test body with Fad matrices A (m*l), B (l*n), C1..C3 (m*n)
// and Fad-valued alpha/beta, followed by four groups of BLAS calls. The
// test's opening line, the first line of every call, and the lines that
// initialise C1..C3 after `val` is drawn are not visible in this listing.
2483 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2489 auto ndot = this->ndot_;
2491 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// A: entry (i,j) stored at i+j*m.
2492 for (
unsigned int j=0; j<l; j++) {
2493 for (
unsigned int i=0;
i<m;
i++) {
2494 A[
i+j*m] =
FadType(ndot, this->urand.number());
2495 for (
unsigned int k=0; k<ndot; k++)
// B: entry (i,j) stored at i+j*l.
2499 for (
unsigned int j=0; j<n; j++) {
2500 for (
unsigned int i=0;
i<l;
i++) {
2501 B[
i+j*l] =
FadType(ndot, this->urand.number());
2502 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
2506 FadType alpha(ndot, this->urand.number());
2507 FadType beta(ndot, this->urand.number());
2508 for (
unsigned int k=0; k<ndot; k++) {
2509 alpha.fastAccessDx(k) = this->urand.number();
2510 beta.fastAccessDx(k) = this->urand.number();
// One random value is drawn per (i,j); how it is stored is not visible here.
2513 for (
unsigned int j=0; j<n; j++) {
2514 for (
unsigned int i=0;
i<m;
i++) {
2515 ScalarType
val = this->urand.number();
// Trailing arguments of calls whose first lines are not visible.
// NOTE(review): presumably GEMM transpose variants - confirm.
2524 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2528 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
2532 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2535 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2541 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2543 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2545 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2552 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2554 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2556 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2563 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2565 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2567 &
A[0], l, &
B[0], n, beta, &C3[0], m);
// Fragment of a test body like the neighbouring matrix-product fragments but
// with alpha and beta declared as plain ScalarType rather than FadType. The
// test's opening line, the first line of every call, and several interior
// lines are not visible in this listing.
2575 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2581 auto ndot = this->ndot_;
2583 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// A: entry (i,j) stored at i+j*m.
2584 for (
unsigned int j=0; j<l; j++) {
2585 for (
unsigned int i=0;
i<m;
i++) {
2586 A[
i+j*m] =
FadType(ndot, this->urand.number());
2587 for (
unsigned int k=0; k<ndot; k++)
// B: entry (i,j) stored at i+j*l.
2591 for (
unsigned int j=0; j<n; j++) {
2592 for (
unsigned int i=0;
i<l;
i++) {
2593 B[
i+j*l] =
FadType(ndot, this->urand.number());
2594 for (
unsigned int k=0; k<ndot; k++)
// Scalar (derivative-free) coefficients.
2598 ScalarType alpha = this->urand.number();
2599 ScalarType beta = this->urand.number();
// C1..C3 receive identical derivative values.
2601 for (
unsigned int j=0; j<n; j++) {
2602 for (
unsigned int i=0;
i<m;
i++) {
2603 ScalarType
val = this->urand.number();
2607 for (
unsigned int k=0; k<ndot; k++) {
2608 val = this->urand.number();
2609 C1[
i+j*m].fastAccessDx(k) =
val;
2610 C2[
i+j*m].fastAccessDx(k) =
val;
2611 C3[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
// NOTE(review): presumably GEMM transpose variants - confirm.
2618 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2622 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
2626 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2629 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2635 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2637 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2639 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2646 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2648 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2650 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2657 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2659 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2661 &
A[0], l, &
B[0], n, beta, &C3[0], m);
// Fragment of a test body in which A is mirrored by a plain ScalarType vector
// `a`: every A entry is assigned from the matching `a` entry, and the later
// calls take either &A[0] (into C1..C3) or &a[0] (into C4, C5). The test's
// opening line, the first line of every call, and several interior lines are
// not visible in this listing.
2669 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2675 auto ndot = this->ndot_;
2677 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2678 C4(m*n,ndot), C5(m*n,ndot);
2679 std::vector<ScalarType>
a(m*l);
// A takes its values from the scalar mirror; no derivative fill is visible.
2680 for (
unsigned int j=0; j<l; j++) {
2681 for (
unsigned int i=0;
i<m;
i++) {
2682 a[
i+j*m] = this->urand.number();
2683 A[
i+j*m] = a[
i+j*m];
// B: entry (i,j) stored at i+j*l.
2686 for (
unsigned int j=0; j<n; j++) {
2687 for (
unsigned int i=0;
i<l;
i++) {
2688 B[
i+j*l] =
FadType(ndot, this->urand.number());
2689 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
2693 FadType alpha(ndot, this->urand.number());
2694 FadType beta(ndot, this->urand.number());
2695 for (
unsigned int k=0; k<ndot; k++) {
2696 alpha.fastAccessDx(k) = this->urand.number();
2697 beta.fastAccessDx(k) = this->urand.number();
// C1..C5 receive identical derivative values.
2700 for (
unsigned int j=0; j<n; j++) {
2701 for (
unsigned int i=0;
i<m;
i++) {
2702 ScalarType
val = this->urand.number();
2708 for (
unsigned int k=0; k<ndot; k++) {
2709 val = this->urand.number();
2710 C1[
i+j*m].fastAccessDx(k) =
val;
2711 C2[
i+j*m].fastAccessDx(k) =
val;
2712 C3[
i+j*m].fastAccessDx(k) =
val;
2713 C4[
i+j*m].fastAccessDx(k) =
val;
2714 C5[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
// NOTE(review): presumably GEMM transpose variants - confirm.
2721 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2725 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// The A term here is m*l, without the (1+ndot) factor used for B and C.
// NOTE(review): sz's consumer is not visible; presumably a workspace size.
2729 unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
2732 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2737 &a[0], m, &
B[0], l, beta, &C4[0], m);
2742 &a[0], m, &
B[0], l, beta, &C5[0], m);
2748 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2750 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2752 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2754 &a[0], l, &
B[0], l, beta, &C4[0], m);
2756 &a[0], l, &
B[0], l, beta, &C5[0], m);
2765 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2767 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2769 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2771 &a[0], m, &
B[0], n, beta, &C4[0], m);
2773 &a[0], m, &
B[0], n, beta, &C5[0], m);
2782 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2784 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2786 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2788 &a[0], l, &
B[0], n, beta, &C4[0], m);
2790 &a[0], l, &
B[0], n, beta, &C5[0], m);
// Fragment of a test body in which B is mirrored by a plain ScalarType vector
// `b`: every B entry is assigned from the matching `b` entry, and the later
// calls take either &B[0] (into C1..C3) or &b[0] (into C4, C5). The test's
// opening line, the first line of every call, and several interior lines are
// not visible in this listing.
2800 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2806 auto ndot = this->ndot_;
2808 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2809 C4(m*n,ndot), C5(m*n,ndot);
2810 std::vector<ScalarType> b(l*n);
// A: entry (i,j) stored at i+j*m.
2811 for (
unsigned int j=0; j<l; j++) {
2812 for (
unsigned int i=0;
i<m;
i++) {
2813 A[
i+j*m] =
FadType(ndot, this->urand.number());
2814 for (
unsigned int k=0; k<ndot; k++)
// B takes its values from the scalar mirror; no derivative fill is visible.
2818 for (
unsigned int j=0; j<n; j++) {
2819 for (
unsigned int i=0;
i<l;
i++) {
2820 b[
i+j*l] = this->urand.number();
2821 B[
i+j*l] = b[
i+j*l];
// Fad-valued alpha and beta with random values and derivatives.
2824 FadType alpha(ndot, this->urand.number());
2825 FadType beta(ndot, this->urand.number());
2826 for (
unsigned int k=0; k<ndot; k++) {
2827 alpha.fastAccessDx(k) = this->urand.number();
2828 beta.fastAccessDx(k) = this->urand.number();
// C1..C5 receive identical derivative values.
2831 for (
unsigned int j=0; j<n; j++) {
2832 for (
unsigned int i=0;
i<m;
i++) {
2833 ScalarType
val = this->urand.number();
2839 for (
unsigned int k=0; k<ndot; k++) {
2840 val = this->urand.number();
2841 C1[
i+j*m].fastAccessDx(k) =
val;
2842 C2[
i+j*m].fastAccessDx(k) =
val;
2843 C3[
i+j*m].fastAccessDx(k) =
val;
2844 C4[
i+j*m].fastAccessDx(k) =
val;
2845 C5[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
// NOTE(review): presumably GEMM transpose variants - confirm.
2852 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2856 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// The B term here is l*n, without the (1+ndot) factor used for A and C.
// NOTE(review): sz's consumer is not visible; presumably a workspace size.
2860 unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
2863 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2868 &
A[0], m, &b[0], l, beta, &C4[0], m);
2873 &
A[0], m, &b[0], l, beta, &C5[0], m);
2879 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2881 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2883 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2885 &
A[0], l, &b[0], l, beta, &C4[0], m);
2887 &
A[0], l, &b[0], l, beta, &C5[0], m);
2896 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2898 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2900 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2902 &
A[0], m, &b[0], n, beta, &C4[0], m);
2904 &
A[0], m, &b[0], n, beta, &C5[0], m);
2913 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2915 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2917 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2919 &
A[0], l, &b[0], n, beta, &C4[0], m);
2921 &
A[0], l, &b[0], n, beta, &C5[0], m);
// Fragment of a test body in which both A and B are mirrored by plain
// ScalarType vectors `a` and `b`; the later calls take either the Fad
// matrices (into C1..C3) or the scalar mirrors (into C4, C5). The test's
// opening line, the first line of every call, and several interior lines are
// not visible in this listing.
2931 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
2937 auto ndot = this->ndot_;
2939 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2940 C4(m*n,ndot), C5(m*n,ndot);
2941 std::vector<ScalarType>
a(m*l), b(l*n);
// A and B take their values from the scalar mirrors.
2942 for (
unsigned int j=0; j<l; j++) {
2943 for (
unsigned int i=0;
i<m;
i++) {
2944 a[
i+j*m] = this->urand.number();
2945 A[
i+j*m] =
a[
i+j*m];
2948 for (
unsigned int j=0; j<n; j++) {
2949 for (
unsigned int i=0;
i<l;
i++) {
2950 b[
i+j*l] = this->urand.number();
2951 B[
i+j*l] = b[
i+j*l];
// Fad-valued alpha and beta with random values and derivatives.
2954 FadType alpha(ndot, this->urand.number());
2955 FadType beta(ndot, this->urand.number());
2956 for (
unsigned int k=0; k<ndot; k++) {
2957 alpha.fastAccessDx(k) = this->urand.number();
2958 beta.fastAccessDx(k) = this->urand.number();
// C1..C5 receive identical derivative values.
2961 for (
unsigned int j=0; j<n; j++) {
2962 for (
unsigned int i=0;
i<m;
i++) {
2963 ScalarType
val = this->urand.number();
2969 for (
unsigned int k=0; k<ndot; k++) {
2970 val = this->urand.number();
2971 C1[
i+j*m].fastAccessDx(k) =
val;
2972 C2[
i+j*m].fastAccessDx(k) =
val;
2973 C3[
i+j*m].fastAccessDx(k) =
val;
2974 C4[
i+j*m].fastAccessDx(k) =
val;
2975 C5[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
// NOTE(review): presumably GEMM transpose variants - confirm.
2982 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2986 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// Only the C term carries the (1+ndot) factor here.
// NOTE(review): sz's consumer is not visible; presumably a workspace size.
2990 unsigned int sz = m*l + l*n + m*n*(1+ndot);
2993 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2998 &
a[0], m, &b[0], l, beta, &C4[0], m);
3003 &
a[0], m, &b[0], l, beta, &C5[0], m);
3009 &
A[0], l, &
B[0], l, beta, &C1[0], m);
3011 &
A[0], l, &
B[0], l, beta, &C2[0], m);
3013 &
A[0], l, &
B[0], l, beta, &C3[0], m);
3015 &
a[0], l, &b[0], l, beta, &C4[0], m);
3017 &
a[0], l, &b[0], l, beta, &C5[0], m);
3026 &
A[0], m, &
B[0], n, beta, &C1[0], m);
3028 &
A[0], m, &
B[0], n, beta, &C2[0], m);
3030 &
A[0], m, &
B[0], n, beta, &C3[0], m);
3032 &
a[0], m, &b[0], n, beta, &C4[0], m);
3034 &
a[0], m, &b[0], n, beta, &C5[0], m);
3043 &
A[0], l, &
B[0], n, beta, &C1[0], m);
3045 &
A[0], l, &
B[0], n, beta, &C2[0], m);
3047 &
A[0], l, &
B[0], n, beta, &C3[0], m);
3049 &
a[0], l, &b[0], n, beta, &C4[0], m);
3051 &
a[0], l, &b[0], n, beta, &C5[0], m);
// Fragment of a test body with a square Fad matrix A (m*m), Fad matrix B
// (m*n) and three m*n result matrices, followed by two groups of BLAS calls.
// The test's opening line, the first line of every call, and several interior
// lines are not visible in this listing.
// NOTE(review): a square A sized by the row count suggests a SYMM-style call
// with A on the left - confirm against the full file.
3061 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3066 auto ndot = this->ndot_;
3073 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// A and B: entry (i,j) stored at i+j*m.
3074 for (
unsigned int j=0; j<m; j++) {
3075 for (
unsigned int i=0;
i<m;
i++) {
3076 A[
i+j*m] =
FadType(ndot, this->urand.number());
3077 for (
unsigned int k=0; k<ndot; k++)
3081 for (
unsigned int j=0; j<n; j++) {
3082 for (
unsigned int i=0;
i<m;
i++) {
3083 B[
i+j*m] =
FadType(ndot, this->urand.number());
3084 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
3088 FadType alpha(ndot, this->urand.number());
3089 FadType beta(ndot, this->urand.number());
3090 for (
unsigned int k=0; k<ndot; k++) {
3091 alpha.fastAccessDx(k) = this->urand.number();
3092 beta.fastAccessDx(k) = this->urand.number();
// C1..C3 receive identical derivative values.
3095 for (
unsigned int j=0; j<n; j++) {
3096 for (
unsigned int i=0;
i<m;
i++) {
3097 ScalarType
val = this->urand.number();
3101 for (
unsigned int k=0; k<ndot; k++) {
3102 val = this->urand.number();
3103 C1[
i+j*m].fastAccessDx(k) =
val;
3104 C2[
i+j*m].fastAccessDx(k) =
val;
3105 C3[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
3112 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3116 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
3120 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3123 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3129 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3131 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3133 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Fragment of a test body with a square Fad matrix A (n*n), Fad matrix B
// (m*n) and three m*n result matrices, followed by two groups of BLAS calls.
// The test's opening line, the first line of every call, and several interior
// lines are not visible in this listing.
// NOTE(review): a square A sized by the column count suggests a SYMM-style
// call with A on the right - confirm against the full file.
3141 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3146 auto ndot = this->ndot_;
3153 VectorType
A(
n*
n,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// A: entry (i,j) stored at i+j*n.
3154 for (
unsigned int j=0; j<n; j++) {
3155 for (
unsigned int i=0;
i<n;
i++) {
3156 A[
i+j*n] =
FadType(ndot, this->urand.number());
3157 for (
unsigned int k=0; k<ndot; k++)
// B: entry (i,j) stored at i+j*m.
3161 for (
unsigned int j=0; j<n; j++) {
3162 for (
unsigned int i=0;
i<m;
i++) {
3163 B[
i+j*m] =
FadType(ndot, this->urand.number());
3164 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
3168 FadType alpha(ndot, this->urand.number());
3169 FadType beta(ndot, this->urand.number());
3170 for (
unsigned int k=0; k<ndot; k++) {
3171 alpha.fastAccessDx(k) = this->urand.number();
3172 beta.fastAccessDx(k) = this->urand.number();
// C1..C3 receive identical derivative values.
3175 for (
unsigned int j=0; j<n; j++) {
3176 for (
unsigned int i=0;
i<m;
i++) {
3177 ScalarType
val = this->urand.number();
3181 for (
unsigned int k=0; k<ndot; k++) {
3182 val = this->urand.number();
3183 C1[
i+j*m].fastAccessDx(k) =
val;
3184 C2[
i+j*m].fastAccessDx(k) =
val;
3185 C3[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
3192 &
A[0], n, &
B[0], m, beta, &C1[0], m);
3196 &
A[0], n, &
B[0], m, beta, &C2[0], m);
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
3200 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3203 &
A[0], n, &
B[0], m, beta, &C3[0], m);
3209 &
A[0], n, &
B[0], m, beta, &C1[0], m);
3211 &
A[0], n, &
B[0], m, beta, &C2[0], m);
3213 &
A[0], n, &
B[0], m, beta, &C3[0], m);
// Fragment of a test body with an m-column A and padded leading dimensions
// (lda = m+4, ldb = m+5, ldc = m+6). The test's opening line, the first line
// of every call, and several interior lines are not visible in this listing.
3221 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3226 auto ndot = this->ndot_;
3233 unsigned int lda = m+4;
3234 unsigned int ldb = m+5;
3235 unsigned int ldc = m+6;
// The declaration continues on a line not visible here (it ends in a comma).
3236 VectorType
A(lda*m,ndot),
B(ldb*
n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// The fill loops cover all lda/ldb/ldc rows of every column.
3238 for (
unsigned int j=0; j<m; j++) {
3239 for (
unsigned int i=0;
i<lda;
i++) {
3240 A[
i+j*lda] =
FadType(ndot, this->urand.number());
3241 for (
unsigned int k=0; k<ndot; k++)
3245 for (
unsigned int j=0; j<n; j++) {
3246 for (
unsigned int i=0;
i<ldb;
i++) {
3247 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
3248 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
3252 FadType alpha(ndot, this->urand.number());
3253 FadType beta(ndot, this->urand.number());
3254 for (
unsigned int k=0; k<ndot; k++) {
3255 alpha.fastAccessDx(k) = this->urand.number();
3256 beta.fastAccessDx(k) = this->urand.number();
// C1..C3 receive identical derivative values.
3259 for (
unsigned int j=0; j<n; j++) {
3260 for (
unsigned int i=0;
i<ldc;
i++) {
3261 ScalarType
val = this->urand.number();
3265 for (
unsigned int k=0; k<ndot; k++) {
3266 val = this->urand.number();
3267 C1[
i+j*ldc].fastAccessDx(k) =
val;
3268 C2[
i+j*ldc].fastAccessDx(k) =
val;
3269 C3[
i+j*ldc].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
3276 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3280 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is built from m and n rather than the padded leading dimensions.
// NOTE(review): its consumer is not visible; presumably a workspace size.
3284 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3287 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
3293 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3295 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3297 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// Fragment of a test body with an n-column A and padded leading dimensions
// (lda = n+4, ldb = m+5, ldc = m+6). The test's opening line, the first line
// of every call, and several interior lines are not visible in this listing.
3305 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3310 auto ndot = this->ndot_;
3317 unsigned int lda =
n+4;
3318 unsigned int ldb = m+5;
3319 unsigned int ldc = m+6;
// The declaration continues on a line not visible here (it ends in a comma).
3320 VectorType
A(lda*
n,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// The fill loops cover all lda/ldb/ldc rows of every column.
3322 for (
unsigned int j=0; j<n; j++) {
3323 for (
unsigned int i=0;
i<lda;
i++) {
3324 A[
i+j*lda] =
FadType(ndot, this->urand.number());
3325 for (
unsigned int k=0; k<ndot; k++)
3329 for (
unsigned int j=0; j<n; j++) {
3330 for (
unsigned int i=0;
i<ldb;
i++) {
3331 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
3332 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
3336 FadType alpha(ndot, this->urand.number());
3337 FadType beta(ndot, this->urand.number());
3338 for (
unsigned int k=0; k<ndot; k++) {
3339 alpha.fastAccessDx(k) = this->urand.number();
3340 beta.fastAccessDx(k) = this->urand.number();
// C1..C3 receive identical derivative values.
3343 for (
unsigned int j=0; j<n; j++) {
3344 for (
unsigned int i=0;
i<ldc;
i++) {
3345 ScalarType
val = this->urand.number();
3349 for (
unsigned int k=0; k<ndot; k++) {
3350 val = this->urand.number();
3351 C1[
i+j*ldc].fastAccessDx(k) =
val;
3352 C2[
i+j*ldc].fastAccessDx(k) =
val;
3353 C3[
i+j*ldc].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
3360 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3364 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is built from m and n rather than the padded leading dimensions.
// NOTE(review): its consumer is not visible; presumably a workspace size.
3368 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3371 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
3377 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3379 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3381 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// Fragment of a test body with a square Fad matrix A (m*m), Fad matrix B
// (m*n), three m*n result matrices and Fad-valued alpha/beta. The test's
// opening line, the first line of every call, and the lines that initialise
// C1..C3 after `val` is drawn are not visible in this listing.
3389 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3394 auto ndot = this->ndot_;
3401 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// A and B: entry (i,j) stored at i+j*m.
3402 for (
unsigned int j=0; j<m; j++) {
3403 for (
unsigned int i=0;
i<m;
i++) {
3404 A[
i+j*m] =
FadType(ndot, this->urand.number());
3405 for (
unsigned int k=0; k<ndot; k++)
3409 for (
unsigned int j=0; j<n; j++) {
3410 for (
unsigned int i=0;
i<m;
i++) {
3411 B[
i+j*m] =
FadType(ndot, this->urand.number());
3412 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
3416 FadType alpha(ndot, this->urand.number());
3417 FadType beta(ndot, this->urand.number());
3418 for (
unsigned int k=0; k<ndot; k++) {
3419 alpha.fastAccessDx(k) = this->urand.number();
3420 beta.fastAccessDx(k) = this->urand.number();
// One random value is drawn per (i,j); how it is stored is not visible here.
3423 for (
unsigned int j=0; j<n; j++) {
3424 for (
unsigned int i=0;
i<m;
i++) {
3425 ScalarType
val = this->urand.number();
// Trailing arguments of calls whose first lines are not visible.
3434 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3438 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
3442 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3445 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3451 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3453 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3455 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Fragment of a test body with a square Fad matrix A (m*m), Fad matrix B
// (m*n), three m*n result matrices, and alpha/beta declared as plain
// ScalarType. The test's opening line, the first line of every call, and
// several interior lines are not visible in this listing.
3463 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3468 auto ndot = this->ndot_;
3475 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// A and B: entry (i,j) stored at i+j*m.
3476 for (
unsigned int j=0; j<m; j++) {
3477 for (
unsigned int i=0;
i<m;
i++) {
3478 A[
i+j*m] =
FadType(ndot, this->urand.number());
3479 for (
unsigned int k=0; k<ndot; k++)
3483 for (
unsigned int j=0; j<n; j++) {
3484 for (
unsigned int i=0;
i<m;
i++) {
3485 B[
i+j*m] =
FadType(ndot, this->urand.number());
3486 for (
unsigned int k=0; k<ndot; k++)
// Scalar (derivative-free) coefficients.
3490 ScalarType alpha = this->urand.number();
3491 ScalarType beta = this->urand.number();
// C1..C3 receive identical derivative values.
3493 for (
unsigned int j=0; j<n; j++) {
3494 for (
unsigned int i=0;
i<m;
i++) {
3495 ScalarType
val = this->urand.number();
3499 for (
unsigned int k=0; k<ndot; k++) {
3500 val = this->urand.number();
3501 C1[
i+j*m].fastAccessDx(k) =
val;
3502 C2[
i+j*m].fastAccessDx(k) =
val;
3503 C3[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
3510 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3514 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
3518 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3521 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3527 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3529 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3531 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Fragment of a test body with a square A (m*m) mirrored by a plain
// ScalarType vector `a`; later calls take either &A[0] (into C1..C3) or
// &a[0] (into C4, C5). The test's opening line, the first line of every call,
// and several interior lines are not visible in this listing.
3539 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3544 auto ndot = this->ndot_;
3551 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3552 C4(m*n,ndot), C5(m*n,ndot);
3553 std::vector<ScalarType>
a(m*m);
// A takes its values from the scalar mirror; no derivative fill is visible.
3554 for (
unsigned int j=0; j<m; j++) {
3555 for (
unsigned int i=0;
i<m;
i++) {
3556 a[
i+j*m] = this->urand.number();
3557 A[
i+j*m] = a[
i+j*m];
// B: entry (i,j) stored at i+j*m.
3560 for (
unsigned int j=0; j<n; j++) {
3561 for (
unsigned int i=0;
i<m;
i++) {
3562 B[
i+j*m] =
FadType(ndot, this->urand.number());
3563 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha and beta with random values and derivatives.
3567 FadType alpha(ndot, this->urand.number());
3568 FadType beta(ndot, this->urand.number());
3569 for (
unsigned int k=0; k<ndot; k++) {
3570 alpha.fastAccessDx(k) = this->urand.number();
3571 beta.fastAccessDx(k) = this->urand.number();
// C1..C5 receive identical derivative values.
3574 for (
unsigned int j=0; j<n; j++) {
3575 for (
unsigned int i=0;
i<m;
i++) {
3576 ScalarType
val = this->urand.number();
3582 for (
unsigned int k=0; k<ndot; k++) {
3583 val = this->urand.number();
3584 C1[
i+j*m].fastAccessDx(k) =
val;
3585 C2[
i+j*m].fastAccessDx(k) =
val;
3586 C3[
i+j*m].fastAccessDx(k) =
val;
3587 C4[
i+j*m].fastAccessDx(k) =
val;
3588 C5[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
3595 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3599 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// The A term here is m*m, without the (1+ndot) factor.
// NOTE(review): sz's consumer is not visible; presumably a workspace size.
3603 unsigned int sz = m*m + 2*m*n*(1+ndot);
3606 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3611 &a[0], m, &
B[0], m, beta, &C4[0], m);
3616 &a[0], m, &
B[0], m, beta, &C5[0], m);
3622 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3624 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3626 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3628 &a[0], m, &
B[0], m, beta, &C4[0], m);
3630 &a[0], m, &
B[0], m, beta, &C5[0], m);
// Fragment of a test body with a square Fad A (m*m) and a B (m*n) mirrored by
// a plain ScalarType vector `b`; later calls take either &B[0] (into C1..C3)
// or &b[0] (into C4, C5). The test's opening line, the first line of every
// call, and several interior lines are not visible in this listing.
3640 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3645 auto ndot = this->ndot_;
3652 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3653 C4(m*n,ndot), C5(m*n,ndot);
3654 std::vector<ScalarType> b(m*n);
// A: entry (i,j) stored at i+j*m.
3655 for (
unsigned int j=0; j<m; j++) {
3656 for (
unsigned int i=0;
i<m;
i++) {
3657 A[
i+j*m] =
FadType(ndot, this->urand.number());
3658 for (
unsigned int k=0; k<ndot; k++)
// B takes its values from the scalar mirror; no derivative fill is visible.
3662 for (
unsigned int j=0; j<n; j++) {
3663 for (
unsigned int i=0;
i<m;
i++) {
3664 b[
i+j*m] = this->urand.number();
3665 B[
i+j*m] = b[
i+j*m];
// Fad-valued alpha and beta with random values and derivatives.
3668 FadType alpha(ndot, this->urand.number());
3669 FadType beta(ndot, this->urand.number());
3670 for (
unsigned int k=0; k<ndot; k++) {
3671 alpha.fastAccessDx(k) = this->urand.number();
3672 beta.fastAccessDx(k) = this->urand.number();
// C1..C5 receive identical derivative values.
3675 for (
unsigned int j=0; j<n; j++) {
3676 for (
unsigned int i=0;
i<m;
i++) {
3677 ScalarType
val = this->urand.number();
3683 for (
unsigned int k=0; k<ndot; k++) {
3684 val = this->urand.number();
3685 C1[
i+j*m].fastAccessDx(k) =
val;
3686 C2[
i+j*m].fastAccessDx(k) =
val;
3687 C3[
i+j*m].fastAccessDx(k) =
val;
3688 C4[
i+j*m].fastAccessDx(k) =
val;
3689 C5[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
3696 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3700 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// m*n*(2+ndot) equals m*n + m*n*(1+ndot).
// NOTE(review): sz's consumer is not visible; presumably a workspace size.
3704 unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
3707 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3712 &
A[0], m, &b[0], m, beta, &C4[0], m);
3717 &
A[0], m, &b[0], m, beta, &C5[0], m);
3723 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3725 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3727 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3729 &
A[0], m, &b[0], m, beta, &C4[0], m);
3731 &
A[0], m, &b[0], m, beta, &C5[0], m);
// Fragment of a test body with a square A (m*m) and a B (m*n), both mirrored
// by plain ScalarType vectors `a` and `b`; later calls take either the Fad
// matrices (into C1..C3) or the scalar mirrors (into C4, C5). The test's
// opening line, the first line of every call, and several interior lines are
// not visible in this listing.
3741 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3746 auto ndot = this->ndot_;
3753 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3754 C4(m*n,ndot), C5(m*n,ndot);
3755 std::vector<ScalarType>
a(m*m), b(m*n);
// A and B take their values from the scalar mirrors.
3756 for (
unsigned int j=0; j<m; j++) {
3757 for (
unsigned int i=0;
i<m;
i++) {
3758 a[
i+j*m] = this->urand.number();
3759 A[
i+j*m] =
a[
i+j*m];
3762 for (
unsigned int j=0; j<n; j++) {
3763 for (
unsigned int i=0;
i<m;
i++) {
3764 b[
i+j*m] = this->urand.number();
3765 B[
i+j*m] = b[
i+j*m];
// Fad-valued alpha and beta with random values and derivatives.
3768 FadType alpha(ndot, this->urand.number());
3769 FadType beta(ndot, this->urand.number());
3770 for (
unsigned int k=0; k<ndot; k++) {
3771 alpha.fastAccessDx(k) = this->urand.number();
3772 beta.fastAccessDx(k) = this->urand.number();
// C1..C5 receive identical derivative values.
3775 for (
unsigned int j=0; j<n; j++) {
3776 for (
unsigned int i=0;
i<m;
i++) {
3777 ScalarType
val = this->urand.number();
3783 for (
unsigned int k=0; k<ndot; k++) {
3784 val = this->urand.number();
3785 C1[
i+j*m].fastAccessDx(k) =
val;
3786 C2[
i+j*m].fastAccessDx(k) =
val;
3787 C3[
i+j*m].fastAccessDx(k) =
val;
3788 C4[
i+j*m].fastAccessDx(k) =
val;
3789 C5[
i+j*m].fastAccessDx(k) =
val;
// Trailing arguments of calls whose first lines are not visible.
3796 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3800 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// Only the C part of the m*n term carries derivative storage here.
// NOTE(review): sz's consumer is not visible; presumably a workspace size.
3804 unsigned int sz = m*m + m*n*(2+ndot);
3807 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3812 &
a[0], m, &b[0], m, beta, &C4[0], m);
3817 &
a[0], m, &b[0], m, beta, &C5[0], m);
3823 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3825 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3827 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3829 &
a[0], m, &b[0], m, beta, &C4[0], m);
3831 &
a[0], m, &b[0], m, beta, &C5[0], m);
// Fragment of a test body with a square Fad matrix A (m*m) and three m*n Fad
// matrices B1..B3. The test's opening line, every BLAS call, and several
// interior lines are not visible in this listing.
// NOTE(review): the square A, the in-place B1..B3 operands and the later
// unit-diagonal reset suggest a triangular routine (TRMM/TRSM) - confirm
// against the full file.
3841 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3846 auto ndot = this->ndot_;
3848 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
// A: entry (i,j) stored at i+j*m.
3849 for (
unsigned int j=0; j<m; j++) {
3850 for (
unsigned int i=0;
i<m;
i++) {
3851 A[
i+j*m] =
FadType(ndot, this->urand.number());
3852 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha with random value and random derivative components.
3856 FadType alpha(ndot, this->urand.number());
3857 for (
unsigned int k=0; k<ndot; k++) {
3858 alpha.fastAccessDx(k) = this->urand.number();
// B1..B3 receive identical derivative values.
3861 for (
unsigned int j=0; j<n; j++) {
3862 for (
unsigned int i=0;
i<m;
i++) {
3863 ScalarType
val = this->urand.number();
3867 for (
unsigned int k=0; k<ndot; k++) {
3868 val = this->urand.number();
3869 B1[
i+j*m].fastAccessDx(k) =
val;
3870 B2[
i+j*m].fastAccessDx(k) =
val;
3871 B3[
i+j*m].fastAccessDx(k) =
val;
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
3886 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
// Set the value of every diagonal entry of A to 1.0; the body of the
// following derivative loop is not visible here.
3911 for (
unsigned int i=0;
i<m;
i++) {
3912 A[
i*m+
i].val() = 1.0;
3913 for (
unsigned int k=0; k<ndot; k++)
// Fragment of a test body with a square Fad matrix A (n*n) and three m*n Fad
// matrices B1..B3. The test's opening line, every BLAS call, and several
// interior lines are not visible in this listing.
// NOTE(review): the square A sized by the column count and the later
// unit-diagonal reset suggest a right-side triangular routine - confirm
// against the full file.
3928 typedef decltype(this->fad)
FadType;
// Number of derivative components given to every Fad value built below.
3933 auto ndot = this->ndot_;
3935 VectorType
A(
n*
n,ndot),
B1(m*n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
// A: entry (i,j) stored at i+j*n.
3936 for (
unsigned int j=0; j<n; j++) {
3937 for (
unsigned int i=0;
i<n;
i++) {
3938 A[
i+j*n] =
FadType(ndot, this->urand.number());
3939 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued alpha with random value and random derivative components.
3943 FadType alpha(ndot, this->urand.number());
3944 for (
unsigned int k=0; k<ndot; k++) {
3945 alpha.fastAccessDx(k) = this->urand.number();
// B1..B3 receive identical derivative values.
3948 for (
unsigned int j=0; j<n; j++) {
3949 for (
unsigned int i=0;
i<m;
i++) {
3950 ScalarType
val = this->urand.number();
3954 for (
unsigned int k=0; k<ndot; k++) {
3955 val = this->urand.number();
3956 B1[
i+j*m].fastAccessDx(k) =
val;
3957 B2[
i+j*m].fastAccessDx(k) =
val;
3958 B3[
i+j*m].fastAccessDx(k) =
val;
// NOTE(review): sz is not consumed on any visible line; presumably a
// workspace size - confirm.
3973 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
// Set the value of every diagonal entry of A to 1.0; the body of the
// following derivative loop is not visible here.
3998 for (
unsigned int i=0;
i<n;
i++) {
3999 A[
i*n+
i].val() = 1.0;
4000 for (
unsigned int k=0; k<ndot; k++)
4015 typedef decltype(this->fad)
FadType;
4020 auto ndot = this->ndot_;
4022 unsigned int lda = m+4;
4023 unsigned int ldb = m+5;
4024 VectorType
A(lda*m,ndot),
B1(ldb*
n,ndot),
B2(ldb*n,ndot), B3(ldb*n,ndot);
4025 for (
unsigned int j=0; j<m; j++) {
4026 for (
unsigned int i=0;
i<lda;
i++) {
4027 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4028 for (
unsigned int k=0; k<ndot; k++)
4032 FadType alpha(ndot, this->urand.number());
4033 for (
unsigned int k=0; k<ndot; k++) {
4034 alpha.fastAccessDx(k) = this->urand.number();
4037 for (
unsigned int j=0; j<n; j++) {
4038 for (
unsigned int i=0;
i<ldb;
i++) {
4039 ScalarType
val = this->urand.number();
4043 for (
unsigned int k=0; k<ndot; k++) {
4044 val = this->urand.number();
4045 B1[
i+j*ldb].fastAccessDx(k) =
val;
4046 B2[
i+j*ldb].fastAccessDx(k) =
val;
4047 B3[
i+j*ldb].fastAccessDx(k) =
val;
4062 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4087 for (
unsigned int i=0;
i<m;
i++) {
4088 A[
i*lda+
i].val() = 1.0;
4089 for (
unsigned int k=0; k<ndot; k++)
4104 typedef decltype(this->fad)
FadType;
4109 auto ndot = this->ndot_;
4111 unsigned int lda =
n+4;
4112 unsigned int ldb = m+5;
4113 VectorType
A(lda*
n,ndot),
B1(ldb*n,ndot),
B2(ldb*n,ndot), B3(ldb*n,ndot);
4114 for (
unsigned int j=0; j<n; j++) {
4115 for (
unsigned int i=0;
i<lda;
i++) {
4116 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4117 for (
unsigned int k=0; k<ndot; k++)
4121 FadType alpha(ndot, this->urand.number());
4122 for (
unsigned int k=0; k<ndot; k++) {
4123 alpha.fastAccessDx(k) = this->urand.number();
4126 for (
unsigned int j=0; j<n; j++) {
4127 for (
unsigned int i=0;
i<ldb;
i++) {
4128 ScalarType
val = this->urand.number();
4132 for (
unsigned int k=0; k<ndot; k++) {
4133 val = this->urand.number();
4134 B1[
i+j*ldb].fastAccessDx(k) =
val;
4135 B2[
i+j*ldb].fastAccessDx(k) =
val;
4136 B3[
i+j*ldb].fastAccessDx(k) =
val;
4151 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4176 for (
unsigned int i=0;
i<n;
i++) {
4177 A[
i*lda+
i].val() = 1.0;
4178 for (
unsigned int k=0; k<ndot; k++)
4193 typedef decltype(this->fad)
FadType;
4198 auto ndot = this->ndot_;
4200 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4201 for (
unsigned int j=0; j<m; j++) {
4202 for (
unsigned int i=0;
i<m;
i++) {
4203 A[
i+j*m] =
FadType(ndot, this->urand.number());
4204 for (
unsigned int k=0; k<ndot; k++)
4208 ScalarType alpha = this->urand.number();
4210 for (
unsigned int j=0; j<n; j++) {
4211 for (
unsigned int i=0;
i<m;
i++) {
4212 ScalarType
val = this->urand.number();
4216 for (
unsigned int k=0; k<ndot; k++) {
4217 val = this->urand.number();
4218 B1[
i+j*m].fastAccessDx(k) =
val;
4219 B2[
i+j*m].fastAccessDx(k) =
val;
4220 B3[
i+j*m].fastAccessDx(k) =
val;
4235 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4260 for (
unsigned int i=0;
i<m;
i++) {
4261 A[
i*m+
i].val() = 1.0;
4262 for (
unsigned int k=0; k<ndot; k++)
4277 typedef decltype(this->fad)
FadType;
4282 auto ndot = this->ndot_;
4284 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4285 for (
unsigned int j=0; j<m; j++) {
4286 for (
unsigned int i=0;
i<m;
i++) {
4287 A[
i+j*m] =
FadType(ndot, this->urand.number());
4288 for (
unsigned int k=0; k<ndot; k++)
4292 FadType alpha(ndot, this->urand.number());
4293 for (
unsigned int k=0; k<ndot; k++) {
4294 alpha.fastAccessDx(k) = this->urand.number();
4297 for (
unsigned int j=0; j<n; j++) {
4298 for (
unsigned int i=0;
i<m;
i++) {
4299 ScalarType
val = this->urand.number();
4316 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4341 for (
unsigned int i=0;
i<m;
i++) {
4342 A[
i*m+
i].val() = 1.0;
4343 for (
unsigned int k=0; k<ndot; k++)
4358 typedef decltype(this->fad)
FadType;
4363 auto ndot = this->ndot_;
4365 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot),
4366 B4(m*n,ndot), B5(m*n,ndot);
4367 std::vector<ScalarType>
a(m*m);
4368 for (
unsigned int j=0; j<m; j++) {
4369 for (
unsigned int i=0;
i<m;
i++) {
4370 a[
i+j*m] = this->urand.number();
4371 A[
i+j*m] = a[
i+j*m];
4374 FadType alpha(ndot, this->urand.number());
4375 for (
unsigned int k=0; k<ndot; k++) {
4376 alpha.fastAccessDx(k) = this->urand.number();
4379 for (
unsigned int j=0; j<n; j++) {
4380 for (
unsigned int i=0;
i<m;
i++) {
4381 ScalarType
val = this->urand.number();
4387 for (
unsigned int k=0; k<ndot; k++) {
4388 val = this->urand.number();
4389 B1[
i+j*m].fastAccessDx(k) =
val;
4390 B2[
i+j*m].fastAccessDx(k) =
val;
4391 B3[
i+j*m].fastAccessDx(k) =
val;
4392 B4[
i+j*m].fastAccessDx(k) =
val;
4393 B5[
i+j*m].fastAccessDx(k) =
val;
4408 unsigned int sz = m*m + m*n*(1+ndot);
4455 for (
unsigned int i=0;
i<m;
i++) {
4456 A[
i*m+
i].val() = 1.0;
4457 for (
unsigned int k=0; k<ndot; k++)
4478 typedef decltype(this->fad)
FadType;
4483 auto ndot = this->ndot_;
4485 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4486 for (
unsigned int j=0; j<m; j++) {
4487 for (
unsigned int i=0;
i<m;
i++) {
4489 A[
i+j*m] =
FadType(ndot, this->urand.number());
4490 for (
unsigned int k=0; k<ndot; k++)
4494 FadType alpha(ndot, this->urand.number());
4495 for (
unsigned int k=0; k<ndot; k++) {
4496 alpha.fastAccessDx(k) = this->urand.number();
4500 for (
unsigned int j=0; j<n; j++) {
4501 for (
unsigned int i=0;
i<m;
i++) {
4502 ScalarType
val = this->urand.number();
4509 for (
unsigned int k=0; k<ndot; k++) {
4510 val = this->urand.number();
4511 B1[
i+j*m].fastAccessDx(k) =
val;
4512 B2[
i+j*m].fastAccessDx(k) =
val;
4513 B3[
i+j*m].fastAccessDx(k) =
val;
4528 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4553 for (
unsigned int i=0;
i<m;
i++) {
4554 A[
i*m+
i].val() = 1.0;
4555 for (
unsigned int k=0; k<ndot; k++)
4570 typedef decltype(this->fad)
FadType;
4575 auto ndot = this->ndot_;
4577 VectorType
A(
n*
n,ndot),
B1(m*n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4578 for (
unsigned int j=0; j<n; j++) {
4579 for (
unsigned int i=0;
i<n;
i++) {
4580 A[
i+j*n] =
FadType(ndot, this->urand.number());
4581 for (
unsigned int k=0; k<ndot; k++)
4585 FadType alpha(ndot, this->urand.number());
4586 for (
unsigned int k=0; k<ndot; k++) {
4587 alpha.fastAccessDx(k) = this->urand.number();
4590 for (
unsigned int j=0; j<n; j++) {
4591 for (
unsigned int i=0;
i<m;
i++) {
4592 ScalarType
val = this->urand.number();
4596 for (
unsigned int k=0; k<ndot; k++) {
4597 val = this->urand.number();
4598 B1[
i+j*m].fastAccessDx(k) =
val;
4599 B2[
i+j*m].fastAccessDx(k) =
val;
4600 B3[
i+j*m].fastAccessDx(k) =
val;
4615 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4640 for (
unsigned int i=0;
i<n;
i++) {
4641 A[
i*n+
i].val() = 1.0;
4642 for (
unsigned int k=0; k<ndot; k++)
4657 typedef decltype(this->fad)
FadType;
4662 auto ndot = this->ndot_;
4664 unsigned int lda = m+4;
4665 unsigned int ldb = m+5;
4666 VectorType
A(lda*m,ndot),
B1(ldb*
n,ndot),
B2(ldb*n,ndot), B3(ldb*n,ndot);
4667 for (
unsigned int j=0; j<m; j++) {
4668 for (
unsigned int i=0;
i<lda;
i++) {
4669 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4670 for (
unsigned int k=0; k<ndot; k++)
4674 FadType alpha(ndot, this->urand.number());
4675 for (
unsigned int k=0; k<ndot; k++) {
4676 alpha.fastAccessDx(k) = this->urand.number();
4679 for (
unsigned int j=0; j<n; j++) {
4680 for (
unsigned int i=0;
i<ldb;
i++) {
4681 ScalarType
val = this->urand.number();
4685 for (
unsigned int k=0; k<ndot; k++) {
4686 val = this->urand.number();
4687 B1[
i+j*ldb].fastAccessDx(k) =
val;
4688 B2[
i+j*ldb].fastAccessDx(k) =
val;
4689 B3[
i+j*ldb].fastAccessDx(k) =
val;
4704 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4729 for (
unsigned int i=0;
i<m;
i++) {
4730 A[
i*lda+
i].val() = 1.0;
4731 for (
unsigned int k=0; k<ndot; k++)
4746 typedef decltype(this->fad)
FadType;
4751 auto ndot = this->ndot_;
4753 unsigned int lda =
n+4;
4754 unsigned int ldb = m+5;
4755 VectorType
A(lda*
n,ndot),
B1(ldb*n,ndot),
B2(ldb*n,ndot), B3(ldb*n,ndot);
4756 for (
unsigned int j=0; j<n; j++) {
4757 for (
unsigned int i=0;
i<lda;
i++) {
4758 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4759 for (
unsigned int k=0; k<ndot; k++)
4763 FadType alpha(ndot, this->urand.number());
4764 for (
unsigned int k=0; k<ndot; k++) {
4765 alpha.fastAccessDx(k) = this->urand.number();
4768 for (
unsigned int j=0; j<n; j++) {
4769 for (
unsigned int i=0;
i<ldb;
i++) {
4770 ScalarType
val = this->urand.number();
4774 for (
unsigned int k=0; k<ndot; k++) {
4775 val = this->urand.number();
4776 B1[
i+j*ldb].fastAccessDx(k) =
val;
4777 B2[
i+j*ldb].fastAccessDx(k) =
val;
4778 B3[
i+j*ldb].fastAccessDx(k) =
val;
4793 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4818 for (
unsigned int i=0;
i<n;
i++) {
4819 A[
i*lda+
i].val() = 1.0;
4820 for (
unsigned int k=0; k<ndot; k++)
4835 typedef decltype(this->fad)
FadType;
4840 auto ndot = this->ndot_;
4842 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4843 for (
unsigned int j=0; j<m; j++) {
4844 for (
unsigned int i=0;
i<m;
i++) {
4845 A[
i+j*m] =
FadType(ndot, this->urand.number());
4846 for (
unsigned int k=0; k<ndot; k++)
4850 ScalarType alpha = this->urand.number();
4852 for (
unsigned int j=0; j<n; j++) {
4853 for (
unsigned int i=0;
i<m;
i++) {
4854 ScalarType
val = this->urand.number();
4858 for (
unsigned int k=0; k<ndot; k++) {
4859 val = this->urand.number();
4860 B1[
i+j*m].fastAccessDx(k) =
val;
4861 B2[
i+j*m].fastAccessDx(k) =
val;
4862 B3[
i+j*m].fastAccessDx(k) =
val;
4877 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4902 for (
unsigned int i=0;
i<m;
i++) {
4903 A[
i*m+
i].val() = 1.0;
4904 for (
unsigned int k=0; k<ndot; k++)
4919 typedef decltype(this->fad)
FadType;
4924 auto ndot = this->ndot_;
4926 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4927 for (
unsigned int j=0; j<m; j++) {
4928 for (
unsigned int i=0;
i<m;
i++) {
4929 A[
i+j*m] =
FadType(ndot, this->urand.number());
4930 for (
unsigned int k=0; k<ndot; k++)
4934 FadType alpha(ndot, this->urand.number());
4935 for (
unsigned int k=0; k<ndot; k++) {
4936 alpha.fastAccessDx(k) = this->urand.number();
4939 for (
unsigned int j=0; j<n; j++) {
4940 for (
unsigned int i=0;
i<m;
i++) {
4941 ScalarType
val = this->urand.number();
4958 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4983 for (
unsigned int i=0;
i<m;
i++) {
4984 A[
i*m+
i].val() = 1.0;
4985 for (
unsigned int k=0; k<ndot; k++)
5000 typedef decltype(this->fad)
FadType;
5005 auto ndot = this->ndot_;
5007 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot),
5008 B4(m*n,ndot), B5(m*n,ndot);
5009 std::vector<ScalarType>
a(m*m);
5010 for (
unsigned int j=0; j<m; j++) {
5011 for (
unsigned int i=0;
i<m;
i++) {
5012 a[
i+j*m] = this->urand.number();
5013 A[
i+j*m] = a[
i+j*m];
5016 FadType alpha(ndot, this->urand.number());
5017 for (
unsigned int k=0; k<ndot; k++) {
5018 alpha.fastAccessDx(k) = this->urand.number();
5021 for (
unsigned int j=0; j<n; j++) {
5022 for (
unsigned int i=0;
i<m;
i++) {
5023 ScalarType
val = this->urand.number();
5029 for (
unsigned int k=0; k<ndot; k++) {
5030 val = this->urand.number();
5031 B1[
i+j*m].fastAccessDx(k) =
val;
5032 B2[
i+j*m].fastAccessDx(k) =
val;
5033 B3[
i+j*m].fastAccessDx(k) =
val;
5034 B4[
i+j*m].fastAccessDx(k) =
val;
5035 B5[
i+j*m].fastAccessDx(k) =
val;
5050 unsigned int sz = m*m + m*n*(1+ndot);
5097 for (
unsigned int i=0;
i<m;
i++) {
5098 A[
i*m+
i].val() = 1.0;
5099 for (
unsigned int k=0; k<ndot; k++)
5204 #endif // FADBLASUNITTESTS_HPP
Sacado::Random< ScalarType > urand
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
REGISTER_TYPED_TEST_SUITE_P(FadBLASUnitTests, testSCAL1, testSCAL2, testSCAL3, testSCAL4, testCOPY1, testCOPY2, testCOPY3, testCOPY4, testAXPY1, testAXPY2, testAXPY3, testAXPY4, testDOT1, testDOT2, testDOT3, testDOT4, testNRM21, testNRM22, testGEMV1, testGEMV2, testGEMV3, testGEMV4, testGEMV5, testGEMV6, testGEMV7, testGEMV8, testGEMV9, testTRMV1, testTRMV2, testTRMV3, testTRMV4, testGER1, testGER2, testGER3, testGER4, testGER5, testGER6, testGER7, testGEMM1, testGEMM2, testGEMM3, testGEMM4, testGEMM5, testGEMM6, testGEMM7, testGEMM8, testGEMM9, testGEMM10, testSYMM1, testSYMM2, testSYMM3, testSYMM4, testSYMM5, testSYMM6, testSYMM7, testSYMM8, testSYMM9, testTRMM1, testTRMM2, testTRMM3, testTRMM4, testTRMM5, testTRMM6, testTRMM7, testTRSM1, testTRSM2, testTRSM3, testTRSM4, testTRSM5, testTRSM6, testTRSM7)
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
TYPED_TEST_P(FadBLASUnitTests, testSCAL1)
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Sacado::Random< double > real_urand
#define COMPARE_FAD_VECTORS(X1, X2, n)
Sacado::Fad::DFad< double > FadType
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sacado::ScalarType< FadType >::type ScalarType
Sacado::Fad::Vector< unsigned int, FadType > VectorType
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
TYPED_TEST_SUITE_P(FadBLASUnitTests)
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
A random number generator that generates random numbers uniformly distributed in the interval (a...
#define COMPARE_FADS(a, b)