30 #ifndef FADBLASUNITTESTS_HPP
31 #define FADBLASUNITTESTS_HPP
43 #define COMPARE_FAD_VECTORS(X1, X2, n) \
44 ASSERT_TRUE(X1.size() == std::size_t(n)); \
45 ASSERT_TRUE(X2.size() == std::size_t(n)); \
46 for (unsigned int i=0; i<n; i++) { \
47 COMPARE_FADS(X1[i], X2[i]); \
52 template <
class FadType>
91 typedef decltype(this->fad)
FadType;
95 auto ndot = this->ndot_;
97 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
98 for (
unsigned int i=0;
i<m;
i++) {
99 ScalarType
val = this->urand.number();
103 for (
unsigned int k=0; k<ndot; k++) {
104 val = this->urand.number();
105 x1[
i].fastAccessDx(k) =
val;
106 x2[
i].fastAccessDx(k) =
val;
107 x3[
i].fastAccessDx(k) =
val;
110 FadType alpha(ndot, this->urand.number());
111 for (
unsigned int k=0; k<ndot; k++) {
112 alpha.fastAccessDx(k) = this->urand.number();
116 teuchos_blas.
SCAL(m, alpha, &x1[0], 1);
119 sacado_blas.
SCAL(m, alpha, &x2[0], 1);
123 unsigned int sz = m*(1+ndot);
125 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
132 typedef decltype(this->fad)
FadType;
136 auto ndot = this->ndot_;
138 unsigned int incx = 2;
139 VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
140 for (
unsigned int i=0;
i<m*incx;
i++) {
141 ScalarType
val = this->urand.number();
145 for (
unsigned int k=0; k<ndot; k++) {
146 val = this->urand.number();
147 x1[
i].fastAccessDx(k) =
val;
148 x2[
i].fastAccessDx(k) =
val;
149 x3[
i].fastAccessDx(k) =
val;
152 FadType alpha(ndot, this->urand.number());
153 for (
unsigned int k=0; k<ndot; k++) {
154 alpha.fastAccessDx(k) = this->urand.number();
158 teuchos_blas.
SCAL(m, alpha, &x1[0], incx);
161 sacado_blas.
SCAL(m, alpha, &x2[0], incx);
165 unsigned int sz = m*(1+ndot);
167 sacado_blas2.SCAL(m, alpha, &x3[0], incx);
174 typedef decltype(this->fad)
FadType;
178 auto ndot = this->ndot_;
180 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
181 for (
unsigned int i=0;
i<m;
i++) {
182 ScalarType
val = this->urand.number();
186 for (
unsigned int k=0; k<ndot; k++) {
187 val = this->urand.number();
188 x1[
i].fastAccessDx(k) =
val;
189 x2[
i].fastAccessDx(k) =
val;
190 x3[
i].fastAccessDx(k) =
val;
193 ScalarType alpha = this->urand.number();
196 teuchos_blas.
SCAL(m, alpha, &x1[0], 1);
199 sacado_blas.
SCAL(m, alpha, &x2[0], 1);
203 unsigned int sz = m*(1+ndot);
205 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
212 typedef decltype(this->fad)
FadType;
216 auto ndot = this->ndot_;
218 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
219 for (
unsigned int i=0;
i<m;
i++) {
220 ScalarType
val = this->urand.number();
225 FadType alpha =
FadType(ndot, this->urand.number());
226 for (
unsigned int k=0; k<ndot; k++)
227 alpha.fastAccessDx(k) = this->urand.number();
230 teuchos_blas.
SCAL(m, alpha, &x1[0], 1);
233 sacado_blas.
SCAL(m, alpha, &x2[0], 1);
237 unsigned int sz = m*(1+ndot);
239 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
246 typedef decltype(this->fad)
FadType;
250 auto ndot = this->ndot_;
252 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
253 for (
unsigned int i=0;
i<m;
i++) {
254 x[
i] =
FadType(ndot, this->urand.number());
255 ScalarType
val = this->urand.number();
259 for (
unsigned int k=0; k<ndot; k++) {
260 x[
i].fastAccessDx(k) = this->urand.number();
261 val = this->urand.number();
262 y1[
i].fastAccessDx(k) =
val;
263 y2[
i].fastAccessDx(k) =
val;
264 y3[
i].fastAccessDx(k) =
val;
269 teuchos_blas.
COPY(m, &
x[0], 1, &y1[0], 1);
272 sacado_blas.
COPY(m, &
x[0], 1, &y2[0], 1);
276 unsigned int sz = 2*m*(1+ndot);
278 sacado_blas2.COPY(m, &
x[0], 1, &y3[0], 1);
285 typedef decltype(this->fad)
FadType;
289 auto ndot = this->ndot_;
291 unsigned int incx = 2;
292 unsigned int incy = 3;
293 VectorType
x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
294 for (
unsigned int i=0;
i<m*incx;
i++) {
295 x[
i] =
FadType(ndot, this->urand.number());
296 for (
unsigned int k=0; k<ndot; k++) {
297 x[
i].fastAccessDx(k) = this->urand.number();
300 for (
unsigned int i=0;
i<m*incy;
i++) {
301 ScalarType
val = this->urand.number();
305 for (
unsigned int k=0; k<ndot; k++) {
306 val = this->urand.number();
307 y1[
i].fastAccessDx(k) =
val;
308 y2[
i].fastAccessDx(k) =
val;
309 y3[
i].fastAccessDx(k) =
val;
314 teuchos_blas.
COPY(m, &
x[0], incx, &y1[0], incy);
317 sacado_blas.
COPY(m, &
x[0], incx, &y2[0], incy);
321 unsigned int sz = 2*m*(1+ndot);
323 sacado_blas2.COPY(m, &
x[0], incx, &y3[0], incy);
330 typedef decltype(this->fad)
FadType;
334 auto ndot = this->ndot_;
336 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
337 for (
unsigned int i=0;
i<m;
i++) {
338 x[
i] = this->urand.number();
340 for (
unsigned int i=0;
i<m;
i++) {
341 ScalarType
val = this->urand.number();
345 for (
unsigned int k=0; k<ndot; k++) {
346 val = this->urand.number();
347 y1[
i].fastAccessDx(k) =
val;
348 y2[
i].fastAccessDx(k) =
val;
349 y3[
i].fastAccessDx(k) =
val;
354 teuchos_blas.
COPY(m, &
x[0], 1, &y1[0], 1);
357 sacado_blas.
COPY(m, &
x[0], 1, &y2[0], 1);
361 unsigned int sz = 2*m*(1+ndot);
363 sacado_blas2.COPY(m, &
x[0], 1, &y3[0], 1);
370 typedef decltype(this->fad)
FadType;
374 auto ndot = this->ndot_;
376 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
377 for (
unsigned int i=0;
i<m;
i++) {
378 x[
i] =
FadType(ndot, this->urand.number());
379 ScalarType
val = this->urand.number();
383 for (
unsigned int k=0; k<ndot; k++) {
384 x[
i].fastAccessDx(k) = this->urand.number();
389 teuchos_blas.
COPY(m, &
x[0], 1, &y1[0], 1);
392 sacado_blas.
COPY(m, &
x[0], 1, &y2[0], 1);
396 unsigned int sz = 2*m*(1+ndot);
398 sacado_blas2.COPY(m, &
x[0], 1, &y3[0], 1);
405 typedef decltype(this->fad)
FadType;
409 auto ndot = this->ndot_;
411 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
412 for (
unsigned int i=0;
i<m;
i++) {
413 x[
i] =
FadType(ndot, this->urand.number());
414 ScalarType
val = this->urand.number();
418 for (
unsigned int k=0; k<ndot; k++) {
419 x[
i].fastAccessDx(k) = this->urand.number();
420 val = this->urand.number();
421 y1[
i].fastAccessDx(k) =
val;
422 y2[
i].fastAccessDx(k) =
val;
423 y3[
i].fastAccessDx(k) =
val;
426 FadType alpha(ndot, this->urand.number());
427 for (
unsigned int k=0; k<ndot; k++)
428 alpha.fastAccessDx(k) = this->urand.number();
431 teuchos_blas.
AXPY(m, alpha, &
x[0], 1, &y1[0], 1);
434 sacado_blas.
AXPY(m, alpha, &
x[0], 1, &y2[0], 1);
438 unsigned int sz = 2*m*(1+ndot);
440 sacado_blas2.AXPY(m, alpha, &
x[0], 1, &y3[0], 1);
447 typedef decltype(this->fad)
FadType;
451 auto ndot = this->ndot_;
453 unsigned int incx = 2;
454 unsigned int incy = 3;
455 VectorType
x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
456 for (
unsigned int i=0;
i<m*incx;
i++) {
457 x[
i] =
FadType(ndot, this->urand.number());
458 for (
unsigned int k=0; k<ndot; k++) {
459 x[
i].fastAccessDx(k) = this->urand.number();
462 for (
unsigned int i=0;
i<m*incy;
i++) {
463 ScalarType
val = this->urand.number();
467 for (
unsigned int k=0; k<ndot; k++) {
468 val = this->urand.number();
469 y1[
i].fastAccessDx(k) =
val;
470 y2[
i].fastAccessDx(k) =
val;
471 y3[
i].fastAccessDx(k) =
val;
474 FadType alpha(ndot, this->urand.number());
475 for (
unsigned int k=0; k<ndot; k++)
476 alpha.fastAccessDx(k) = this->urand.number();
479 teuchos_blas.
AXPY(m, alpha, &
x[0], incx, &y1[0], incy);
482 sacado_blas.
AXPY(m, alpha, &
x[0], incx, &y2[0], incy);
486 unsigned int sz = 2*m*(1+ndot);
488 sacado_blas2.AXPY(m, alpha, &
x[0], incx, &y3[0], incy);
495 typedef decltype(this->fad)
FadType;
499 auto ndot = this->ndot_;
501 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
502 std::vector<ScalarType> xx(m);
503 for (
unsigned int i=0;
i<m;
i++) {
504 xx[
i] = this->urand.number();
506 ScalarType
val = this->urand.number();
511 for (
unsigned int k=0; k<ndot; k++) {
512 val = this->urand.number();
513 y1[
i].fastAccessDx(k) =
val;
514 y2[
i].fastAccessDx(k) =
val;
515 y3[
i].fastAccessDx(k) =
val;
516 y4[
i].fastAccessDx(k) =
val;
519 FadType alpha(ndot, this->urand.number());
520 for (
unsigned int k=0; k<ndot; k++)
521 alpha.fastAccessDx(k) = this->urand.number();
524 teuchos_blas.
AXPY(m, alpha, &
x[0], 1, &y1[0], 1);
527 sacado_blas.
AXPY(m, alpha, &
x[0], 1, &y2[0], 1);
531 unsigned int sz = m*(1+ndot)+m;
533 sacado_blas2.AXPY(m, alpha, &
x[0], 1, &y3[0], 1);
537 sacado_blas.
AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
544 typedef decltype(this->fad)
FadType;
548 auto ndot = this->ndot_;
550 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
551 for (
unsigned int i=0;
i<m;
i++) {
552 x[
i] =
FadType(ndot, this->urand.number());
553 ScalarType
val = this->urand.number();
557 for (
unsigned int k=0; k<ndot; k++) {
558 x[
i].fastAccessDx(k) = this->urand.number();
561 FadType alpha(ndot, this->urand.number());
562 for (
unsigned int k=0; k<ndot; k++)
563 alpha.fastAccessDx(k) = this->urand.number();
566 teuchos_blas.
AXPY(m, alpha, &
x[0], 1, &y1[0], 1);
569 sacado_blas.
AXPY(m, alpha, &
x[0], 1, &y2[0], 1);
573 unsigned int sz = 2*m*(1+ndot);
575 sacado_blas2.AXPY(m, alpha, &
x[0], 1, &y3[0], 1);
582 typedef decltype(this->fad)
FadType;
585 auto ndot = this->ndot_;
587 VectorType X(m,ndot), Y(m,ndot);
588 for (
unsigned int i=0;
i<m;
i++) {
589 X[
i] =
FadType(ndot, this->real_urand.number());
590 Y[
i] =
FadType(ndot, this->real_urand.number());
591 for (
unsigned int k=0; k<ndot; k++) {
592 X[
i].fastAccessDx(k) = this->real_urand.number();
593 Y[
i].fastAccessDx(k) = this->real_urand.number();
598 FadType z1 = teuchos_blas.
DOT(m, &X[0], 1, &Y[0], 1);
601 FadType z2 = sacado_blas.
DOT(m, &X[0], 1, &Y[0], 1);
605 unsigned int sz = 2*m*(1+ndot);
607 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
614 typedef decltype(this->fad)
FadType;
617 auto ndot = this->ndot_;
619 unsigned int incx = 2;
620 unsigned int incy = 3;
621 VectorType X(m*incx,ndot), Y(m*incy,ndot);
622 for (
unsigned int i=0;
i<m*incx;
i++) {
623 X[
i] =
FadType(ndot, this->real_urand.number());
624 for (
unsigned int k=0; k<ndot; k++) {
625 X[
i].fastAccessDx(k) = this->real_urand.number();
628 for (
unsigned int i=0;
i<m*incy;
i++) {
629 Y[
i] =
FadType(ndot, this->real_urand.number());
630 for (
unsigned int k=0; k<ndot; k++) {
631 Y[
i].fastAccessDx(k) = this->real_urand.number();
636 FadType z1 = teuchos_blas.
DOT(m, &X[0], incx, &Y[0], incy);
639 FadType z2 = sacado_blas.
DOT(m, &X[0], incx, &Y[0], incy);
643 unsigned int sz = 2*m*(1+ndot);
645 FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
652 typedef decltype(this->fad)
FadType;
656 auto ndot = this->ndot_;
658 VectorType X(m,0), Y(m,ndot);
659 std::vector<ScalarType>
x(m);
660 for (
unsigned int i=0;
i<m;
i++) {
661 x[
i] = this->urand.number();
663 Y[
i] =
FadType(ndot, this->real_urand.number());
664 for (
unsigned int k=0; k<ndot; k++) {
665 Y[
i].fastAccessDx(k) = this->real_urand.number();
670 FadType z1 = teuchos_blas.
DOT(m, &X[0], 1, &Y[0], 1);
673 FadType z2 = sacado_blas.
DOT(m, &X[0], 1, &Y[0], 1);
677 unsigned int sz = 2*m*(1+ndot);
679 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
683 FadType z4 = sacado_blas.
DOT(m, &x[0], 1, &Y[0], 1);
690 typedef decltype(this->fad)
FadType;
694 auto ndot = this->ndot_;
696 VectorType X(m,ndot), Y(m,0);
697 std::vector<ScalarType>
y(m);
698 for (
unsigned int i=0;
i<m;
i++) {
699 X[
i] =
FadType(ndot, this->real_urand.number());
700 y[
i] = this->urand.number();
702 for (
unsigned int k=0; k<ndot; k++) {
703 X[
i].fastAccessDx(k) = this->real_urand.number();
708 FadType z1 = teuchos_blas.
DOT(m, &X[0], 1, &Y[0], 1);
711 FadType z2 = sacado_blas.
DOT(m, &X[0], 1, &Y[0], 1);
715 unsigned int sz = 2*m*(1+ndot);
717 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
721 FadType z4 = sacado_blas.
DOT(m, &X[0], 1, &y[0], 1);
728 typedef decltype(this->fad)
FadType;
731 auto ndot = this->ndot_;
733 VectorType X(m,ndot);
734 for (
unsigned int i=0;
i<m;
i++) {
735 X[
i] =
FadType(ndot, this->real_urand.number());
736 for (
unsigned int k=0; k<ndot; k++) {
737 X[
i].fastAccessDx(k) = this->real_urand.number();
743 teuchos_blas.
NRM2(m, &X[0], 1);
747 sacado_blas.
NRM2(m, &X[0], 1);
751 unsigned int sz = m*(1+ndot);
754 sacado_blas2.NRM2(m, &X[0], 1);
761 typedef decltype(this->fad)
FadType;
764 auto ndot = this->ndot_;
766 unsigned int incx = 2;
767 VectorType X(m*incx,ndot);
768 for (
unsigned int i=0;
i<m*incx;
i++) {
769 X[
i] =
FadType(ndot, this->real_urand.number());
770 for (
unsigned int k=0; k<ndot; k++) {
771 X[
i].fastAccessDx(k) = this->real_urand.number();
777 teuchos_blas.
NRM2(m, &X[0], incx);
781 sacado_blas.
NRM2(m, &X[0], incx);
785 unsigned int sz = m*(1+ndot);
788 sacado_blas2.NRM2(m, &X[0], incx);
795 typedef decltype(this->fad)
FadType;
800 auto ndot = this->ndot_;
802 VectorType
A(m*
n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
803 for (
unsigned int j=0; j<n; j++) {
804 for (
unsigned int i=0;
i<m;
i++) {
805 A[
i+j*m] =
FadType(ndot, this->urand.number());
806 for (
unsigned int k=0; k<ndot; k++)
809 B[j] =
FadType(ndot, this->urand.number());
810 for (
unsigned int k=0; k<ndot; k++)
813 FadType alpha(ndot, this->urand.number());
814 FadType beta(ndot, this->urand.number());
815 for (
unsigned int k=0; k<ndot; k++) {
816 alpha.fastAccessDx(k) = this->urand.number();
817 beta.fastAccessDx(k) = this->urand.number();
820 for (
unsigned int i=0;
i<m;
i++) {
821 ScalarType
val = this->urand.number();
825 for (
unsigned int k=0; k<ndot; k++) {
826 val = this->urand.number();
827 C1[
i].fastAccessDx(k) =
val;
828 C2[
i].fastAccessDx(k) =
val;
829 C3[
i].fastAccessDx(k) =
val;
843 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
853 typedef decltype(this->fad)
FadType;
858 auto ndot = this->ndot_;
860 unsigned int lda = m+3;
861 unsigned int incb = 2;
862 unsigned int incc = 3;
863 VectorType
A(lda*
n,ndot),
B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot),
865 for (
unsigned int j=0; j<n; j++) {
866 for (
unsigned int i=0;
i<lda;
i++) {
867 A[
i+j*lda] =
FadType(ndot, this->urand.number());
868 for (
unsigned int k=0; k<ndot; k++)
872 for (
unsigned int j=0; j<n*incb; j++) {
873 B[j] =
FadType(ndot, this->urand.number());
874 for (
unsigned int k=0; k<ndot; k++)
877 FadType alpha(ndot, this->urand.number());
878 FadType beta(ndot, this->urand.number());
879 for (
unsigned int k=0; k<ndot; k++) {
880 alpha.fastAccessDx(k) = this->urand.number();
881 beta.fastAccessDx(k) = this->urand.number();
884 for (
unsigned int i=0;
i<m*incc;
i++) {
885 ScalarType
val = this->urand.number();
889 for (
unsigned int k=0; k<ndot; k++) {
890 val = this->urand.number();
891 C1[
i].fastAccessDx(k) =
val;
892 C2[
i].fastAccessDx(k) =
val;
893 C3[
i].fastAccessDx(k) =
val;
907 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
917 typedef decltype(this->fad)
FadType;
922 auto ndot = this->ndot_;
924 VectorType
A(m*
n,ndot),
B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
925 for (
unsigned int j=0; j<n; j++) {
926 for (
unsigned int i=0;
i<m;
i++) {
927 A[
i+j*m] =
FadType(ndot, this->urand.number());
928 for (
unsigned int k=0; k<ndot; k++)
932 for (
unsigned int j=0; j<m; j++) {
933 B[j] =
FadType(ndot, this->urand.number());
934 for (
unsigned int k=0; k<ndot; k++)
937 FadType alpha(ndot, this->urand.number());
938 FadType beta(ndot, this->urand.number());
939 for (
unsigned int k=0; k<ndot; k++) {
940 alpha.fastAccessDx(k) = this->urand.number();
941 beta.fastAccessDx(k) = this->urand.number();
944 for (
unsigned int i=0;
i<n;
i++) {
945 ScalarType
val = this->urand.number();
949 for (
unsigned int k=0; k<ndot; k++) {
950 val = this->urand.number();
951 C1[
i].fastAccessDx(k) =
val;
952 C2[
i].fastAccessDx(k) =
val;
953 C3[
i].fastAccessDx(k) =
val;
967 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
977 typedef decltype(this->fad)
FadType;
982 auto ndot = this->ndot_;
984 unsigned int lda = m+3;
985 unsigned int incb = 2;
986 unsigned int incc = 3;
987 VectorType
A(lda*
n,ndot),
B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot),
989 for (
unsigned int j=0; j<n; j++) {
990 for (
unsigned int i=0;
i<lda;
i++) {
991 A[
i+j*lda] =
FadType(ndot, this->urand.number());
992 for (
unsigned int k=0; k<ndot; k++)
996 for (
unsigned int j=0; j<m*incb; j++) {
997 B[j] =
FadType(ndot, this->urand.number());
998 for (
unsigned int k=0; k<ndot; k++)
1001 FadType alpha(ndot, this->urand.number());
1002 FadType beta(ndot, this->urand.number());
1003 for (
unsigned int k=0; k<ndot; k++) {
1004 alpha.fastAccessDx(k) = this->urand.number();
1005 beta.fastAccessDx(k) = this->urand.number();
1008 for (
unsigned int i=0;
i<n*incc;
i++) {
1009 ScalarType
val = this->urand.number();
1013 for (
unsigned int k=0; k<ndot; k++) {
1014 val = this->urand.number();
1015 C1[
i].fastAccessDx(k) =
val;
1016 C2[
i].fastAccessDx(k) =
val;
1017 C3[
i].fastAccessDx(k) =
val;
1023 beta, &C1[0], incc);
1027 beta, &C2[0], incc);
1031 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1034 beta, &C3[0], incc);
1041 typedef decltype(this->fad)
FadType;
1046 auto ndot = this->ndot_;
1048 VectorType
A(m*
n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1049 for (
unsigned int j=0; j<n; j++) {
1050 for (
unsigned int i=0;
i<m;
i++) {
1051 A[
i+j*m] =
FadType(ndot, this->urand.number());
1052 for (
unsigned int k=0; k<ndot; k++)
1055 B[j] =
FadType(ndot, this->urand.number());
1056 for (
unsigned int k=0; k<ndot; k++)
1059 FadType alpha(ndot, this->urand.number());
1060 FadType beta(ndot, this->urand.number());
1061 for (
unsigned int k=0; k<ndot; k++) {
1062 alpha.fastAccessDx(k) = this->urand.number();
1063 beta.fastAccessDx(k) = this->urand.number();
1066 for (
unsigned int i=0;
i<m;
i++) {
1067 ScalarType
val = this->urand.number();
1083 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1093 typedef decltype(this->fad)
FadType;
1098 auto ndot = this->ndot_;
1100 VectorType
A(m*
n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1101 for (
unsigned int j=0; j<n; j++) {
1102 for (
unsigned int i=0;
i<m;
i++) {
1103 A[
i+j*m] =
FadType(ndot, this->urand.number());
1104 for (
unsigned int k=0; k<ndot; k++)
1107 B[j] =
FadType(ndot, this->urand.number());
1108 for (
unsigned int k=0; k<ndot; k++)
1111 ScalarType alpha = this->urand.number();
1112 ScalarType beta = this->urand.number();
1114 for (
unsigned int i=0;
i<m;
i++) {
1115 ScalarType
val = this->urand.number();
1119 for (
unsigned int k=0; k<ndot; k++) {
1120 val = this->urand.number();
1121 C1[
i].fastAccessDx(k) =
val;
1122 C2[
i].fastAccessDx(k) =
val;
1123 C3[
i].fastAccessDx(k) =
val;
1137 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1147 typedef decltype(this->fad)
FadType;
1152 auto ndot = this->ndot_;
1154 VectorType
A(m*
n,ndot),
B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1156 std::vector<ScalarType> b(n);
1157 for (
unsigned int j=0; j<n; j++) {
1158 for (
unsigned int i=0;
i<m;
i++) {
1159 A[
i+j*m] =
FadType(ndot, this->urand.number());
1160 for (
unsigned int k=0; k<ndot; k++)
1163 b[j] = this->urand.number();
1166 FadType alpha(ndot, this->urand.number());
1167 FadType beta(ndot, this->urand.number());
1168 for (
unsigned int k=0; k<ndot; k++) {
1169 alpha.fastAccessDx(k) = this->urand.number();
1170 beta.fastAccessDx(k) = this->urand.number();
1173 for (
unsigned int i=0;
i<m;
i++) {
1174 ScalarType
val = this->urand.number();
1179 for (
unsigned int k=0; k<ndot; k++) {
1180 val = this->urand.number();
1181 C1[
i].fastAccessDx(k) =
val;
1182 C2[
i].fastAccessDx(k) =
val;
1183 C3[
i].fastAccessDx(k) =
val;
1184 C4[
i].fastAccessDx(k) =
val;
1198 unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
1213 typedef decltype(this->fad)
FadType;
1218 auto ndot = this->ndot_;
1220 VectorType
A(m*
n,0),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1222 std::vector<ScalarType>
a(m*n);
1223 for (
unsigned int j=0; j<n; j++) {
1224 for (
unsigned int i=0;
i<m;
i++) {
1225 a[
i+j*m] = this->urand.number();
1226 A[
i+j*m] = a[
i+j*m];
1228 B[j] =
FadType(ndot, this->urand.number());
1229 for (
unsigned int k=0; k<ndot; k++)
1232 FadType alpha(ndot, this->urand.number());
1233 FadType beta(ndot, this->urand.number());
1234 for (
unsigned int k=0; k<ndot; k++) {
1235 alpha.fastAccessDx(k) = this->urand.number();
1236 beta.fastAccessDx(k) = this->urand.number();
1239 for (
unsigned int i=0;
i<m;
i++) {
1240 ScalarType
val = this->urand.number();
1245 for (
unsigned int k=0; k<ndot; k++) {
1246 val = this->urand.number();
1247 C1[
i].fastAccessDx(k) =
val;
1248 C2[
i].fastAccessDx(k) =
val;
1249 C3[
i].fastAccessDx(k) =
val;
1250 C4[
i].fastAccessDx(k) =
val;
// Three-term sum: m*n for A (constructed with 0 derivative components and
// filled from the scalar array a), n*(1+ndot) for B, m*(1+ndot) for C.
// A stray '*' after m*n would make the following '+' a unary plus and turn
// the first two terms into the product m*n*n*(1+ndot).
// NOTE(review): the consumer of sz is outside this view -- presumably the
// static workspace size handed to sacado_blas2; confirm.
1264 unsigned int sz = m*n + n*(1+ndot) + m*(1+ndot);
1279 typedef decltype(this->fad)
FadType;
1284 auto ndot = this->ndot_;
1286 VectorType
A(m*
n,0),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1288 std::vector<ScalarType>
a(m*n), b(n);
1289 for (
unsigned int j=0; j<n; j++) {
1290 for (
unsigned int i=0;
i<m;
i++) {
1291 a[
i+j*m] = this->urand.number();
1292 A[
i+j*m] =
a[
i+j*m];
1294 b[j] = this->urand.number();
1297 FadType alpha(ndot, this->urand.number());
1298 FadType beta(ndot, this->urand.number());
1299 for (
unsigned int k=0; k<ndot; k++) {
1300 alpha.fastAccessDx(k) = this->urand.number();
1301 beta.fastAccessDx(k) = this->urand.number();
1304 for (
unsigned int i=0;
i<m;
i++) {
1305 ScalarType
val = this->urand.number();
1310 for (
unsigned int k=0; k<ndot; k++) {
1311 val = this->urand.number();
1312 C1[
i].fastAccessDx(k) =
val;
1313 C2[
i].fastAccessDx(k) =
val;
1314 C3[
i].fastAccessDx(k) =
val;
1315 C4[
i].fastAccessDx(k) =
val;
// Three-term sum: m*n for A (constructed with 0 derivative components and
// filled from the scalar array a), n*(1+ndot) for B, m*(1+ndot) for C.
// A stray '*' after m*n would make the following '+' a unary plus and turn
// the first two terms into the product m*n*n*(1+ndot).
// NOTE(review): this test also fills a scalar array b(n); if B ends up with
// no derivative components, n alone would do for the middle term. The
// n*(1+ndot) form is kept as a safe upper bound -- confirm against the use
// of sz, which is outside this view.
1329 unsigned int sz = m*n + n*(1+ndot) + m*(1+ndot);
1344 typedef decltype(this->fad)
FadType;
1348 auto ndot = this->ndot_;
1350 VectorType
A(
n*
n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1351 for (
unsigned int j=0; j<n; j++) {
1352 for (
unsigned int i=0;
i<n;
i++) {
1353 A[
i+j*n] =
FadType(ndot, this->urand.number());
1354 for (
unsigned int k=0; k<ndot; k++)
1357 ScalarType
val = this->urand.number();
1361 for (
unsigned int k=0; k<ndot; k++) {
1362 val = this->urand.number();
1363 x1[j].fastAccessDx(k) =
val;
1364 x2[j].fastAccessDx(k) =
val;
1365 x3[j].fastAccessDx(k) =
val;
1379 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1404 for (
unsigned int i=0;
i<n;
i++) {
1405 A[
i*n+
i].val() = 1.0;
1406 for (
unsigned int k=0; k<ndot; k++)
1421 typedef decltype(this->fad)
FadType;
1425 auto ndot = this->ndot_;
1427 unsigned int lda =
n+3;
1428 unsigned int incx = 2;
1429 VectorType
A(lda*
n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
1430 for (
unsigned int j=0; j<n; j++) {
1431 for (
unsigned int i=0;
i<lda;
i++) {
1432 A[
i+j*lda] =
FadType(ndot, this->urand.number());
1433 for (
unsigned int k=0; k<ndot; k++)
1437 for (
unsigned int j=0; j<n*incx; j++) {
1438 ScalarType
val = this->urand.number();
1442 for (
unsigned int k=0; k<ndot; k++) {
1443 val = this->urand.number();
1444 x1[j].fastAccessDx(k) =
val;
1445 x2[j].fastAccessDx(k) =
val;
1446 x3[j].fastAccessDx(k) =
val;
1460 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1485 for (
unsigned int i=0;
i<n;
i++) {
1486 A[
i*lda+
i].val() = 1.0;
1487 for (
unsigned int k=0; k<ndot; k++)
1502 typedef decltype(this->fad)
FadType;
1506 auto ndot = this->ndot_;
1508 VectorType
A(
n*
n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot),
1510 std::vector<ScalarType>
a(n*n);
1511 for (
unsigned int j=0; j<n; j++) {
1512 for (
unsigned int i=0;
i<n;
i++) {
1513 a[
i+j*n] = this->urand.number();
1514 A[
i+j*n] = a[
i+j*n];
1516 ScalarType
val = this->urand.number();
1522 for (
unsigned int k=0; k<ndot; k++) {
1523 val = this->urand.number();
1524 x1[j].fastAccessDx(k) =
val;
1525 x2[j].fastAccessDx(k) =
val;
1526 x3[j].fastAccessDx(k) =
val;
1527 x4[j].fastAccessDx(k) =
val;
1528 x5[j].fastAccessDx(k) =
val;
1542 unsigned int sz = n*n+n*(1+ndot);
1589 for (
unsigned int i=0;
i<n;
i++) {
1590 A[
i*n+
i].val() = 1.0;
1591 for (
unsigned int k=0; k<ndot; k++)
1612 typedef decltype(this->fad)
FadType;
1616 auto ndot = this->ndot_;
1618 VectorType
A(
n*
n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1619 for (
unsigned int j=0; j<n; j++) {
1620 for (
unsigned int i=0;
i<n;
i++) {
1621 A[
i+j*n] =
FadType(ndot, this->urand.number());
1622 for (
unsigned int k=0; k<ndot; k++)
1625 ScalarType
val = this->urand.number();
1641 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1666 for (
unsigned int i=0;
i<n;
i++) {
1667 A[
i*n+
i].val() = 1.0;
1668 for (
unsigned int k=0; k<ndot; k++)
1683 typedef decltype(this->fad)
FadType;
1688 auto ndot = this->ndot_;
1695 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot),
x(m,ndot),
y(n,ndot);
1696 for (
unsigned int j=0; j<n; j++) {
1697 for (
unsigned int i=0;
i<m;
i++) {
1698 ScalarType
val = this->urand.number();
1702 for (
unsigned int k=0; k<ndot; k++) {
1703 val = this->urand.number();
1704 A1[
i+j*m].fastAccessDx(k) =
val;
1705 A2[
i+j*m].fastAccessDx(k) =
val;
1706 A3[
i+j*m].fastAccessDx(k) =
val;
1710 for (
unsigned int i=0;
i<m;
i++) {
1711 x[
i] =
FadType(ndot, this->urand.number());
1712 for (
unsigned int k=0; k<ndot; k++)
1715 for (
unsigned int i=0;
i<n;
i++) {
1716 y[
i] =
FadType(ndot, this->urand.number());
1717 for (
unsigned int k=0; k<ndot; k++)
1720 FadType alpha(ndot, this->urand.number());
1721 for (
unsigned int k=0; k<ndot; k++) {
1722 alpha.fastAccessDx(k) = this->urand.number();
1726 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
1729 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
1733 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1735 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
1742 typedef decltype(this->fad)
FadType;
1747 auto ndot = this->ndot_;
1754 unsigned int lda = m+3;
1755 unsigned int incx = 2;
1756 unsigned int incy = 3;
1757 VectorType A1(lda*
n,ndot), A2(lda*n,ndot), A3(lda*n,ndot),
x(m*incx,ndot),
1759 for (
unsigned int j=0; j<n; j++) {
1760 for (
unsigned int i=0;
i<lda;
i++) {
1761 ScalarType
val = this->urand.number();
1765 for (
unsigned int k=0; k<ndot; k++) {
1766 val = this->urand.number();
1767 A1[
i+j*lda].fastAccessDx(k) =
val;
1768 A2[
i+j*lda].fastAccessDx(k) =
val;
1769 A3[
i+j*lda].fastAccessDx(k) =
val;
1773 for (
unsigned int i=0;
i<m*incx;
i++) {
1774 x[
i] =
FadType(ndot, this->urand.number());
1775 for (
unsigned int k=0; k<ndot; k++)
1778 for (
unsigned int i=0;
i<n*incy;
i++) {
1779 y[
i] =
FadType(ndot, this->urand.number());
1780 for (
unsigned int k=0; k<ndot; k++)
1783 FadType alpha(ndot, this->urand.number());
1784 for (
unsigned int k=0; k<ndot; k++) {
1785 alpha.fastAccessDx(k) = this->urand.number();
1789 teuchos_blas.
GER(m, n, alpha, &
x[0], incx, &y[0], incy, &A1[0], lda);
1792 sacado_blas.
GER(m, n, alpha, &
x[0], incx, &y[0], incy, &A2[0], lda);
1796 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1798 sacado_blas2.GER(m, n, alpha, &
x[0], incx, &y[0], incy, &A3[0], lda);
1805 typedef decltype(this->fad)
FadType;
1810 auto ndot = this->ndot_;
1817 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot),
x(m,ndot),
y(n,ndot);
1818 for (
unsigned int j=0; j<n; j++) {
1819 for (
unsigned int i=0;
i<m;
i++) {
1820 ScalarType
val = this->urand.number();
1824 for (
unsigned int k=0; k<ndot; k++) {
1825 val = this->urand.number();
1826 A1[
i+j*m].fastAccessDx(k) =
val;
1827 A2[
i+j*m].fastAccessDx(k) =
val;
1828 A3[
i+j*m].fastAccessDx(k) =
val;
1832 for (
unsigned int i=0;
i<m;
i++) {
1833 x[
i] =
FadType(ndot, this->urand.number());
1834 for (
unsigned int k=0; k<ndot; k++)
1837 for (
unsigned int i=0;
i<n;
i++) {
1838 y[
i] =
FadType(ndot, this->urand.number());
1839 for (
unsigned int k=0; k<ndot; k++)
1842 ScalarType alpha = this->urand.number();
1845 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
1848 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
1852 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1854 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
1861 typedef decltype(this->fad)
FadType;
1866 auto ndot = this->ndot_;
1873 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1874 A5(m*n,ndot),
x(m,ndot),
y(n,ndot);
1875 std::vector<ScalarType> xx(m);
1876 for (
unsigned int j=0; j<n; j++) {
1877 for (
unsigned int i=0;
i<m;
i++) {
1878 ScalarType
val = this->urand.number();
1884 for (
unsigned int k=0; k<ndot; k++) {
1885 val = this->urand.number();
1886 A1[
i+j*m].fastAccessDx(k) =
val;
1887 A2[
i+j*m].fastAccessDx(k) =
val;
1888 A3[
i+j*m].fastAccessDx(k) =
val;
1889 A4[
i+j*m].fastAccessDx(k) =
val;
1890 A5[
i+j*m].fastAccessDx(k) =
val;
1894 for (
unsigned int i=0;
i<m;
i++) {
1895 xx[
i] = this->urand.number();
1898 for (
unsigned int i=0;
i<n;
i++) {
1899 y[
i] =
FadType(ndot, this->urand.number());
1900 for (
unsigned int k=0; k<ndot; k++)
1903 FadType alpha(ndot, this->urand.number());
1904 for (
unsigned int k=0; k<ndot; k++) {
1905 alpha.fastAccessDx(k) = this->urand.number();
1909 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
1912 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
1916 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
1918 sacado_blas2.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
1922 sacado_blas.
GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A4[0], m);
1926 sacado_blas2.
GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A5[0], m);
1933 typedef decltype(this->fad)
FadType;
1938 auto ndot = this->ndot_;
1945 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1946 A5(m*n,ndot),
x(m,ndot),
y(n,ndot);
1947 std::vector<ScalarType> yy(n);
1948 for (
unsigned int j=0; j<n; j++) {
1949 for (
unsigned int i=0;
i<m;
i++) {
1950 ScalarType
val = this->urand.number();
1956 for (
unsigned int k=0; k<ndot; k++) {
1957 val = this->urand.number();
1958 A1[
i+j*m].fastAccessDx(k) =
val;
1959 A2[
i+j*m].fastAccessDx(k) =
val;
1960 A3[
i+j*m].fastAccessDx(k) =
val;
1961 A4[
i+j*m].fastAccessDx(k) =
val;
1962 A5[
i+j*m].fastAccessDx(k) =
val;
1966 for (
unsigned int i=0;
i<m;
i++) {
1967 x[
i] =
FadType(ndot, this->urand.number());
1968 for (
unsigned int k=0; k<ndot; k++)
1971 for (
unsigned int i=0;
i<n;
i++) {
1972 yy[
i] = this->urand.number();
1975 FadType alpha(ndot, this->urand.number());
1976 for (
unsigned int k=0; k<ndot; k++) {
1977 alpha.fastAccessDx(k) = this->urand.number();
1981 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
1984 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
1988 unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
1990 sacado_blas2.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
1994 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &yy[0], 1, &A4[0], m);
1998 sacado_blas2.
GER(m, n, alpha, &
x[0], 1, &yy[0], 1, &A5[0], m);
2005 typedef decltype(this->fad)
FadType;
2010 auto ndot = this->ndot_;
2017 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2018 A5(m*n,ndot),
x(m,ndot),
y(n,ndot);
// Scalar (derivative-free) copies of the GER operands: xx is the length-m
// x vector and yy the length-n y vector. xx must hold m entries because it
// is filled by a loop over i<m and read as the x argument of GER(m, n, ...);
// sizing it with n would overrun the buffer whenever m > n.
2019 std::vector<ScalarType> xx(m), yy(n);
2020 for (
unsigned int j=0; j<n; j++) {
2021 for (
unsigned int i=0;
i<m;
i++) {
2022 ScalarType
val = this->urand.number();
2028 for (
unsigned int k=0; k<ndot; k++) {
2029 val = this->urand.number();
2030 A1[
i+j*m].fastAccessDx(k) =
val;
2031 A2[
i+j*m].fastAccessDx(k) =
val;
2032 A3[
i+j*m].fastAccessDx(k) =
val;
2033 A4[
i+j*m].fastAccessDx(k) =
val;
2034 A5[
i+j*m].fastAccessDx(k) =
val;
2038 for (
unsigned int i=0;
i<m;
i++) {
2039 xx[
i] = this->urand.number();
2042 for (
unsigned int i=0;
i<n;
i++) {
2043 yy[
i] = this->urand.number();
2046 FadType alpha(ndot, this->urand.number());
2047 for (
unsigned int k=0; k<ndot; k++) {
2048 alpha.fastAccessDx(k) = this->urand.number();
2052 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
2055 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
2059 unsigned int sz = m*n*(1+ndot) + m + n;
2061 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
2065 sacado_blas.
GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
2069 sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
2076 typedef decltype(this->fad)
FadType;
2081 auto ndot = this->ndot_;
2088 VectorType A1(m*
n,ndot), A2(m*n,ndot), A3(m*n,ndot),
x(m,ndot),
y(n,ndot);
2089 for (
unsigned int j=0; j<n; j++) {
2090 for (
unsigned int i=0;
i<m;
i++) {
2091 ScalarType
val = this->urand.number();
2097 for (
unsigned int i=0;
i<m;
i++) {
2098 x[
i] =
FadType(ndot, this->urand.number());
2099 for (
unsigned int k=0; k<ndot; k++)
2102 for (
unsigned int i=0;
i<n;
i++) {
2103 y[
i] =
FadType(ndot, this->urand.number());
2104 for (
unsigned int k=0; k<ndot; k++)
2107 FadType alpha(ndot, this->urand.number());
2108 for (
unsigned int k=0; k<ndot; k++) {
2109 alpha.fastAccessDx(k) = this->urand.number();
2113 teuchos_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A1[0], m);
2116 sacado_blas.
GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A2[0], m);
2120 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
2122 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &y[0], 1, &A3[0], m);
2129 typedef decltype(this->fad)
FadType;
2135 auto ndot = this->ndot_;
2137 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2138 for (
unsigned int j=0; j<l; j++) {
2139 for (
unsigned int i=0;
i<m;
i++) {
2140 A[
i+j*m] =
FadType(ndot, this->urand.number());
2141 for (
unsigned int k=0; k<ndot; k++)
2145 for (
unsigned int j=0; j<n; j++) {
2146 for (
unsigned int i=0;
i<l;
i++) {
2147 B[
i+j*l] =
FadType(ndot, this->urand.number());
2148 for (
unsigned int k=0; k<ndot; k++)
2152 FadType alpha(ndot, this->urand.number());
2153 FadType beta(ndot, this->urand.number());
2154 for (
unsigned int k=0; k<ndot; k++) {
2155 alpha.fastAccessDx(k) = this->urand.number();
2156 beta.fastAccessDx(k) = this->urand.number();
2159 for (
unsigned int j=0; j<n; j++) {
2160 for (
unsigned int i=0;
i<m;
i++) {
2161 ScalarType
val = this->urand.number();
2165 for (
unsigned int k=0; k<ndot; k++) {
2166 val = this->urand.number();
2167 C1[
i+j*m].fastAccessDx(k) =
val;
2168 C2[
i+j*m].fastAccessDx(k) =
val;
2169 C3[
i+j*m].fastAccessDx(k) =
val;
2176 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2180 &
A[0], m, &
B[0], l, beta, &C2[0], m);
2184 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2187 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2193 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2195 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2197 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2204 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2206 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2208 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2215 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2217 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2219 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2227 typedef decltype(this->fad)
FadType;
2233 auto ndot = this->ndot_;
2235 unsigned int lda = m+4;
2236 unsigned int ldb = l+4;
2237 unsigned int ldc = m+5;
2238 VectorType
A(lda*l,ndot),
B(ldb*
n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2240 for (
unsigned int j=0; j<l; j++) {
2241 for (
unsigned int i=0;
i<lda;
i++) {
2242 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2243 for (
unsigned int k=0; k<ndot; k++)
2247 for (
unsigned int j=0; j<n; j++) {
2248 for (
unsigned int i=0;
i<ldb;
i++) {
2249 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2250 for (
unsigned int k=0; k<ndot; k++)
2254 FadType alpha(ndot, this->urand.number());
2255 FadType beta(ndot, this->urand.number());
2256 for (
unsigned int k=0; k<ndot; k++) {
2257 alpha.fastAccessDx(k) = this->urand.number();
2258 beta.fastAccessDx(k) = this->urand.number();
2261 for (
unsigned int j=0; j<n; j++) {
2262 for (
unsigned int i=0;
i<ldc;
i++) {
2263 ScalarType
val = this->urand.number();
2267 for (
unsigned int k=0; k<ndot; k++) {
2268 val = this->urand.number();
2269 C1[
i+j*ldc].fastAccessDx(k) =
val;
2270 C2[
i+j*ldc].fastAccessDx(k) =
val;
2271 C3[
i+j*ldc].fastAccessDx(k) =
val;
2278 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2282 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
2286 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2289 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
2296 typedef decltype(this->fad)
FadType;
2302 auto ndot = this->ndot_;
2304 unsigned int lda = l+3;
2305 unsigned int ldb = l+4;
2306 unsigned int ldc = m+5;
2307 VectorType
A(lda*m,ndot),
B(ldb*
n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2309 for (
unsigned int j=0; j<m; j++) {
2310 for (
unsigned int i=0;
i<lda;
i++) {
2311 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2312 for (
unsigned int k=0; k<ndot; k++)
2316 for (
unsigned int j=0; j<n; j++) {
2317 for (
unsigned int i=0;
i<ldb;
i++) {
2318 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2319 for (
unsigned int k=0; k<ndot; k++)
2323 FadType alpha(ndot, this->urand.number());
2324 FadType beta(ndot, this->urand.number());
2325 for (
unsigned int k=0; k<ndot; k++) {
2326 alpha.fastAccessDx(k) = this->urand.number();
2327 beta.fastAccessDx(k) = this->urand.number();
2330 for (
unsigned int j=0; j<n; j++) {
2331 for (
unsigned int i=0;
i<ldc;
i++) {
2332 ScalarType
val = this->urand.number();
2336 for (
unsigned int k=0; k<ndot; k++) {
2337 val = this->urand.number();
2338 C1[
i+j*ldc].fastAccessDx(k) =
val;
2339 C2[
i+j*ldc].fastAccessDx(k) =
val;
2340 C3[
i+j*ldc].fastAccessDx(k) =
val;
2347 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2351 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
2355 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2358 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
2365 typedef decltype(this->fad)
FadType;
2371 auto ndot = this->ndot_;
2373 unsigned int lda = m+4;
2374 unsigned int ldb =
n+4;
2375 unsigned int ldc = m+5;
2376 VectorType
A(lda*l,ndot),
B(ldb*l,ndot), C1(ldc*
n,ndot), C2(ldc*n,ndot),
2378 for (
unsigned int j=0; j<l; j++) {
2379 for (
unsigned int i=0;
i<lda;
i++) {
2380 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2381 for (
unsigned int k=0; k<ndot; k++)
2385 for (
unsigned int j=0; j<l; j++) {
2386 for (
unsigned int i=0;
i<ldb;
i++) {
2387 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2388 for (
unsigned int k=0; k<ndot; k++)
2392 FadType alpha(ndot, this->urand.number());
2393 FadType beta(ndot, this->urand.number());
2394 for (
unsigned int k=0; k<ndot; k++) {
2395 alpha.fastAccessDx(k) = this->urand.number();
2396 beta.fastAccessDx(k) = this->urand.number();
2399 for (
unsigned int j=0; j<n; j++) {
2400 for (
unsigned int i=0;
i<ldc;
i++) {
2401 ScalarType
val = this->urand.number();
2405 for (
unsigned int k=0; k<ndot; k++) {
2406 val = this->urand.number();
2407 C1[
i+j*ldc].fastAccessDx(k) =
val;
2408 C2[
i+j*ldc].fastAccessDx(k) =
val;
2409 C3[
i+j*ldc].fastAccessDx(k) =
val;
2416 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2420 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
2424 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2427 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
2434 typedef decltype(this->fad)
FadType;
2440 auto ndot = this->ndot_;
2442 unsigned int lda = l+3;
2443 unsigned int ldb =
n+4;
2444 unsigned int ldc = m+5;
2445 VectorType
A(lda*m,ndot),
B(ldb*l,ndot), C1(ldc*
n,ndot), C2(ldc*n,ndot),
2447 for (
unsigned int j=0; j<m; j++) {
2448 for (
unsigned int i=0;
i<lda;
i++) {
2449 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2450 for (
unsigned int k=0; k<ndot; k++)
2454 for (
unsigned int j=0; j<l; j++) {
2455 for (
unsigned int i=0;
i<ldb;
i++) {
2456 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2457 for (
unsigned int k=0; k<ndot; k++)
2461 FadType alpha(ndot, this->urand.number());
2462 FadType beta(ndot, this->urand.number());
2463 for (
unsigned int k=0; k<ndot; k++) {
2464 alpha.fastAccessDx(k) = this->urand.number();
2465 beta.fastAccessDx(k) = this->urand.number();
2468 for (
unsigned int j=0; j<n; j++) {
2469 for (
unsigned int i=0;
i<ldc;
i++) {
2470 ScalarType
val = this->urand.number();
2474 for (
unsigned int k=0; k<ndot; k++) {
2475 val = this->urand.number();
2476 C1[
i+j*ldc].fastAccessDx(k) =
val;
2477 C2[
i+j*ldc].fastAccessDx(k) =
val;
2478 C3[
i+j*ldc].fastAccessDx(k) =
val;
2485 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2489 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
2493 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2496 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
2503 typedef decltype(this->fad)
FadType;
2509 auto ndot = this->ndot_;
2511 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2512 for (
unsigned int j=0; j<l; j++) {
2513 for (
unsigned int i=0;
i<m;
i++) {
2514 A[
i+j*m] =
FadType(ndot, this->urand.number());
2515 for (
unsigned int k=0; k<ndot; k++)
2519 for (
unsigned int j=0; j<n; j++) {
2520 for (
unsigned int i=0;
i<l;
i++) {
2521 B[
i+j*l] =
FadType(ndot, this->urand.number());
2522 for (
unsigned int k=0; k<ndot; k++)
2526 FadType alpha(ndot, this->urand.number());
2527 FadType beta(ndot, this->urand.number());
2528 for (
unsigned int k=0; k<ndot; k++) {
2529 alpha.fastAccessDx(k) = this->urand.number();
2530 beta.fastAccessDx(k) = this->urand.number();
2533 for (
unsigned int j=0; j<n; j++) {
2534 for (
unsigned int i=0;
i<m;
i++) {
2535 ScalarType
val = this->urand.number();
2544 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2548 &
A[0], m, &
B[0], l, beta, &C2[0], m);
2552 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2555 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2561 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2563 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2565 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2572 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2574 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2576 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2583 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2585 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2587 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2595 typedef decltype(this->fad)
FadType;
2601 auto ndot = this->ndot_;
2603 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2604 for (
unsigned int j=0; j<l; j++) {
2605 for (
unsigned int i=0;
i<m;
i++) {
2606 A[
i+j*m] =
FadType(ndot, this->urand.number());
2607 for (
unsigned int k=0; k<ndot; k++)
2611 for (
unsigned int j=0; j<n; j++) {
2612 for (
unsigned int i=0;
i<l;
i++) {
2613 B[
i+j*l] =
FadType(ndot, this->urand.number());
2614 for (
unsigned int k=0; k<ndot; k++)
2618 ScalarType alpha = this->urand.number();
2619 ScalarType beta = this->urand.number();
2621 for (
unsigned int j=0; j<n; j++) {
2622 for (
unsigned int i=0;
i<m;
i++) {
2623 ScalarType
val = this->urand.number();
2627 for (
unsigned int k=0; k<ndot; k++) {
2628 val = this->urand.number();
2629 C1[
i+j*m].fastAccessDx(k) =
val;
2630 C2[
i+j*m].fastAccessDx(k) =
val;
2631 C3[
i+j*m].fastAccessDx(k) =
val;
2638 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2642 &
A[0], m, &
B[0], l, beta, &C2[0], m);
2646 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2649 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2655 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2657 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2659 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2666 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2668 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2670 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2677 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2679 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2681 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2689 typedef decltype(this->fad)
FadType;
2695 auto ndot = this->ndot_;
2697 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2698 C4(m*n,ndot), C5(m*n,ndot);
2699 std::vector<ScalarType>
a(m*l);
2700 for (
unsigned int j=0; j<l; j++) {
2701 for (
unsigned int i=0;
i<m;
i++) {
2702 a[
i+j*m] = this->urand.number();
2703 A[
i+j*m] = a[
i+j*m];
2706 for (
unsigned int j=0; j<n; j++) {
2707 for (
unsigned int i=0;
i<l;
i++) {
2708 B[
i+j*l] =
FadType(ndot, this->urand.number());
2709 for (
unsigned int k=0; k<ndot; k++)
2713 FadType alpha(ndot, this->urand.number());
2714 FadType beta(ndot, this->urand.number());
2715 for (
unsigned int k=0; k<ndot; k++) {
2716 alpha.fastAccessDx(k) = this->urand.number();
2717 beta.fastAccessDx(k) = this->urand.number();
2720 for (
unsigned int j=0; j<n; j++) {
2721 for (
unsigned int i=0;
i<m;
i++) {
2722 ScalarType
val = this->urand.number();
2728 for (
unsigned int k=0; k<ndot; k++) {
2729 val = this->urand.number();
2730 C1[
i+j*m].fastAccessDx(k) =
val;
2731 C2[
i+j*m].fastAccessDx(k) =
val;
2732 C3[
i+j*m].fastAccessDx(k) =
val;
2733 C4[
i+j*m].fastAccessDx(k) =
val;
2734 C5[
i+j*m].fastAccessDx(k) =
val;
2741 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2745 &
A[0], m, &
B[0], l, beta, &C2[0], m);
2749 unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
2752 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2757 &a[0], m, &
B[0], l, beta, &C4[0], m);
2762 &a[0], m, &
B[0], l, beta, &C5[0], m);
2768 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2770 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2772 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2774 &a[0], l, &
B[0], l, beta, &C4[0], m);
2776 &a[0], l, &
B[0], l, beta, &C5[0], m);
2785 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2787 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2789 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2791 &a[0], m, &
B[0], n, beta, &C4[0], m);
2793 &a[0], m, &
B[0], n, beta, &C5[0], m);
2802 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2804 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2806 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2808 &a[0], l, &
B[0], n, beta, &C4[0], m);
2810 &a[0], l, &
B[0], n, beta, &C5[0], m);
2820 typedef decltype(this->fad)
FadType;
2826 auto ndot = this->ndot_;
2828 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2829 C4(m*n,ndot), C5(m*n,ndot);
2830 std::vector<ScalarType> b(l*n);
2831 for (
unsigned int j=0; j<l; j++) {
2832 for (
unsigned int i=0;
i<m;
i++) {
2833 A[
i+j*m] =
FadType(ndot, this->urand.number());
2834 for (
unsigned int k=0; k<ndot; k++)
2838 for (
unsigned int j=0; j<n; j++) {
2839 for (
unsigned int i=0;
i<l;
i++) {
2840 b[
i+j*l] = this->urand.number();
2841 B[
i+j*l] = b[
i+j*l];
2844 FadType alpha(ndot, this->urand.number());
2845 FadType beta(ndot, this->urand.number());
2846 for (
unsigned int k=0; k<ndot; k++) {
2847 alpha.fastAccessDx(k) = this->urand.number();
2848 beta.fastAccessDx(k) = this->urand.number();
2851 for (
unsigned int j=0; j<n; j++) {
2852 for (
unsigned int i=0;
i<m;
i++) {
2853 ScalarType
val = this->urand.number();
2859 for (
unsigned int k=0; k<ndot; k++) {
2860 val = this->urand.number();
2861 C1[
i+j*m].fastAccessDx(k) =
val;
2862 C2[
i+j*m].fastAccessDx(k) =
val;
2863 C3[
i+j*m].fastAccessDx(k) =
val;
2864 C4[
i+j*m].fastAccessDx(k) =
val;
2865 C5[
i+j*m].fastAccessDx(k) =
val;
2872 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2876 &
A[0], m, &
B[0], l, beta, &C2[0], m);
2880 unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
2883 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2888 &
A[0], m, &b[0], l, beta, &C4[0], m);
2893 &
A[0], m, &b[0], l, beta, &C5[0], m);
2899 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2901 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2903 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2905 &
A[0], l, &b[0], l, beta, &C4[0], m);
2907 &
A[0], l, &b[0], l, beta, &C5[0], m);
2916 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2918 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2920 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2922 &
A[0], m, &b[0], n, beta, &C4[0], m);
2924 &
A[0], m, &b[0], n, beta, &C5[0], m);
2933 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2935 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2937 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2939 &
A[0], l, &b[0], n, beta, &C4[0], m);
2941 &
A[0], l, &b[0], n, beta, &C5[0], m);
2951 typedef decltype(this->fad)
FadType;
2957 auto ndot = this->ndot_;
2959 VectorType
A(m*l,ndot),
B(l*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2960 C4(m*n,ndot), C5(m*n,ndot);
2961 std::vector<ScalarType>
a(m*l), b(l*n);
2962 for (
unsigned int j=0; j<l; j++) {
2963 for (
unsigned int i=0;
i<m;
i++) {
2964 a[
i+j*m] = this->urand.number();
2965 A[
i+j*m] =
a[
i+j*m];
2968 for (
unsigned int j=0; j<n; j++) {
2969 for (
unsigned int i=0;
i<l;
i++) {
2970 b[
i+j*l] = this->urand.number();
2971 B[
i+j*l] = b[
i+j*l];
2974 FadType alpha(ndot, this->urand.number());
2975 FadType beta(ndot, this->urand.number());
2976 for (
unsigned int k=0; k<ndot; k++) {
2977 alpha.fastAccessDx(k) = this->urand.number();
2978 beta.fastAccessDx(k) = this->urand.number();
2981 for (
unsigned int j=0; j<n; j++) {
2982 for (
unsigned int i=0;
i<m;
i++) {
2983 ScalarType
val = this->urand.number();
2989 for (
unsigned int k=0; k<ndot; k++) {
2990 val = this->urand.number();
2991 C1[
i+j*m].fastAccessDx(k) =
val;
2992 C2[
i+j*m].fastAccessDx(k) =
val;
2993 C3[
i+j*m].fastAccessDx(k) =
val;
2994 C4[
i+j*m].fastAccessDx(k) =
val;
2995 C5[
i+j*m].fastAccessDx(k) =
val;
3002 &
A[0], m, &
B[0], l, beta, &C1[0], m);
3006 &
A[0], m, &
B[0], l, beta, &C2[0], m);
3010 unsigned int sz = m*l + l*n + m*n*(1+ndot);
3013 &
A[0], m, &
B[0], l, beta, &C3[0], m);
3018 &
a[0], m, &b[0], l, beta, &C4[0], m);
3023 &
a[0], m, &b[0], l, beta, &C5[0], m);
3029 &
A[0], l, &
B[0], l, beta, &C1[0], m);
3031 &
A[0], l, &
B[0], l, beta, &C2[0], m);
3033 &
A[0], l, &
B[0], l, beta, &C3[0], m);
3035 &
a[0], l, &b[0], l, beta, &C4[0], m);
3037 &
a[0], l, &b[0], l, beta, &C5[0], m);
3046 &
A[0], m, &
B[0], n, beta, &C1[0], m);
3048 &
A[0], m, &
B[0], n, beta, &C2[0], m);
3050 &
A[0], m, &
B[0], n, beta, &C3[0], m);
3052 &
a[0], m, &b[0], n, beta, &C4[0], m);
3054 &
a[0], m, &b[0], n, beta, &C5[0], m);
3063 &
A[0], l, &
B[0], n, beta, &C1[0], m);
3065 &
A[0], l, &
B[0], n, beta, &C2[0], m);
3067 &
A[0], l, &
B[0], n, beta, &C3[0], m);
3069 &
a[0], l, &b[0], n, beta, &C4[0], m);
3071 &
a[0], l, &b[0], n, beta, &C5[0], m);
3081 typedef decltype(this->fad)
FadType;
3086 auto ndot = this->ndot_;
3093 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3094 for (
unsigned int j=0; j<m; j++) {
3095 for (
unsigned int i=0;
i<m;
i++) {
3096 A[
i+j*m] =
FadType(ndot, this->urand.number());
3097 for (
unsigned int k=0; k<ndot; k++)
3101 for (
unsigned int j=0; j<n; j++) {
3102 for (
unsigned int i=0;
i<m;
i++) {
3103 B[
i+j*m] =
FadType(ndot, this->urand.number());
3104 for (
unsigned int k=0; k<ndot; k++)
3108 FadType alpha(ndot, this->urand.number());
3109 FadType beta(ndot, this->urand.number());
3110 for (
unsigned int k=0; k<ndot; k++) {
3111 alpha.fastAccessDx(k) = this->urand.number();
3112 beta.fastAccessDx(k) = this->urand.number();
3115 for (
unsigned int j=0; j<n; j++) {
3116 for (
unsigned int i=0;
i<m;
i++) {
3117 ScalarType
val = this->urand.number();
3121 for (
unsigned int k=0; k<ndot; k++) {
3122 val = this->urand.number();
3123 C1[
i+j*m].fastAccessDx(k) =
val;
3124 C2[
i+j*m].fastAccessDx(k) =
val;
3125 C3[
i+j*m].fastAccessDx(k) =
val;
3132 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3136 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3140 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3143 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3149 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3151 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3153 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3161 typedef decltype(this->fad)
FadType;
3166 auto ndot = this->ndot_;
3173 VectorType
A(
n*
n,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3174 for (
unsigned int j=0; j<n; j++) {
3175 for (
unsigned int i=0;
i<n;
i++) {
3176 A[
i+j*n] =
FadType(ndot, this->urand.number());
3177 for (
unsigned int k=0; k<ndot; k++)
3181 for (
unsigned int j=0; j<n; j++) {
3182 for (
unsigned int i=0;
i<m;
i++) {
3183 B[
i+j*m] =
FadType(ndot, this->urand.number());
3184 for (
unsigned int k=0; k<ndot; k++)
3188 FadType alpha(ndot, this->urand.number());
3189 FadType beta(ndot, this->urand.number());
3190 for (
unsigned int k=0; k<ndot; k++) {
3191 alpha.fastAccessDx(k) = this->urand.number();
3192 beta.fastAccessDx(k) = this->urand.number();
3195 for (
unsigned int j=0; j<n; j++) {
3196 for (
unsigned int i=0;
i<m;
i++) {
3197 ScalarType
val = this->urand.number();
3201 for (
unsigned int k=0; k<ndot; k++) {
3202 val = this->urand.number();
3203 C1[
i+j*m].fastAccessDx(k) =
val;
3204 C2[
i+j*m].fastAccessDx(k) =
val;
3205 C3[
i+j*m].fastAccessDx(k) =
val;
3212 &
A[0], n, &
B[0], m, beta, &C1[0], m);
3216 &
A[0], n, &
B[0], m, beta, &C2[0], m);
3220 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3223 &
A[0], n, &
B[0], m, beta, &C3[0], m);
3229 &
A[0], n, &
B[0], m, beta, &C1[0], m);
3231 &
A[0], n, &
B[0], m, beta, &C2[0], m);
3233 &
A[0], n, &
B[0], m, beta, &C3[0], m);
3241 typedef decltype(this->fad)
FadType;
3246 auto ndot = this->ndot_;
3253 unsigned int lda = m+4;
3254 unsigned int ldb = m+5;
3255 unsigned int ldc = m+6;
3256 VectorType
A(lda*m,ndot),
B(ldb*
n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3258 for (
unsigned int j=0; j<m; j++) {
3259 for (
unsigned int i=0;
i<lda;
i++) {
3260 A[
i+j*lda] =
FadType(ndot, this->urand.number());
3261 for (
unsigned int k=0; k<ndot; k++)
3265 for (
unsigned int j=0; j<n; j++) {
3266 for (
unsigned int i=0;
i<ldb;
i++) {
3267 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
3268 for (
unsigned int k=0; k<ndot; k++)
3272 FadType alpha(ndot, this->urand.number());
3273 FadType beta(ndot, this->urand.number());
3274 for (
unsigned int k=0; k<ndot; k++) {
3275 alpha.fastAccessDx(k) = this->urand.number();
3276 beta.fastAccessDx(k) = this->urand.number();
3279 for (
unsigned int j=0; j<n; j++) {
3280 for (
unsigned int i=0;
i<ldc;
i++) {
3281 ScalarType
val = this->urand.number();
3285 for (
unsigned int k=0; k<ndot; k++) {
3286 val = this->urand.number();
3287 C1[
i+j*ldc].fastAccessDx(k) =
val;
3288 C2[
i+j*ldc].fastAccessDx(k) =
val;
3289 C3[
i+j*ldc].fastAccessDx(k) =
val;
3296 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3300 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3304 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3307 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
3313 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3315 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3317 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
3325 typedef decltype(this->fad)
FadType;
3330 auto ndot = this->ndot_;
3337 unsigned int lda =
n+4;
3338 unsigned int ldb = m+5;
3339 unsigned int ldc = m+6;
3340 VectorType
A(lda*
n,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3342 for (
unsigned int j=0; j<n; j++) {
3343 for (
unsigned int i=0;
i<lda;
i++) {
3344 A[
i+j*lda] =
FadType(ndot, this->urand.number());
3345 for (
unsigned int k=0; k<ndot; k++)
3349 for (
unsigned int j=0; j<n; j++) {
3350 for (
unsigned int i=0;
i<ldb;
i++) {
3351 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
3352 for (
unsigned int k=0; k<ndot; k++)
3356 FadType alpha(ndot, this->urand.number());
3357 FadType beta(ndot, this->urand.number());
3358 for (
unsigned int k=0; k<ndot; k++) {
3359 alpha.fastAccessDx(k) = this->urand.number();
3360 beta.fastAccessDx(k) = this->urand.number();
3363 for (
unsigned int j=0; j<n; j++) {
3364 for (
unsigned int i=0;
i<ldc;
i++) {
3365 ScalarType
val = this->urand.number();
3369 for (
unsigned int k=0; k<ndot; k++) {
3370 val = this->urand.number();
3371 C1[
i+j*ldc].fastAccessDx(k) =
val;
3372 C2[
i+j*ldc].fastAccessDx(k) =
val;
3373 C3[
i+j*ldc].fastAccessDx(k) =
val;
3380 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3384 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3388 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3391 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
3397 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3399 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3401 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
3409 typedef decltype(this->fad)
FadType;
3414 auto ndot = this->ndot_;
3421 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3422 for (
unsigned int j=0; j<m; j++) {
3423 for (
unsigned int i=0;
i<m;
i++) {
3424 A[
i+j*m] =
FadType(ndot, this->urand.number());
3425 for (
unsigned int k=0; k<ndot; k++)
3429 for (
unsigned int j=0; j<n; j++) {
3430 for (
unsigned int i=0;
i<m;
i++) {
3431 B[
i+j*m] =
FadType(ndot, this->urand.number());
3432 for (
unsigned int k=0; k<ndot; k++)
3436 FadType alpha(ndot, this->urand.number());
3437 FadType beta(ndot, this->urand.number());
3438 for (
unsigned int k=0; k<ndot; k++) {
3439 alpha.fastAccessDx(k) = this->urand.number();
3440 beta.fastAccessDx(k) = this->urand.number();
3443 for (
unsigned int j=0; j<n; j++) {
3444 for (
unsigned int i=0;
i<m;
i++) {
3445 ScalarType
val = this->urand.number();
3454 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3458 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3462 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3465 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3471 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3473 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3475 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3483 typedef decltype(this->fad)
FadType;
3488 auto ndot = this->ndot_;
3495 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3496 for (
unsigned int j=0; j<m; j++) {
3497 for (
unsigned int i=0;
i<m;
i++) {
3498 A[
i+j*m] =
FadType(ndot, this->urand.number());
3499 for (
unsigned int k=0; k<ndot; k++)
3503 for (
unsigned int j=0; j<n; j++) {
3504 for (
unsigned int i=0;
i<m;
i++) {
3505 B[
i+j*m] =
FadType(ndot, this->urand.number());
3506 for (
unsigned int k=0; k<ndot; k++)
3510 ScalarType alpha = this->urand.number();
3511 ScalarType beta = this->urand.number();
3513 for (
unsigned int j=0; j<n; j++) {
3514 for (
unsigned int i=0;
i<m;
i++) {
3515 ScalarType
val = this->urand.number();
3519 for (
unsigned int k=0; k<ndot; k++) {
3520 val = this->urand.number();
3521 C1[
i+j*m].fastAccessDx(k) =
val;
3522 C2[
i+j*m].fastAccessDx(k) =
val;
3523 C3[
i+j*m].fastAccessDx(k) =
val;
3530 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3534 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3538 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3541 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3547 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3549 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3551 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3559 typedef decltype(this->fad)
FadType;
3564 auto ndot = this->ndot_;
3571 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3572 C4(m*n,ndot), C5(m*n,ndot);
3573 std::vector<ScalarType>
a(m*m);
3574 for (
unsigned int j=0; j<m; j++) {
3575 for (
unsigned int i=0;
i<m;
i++) {
3576 a[
i+j*m] = this->urand.number();
3577 A[
i+j*m] = a[
i+j*m];
3580 for (
unsigned int j=0; j<n; j++) {
3581 for (
unsigned int i=0;
i<m;
i++) {
3582 B[
i+j*m] =
FadType(ndot, this->urand.number());
3583 for (
unsigned int k=0; k<ndot; k++)
3587 FadType alpha(ndot, this->urand.number());
3588 FadType beta(ndot, this->urand.number());
3589 for (
unsigned int k=0; k<ndot; k++) {
3590 alpha.fastAccessDx(k) = this->urand.number();
3591 beta.fastAccessDx(k) = this->urand.number();
3594 for (
unsigned int j=0; j<n; j++) {
3595 for (
unsigned int i=0;
i<m;
i++) {
3596 ScalarType
val = this->urand.number();
3602 for (
unsigned int k=0; k<ndot; k++) {
3603 val = this->urand.number();
3604 C1[
i+j*m].fastAccessDx(k) =
val;
3605 C2[
i+j*m].fastAccessDx(k) =
val;
3606 C3[
i+j*m].fastAccessDx(k) =
val;
3607 C4[
i+j*m].fastAccessDx(k) =
val;
3608 C5[
i+j*m].fastAccessDx(k) =
val;
3615 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3619 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3623 unsigned int sz = m*m + 2*m*n*(1+ndot);
3626 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3631 &a[0], m, &
B[0], m, beta, &C4[0], m);
3636 &a[0], m, &
B[0], m, beta, &C5[0], m);
3642 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3644 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3646 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3648 &a[0], m, &
B[0], m, beta, &C4[0], m);
3650 &a[0], m, &
B[0], m, beta, &C5[0], m);
3660 typedef decltype(this->fad)
FadType;
3665 auto ndot = this->ndot_;
3672 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3673 C4(m*n,ndot), C5(m*n,ndot);
3674 std::vector<ScalarType> b(m*n);
3675 for (
unsigned int j=0; j<m; j++) {
3676 for (
unsigned int i=0;
i<m;
i++) {
3677 A[
i+j*m] =
FadType(ndot, this->urand.number());
3678 for (
unsigned int k=0; k<ndot; k++)
3682 for (
unsigned int j=0; j<n; j++) {
3683 for (
unsigned int i=0;
i<m;
i++) {
3684 b[
i+j*m] = this->urand.number();
3685 B[
i+j*m] = b[
i+j*m];
3688 FadType alpha(ndot, this->urand.number());
3689 FadType beta(ndot, this->urand.number());
3690 for (
unsigned int k=0; k<ndot; k++) {
3691 alpha.fastAccessDx(k) = this->urand.number();
3692 beta.fastAccessDx(k) = this->urand.number();
3695 for (
unsigned int j=0; j<n; j++) {
3696 for (
unsigned int i=0;
i<m;
i++) {
3697 ScalarType
val = this->urand.number();
3703 for (
unsigned int k=0; k<ndot; k++) {
3704 val = this->urand.number();
3705 C1[
i+j*m].fastAccessDx(k) =
val;
3706 C2[
i+j*m].fastAccessDx(k) =
val;
3707 C3[
i+j*m].fastAccessDx(k) =
val;
3708 C4[
i+j*m].fastAccessDx(k) =
val;
3709 C5[
i+j*m].fastAccessDx(k) =
val;
3716 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3720 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3724 unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
3727 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3732 &
A[0], m, &b[0], m, beta, &C4[0], m);
3737 &
A[0], m, &b[0], m, beta, &C5[0], m);
3743 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3745 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3747 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3749 &
A[0], m, &b[0], m, beta, &C4[0], m);
3751 &
A[0], m, &b[0], m, beta, &C5[0], m);
3761 typedef decltype(this->fad)
FadType;
3766 auto ndot = this->ndot_;
3773 VectorType
A(m*m,ndot),
B(m*
n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3774 C4(m*n,ndot), C5(m*n,ndot);
3775 std::vector<ScalarType>
a(m*m), b(m*n);
3776 for (
unsigned int j=0; j<m; j++) {
3777 for (
unsigned int i=0;
i<m;
i++) {
3778 a[
i+j*m] = this->urand.number();
3779 A[
i+j*m] =
a[
i+j*m];
3782 for (
unsigned int j=0; j<n; j++) {
3783 for (
unsigned int i=0;
i<m;
i++) {
3784 b[
i+j*m] = this->urand.number();
3785 B[
i+j*m] = b[
i+j*m];
3788 FadType alpha(ndot, this->urand.number());
3789 FadType beta(ndot, this->urand.number());
3790 for (
unsigned int k=0; k<ndot; k++) {
3791 alpha.fastAccessDx(k) = this->urand.number();
3792 beta.fastAccessDx(k) = this->urand.number();
3795 for (
unsigned int j=0; j<n; j++) {
3796 for (
unsigned int i=0;
i<m;
i++) {
3797 ScalarType
val = this->urand.number();
3803 for (
unsigned int k=0; k<ndot; k++) {
3804 val = this->urand.number();
3805 C1[
i+j*m].fastAccessDx(k) =
val;
3806 C2[
i+j*m].fastAccessDx(k) =
val;
3807 C3[
i+j*m].fastAccessDx(k) =
val;
3808 C4[
i+j*m].fastAccessDx(k) =
val;
3809 C5[
i+j*m].fastAccessDx(k) =
val;
3816 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3820 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3824 unsigned int sz = m*m + m*n*(2+ndot);
3827 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3832 &
a[0], m, &b[0], m, beta, &C4[0], m);
3837 &
a[0], m, &b[0], m, beta, &C5[0], m);
3843 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3845 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3847 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3849 &
a[0], m, &b[0], m, beta, &C4[0], m);
3851 &
a[0], m, &b[0], m, beta, &C5[0], m);
3861 typedef decltype(this->fad)
FadType;
3866 auto ndot = this->ndot_;
3868 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
3869 for (
unsigned int j=0; j<m; j++) {
3870 for (
unsigned int i=0;
i<m;
i++) {
3871 A[
i+j*m] =
FadType(ndot, this->urand.number());
3872 for (
unsigned int k=0; k<ndot; k++)
3876 FadType alpha(ndot, this->urand.number());
3877 for (
unsigned int k=0; k<ndot; k++) {
3878 alpha.fastAccessDx(k) = this->urand.number();
3881 for (
unsigned int j=0; j<n; j++) {
3882 for (
unsigned int i=0;
i<m;
i++) {
3883 ScalarType
val = this->urand.number();
3887 for (
unsigned int k=0; k<ndot; k++) {
3888 val = this->urand.number();
3889 B1[
i+j*m].fastAccessDx(k) =
val;
3890 B2[
i+j*m].fastAccessDx(k) =
val;
3891 B3[
i+j*m].fastAccessDx(k) =
val;
3906 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
3931 for (
unsigned int i=0;
i<m;
i++) {
3932 A[
i*m+
i].val() = 1.0;
3933 for (
unsigned int k=0; k<ndot; k++)
3948 typedef decltype(this->fad)
FadType;
3953 auto ndot = this->ndot_;
3955 VectorType
A(
n*
n,ndot),
B1(m*n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
3956 for (
unsigned int j=0; j<n; j++) {
3957 for (
unsigned int i=0;
i<n;
i++) {
3958 A[
i+j*n] =
FadType(ndot, this->urand.number());
3959 for (
unsigned int k=0; k<ndot; k++)
3963 FadType alpha(ndot, this->urand.number());
3964 for (
unsigned int k=0; k<ndot; k++) {
3965 alpha.fastAccessDx(k) = this->urand.number();
3968 for (
unsigned int j=0; j<n; j++) {
3969 for (
unsigned int i=0;
i<m;
i++) {
3970 ScalarType
val = this->urand.number();
3974 for (
unsigned int k=0; k<ndot; k++) {
3975 val = this->urand.number();
3976 B1[
i+j*m].fastAccessDx(k) =
val;
3977 B2[
i+j*m].fastAccessDx(k) =
val;
3978 B3[
i+j*m].fastAccessDx(k) =
val;
3993 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4018 for (
unsigned int i=0;
i<n;
i++) {
4019 A[
i*n+
i].val() = 1.0;
4020 for (
unsigned int k=0; k<ndot; k++)
4035 typedef decltype(this->fad)
FadType;
4040 auto ndot = this->ndot_;
4042 unsigned int lda = m+4;
4043 unsigned int ldb = m+5;
4044 VectorType
A(lda*m,ndot),
B1(ldb*
n,ndot),
B2(ldb*n,ndot), B3(ldb*n,ndot);
4045 for (
unsigned int j=0; j<m; j++) {
4046 for (
unsigned int i=0;
i<lda;
i++) {
4047 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4048 for (
unsigned int k=0; k<ndot; k++)
4052 FadType alpha(ndot, this->urand.number());
4053 for (
unsigned int k=0; k<ndot; k++) {
4054 alpha.fastAccessDx(k) = this->urand.number();
4057 for (
unsigned int j=0; j<n; j++) {
4058 for (
unsigned int i=0;
i<ldb;
i++) {
4059 ScalarType
val = this->urand.number();
4063 for (
unsigned int k=0; k<ndot; k++) {
4064 val = this->urand.number();
4065 B1[
i+j*ldb].fastAccessDx(k) =
val;
4066 B2[
i+j*ldb].fastAccessDx(k) =
val;
4067 B3[
i+j*ldb].fastAccessDx(k) =
val;
4082 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4107 for (
unsigned int i=0;
i<m;
i++) {
4108 A[
i*lda+
i].val() = 1.0;
4109 for (
unsigned int k=0; k<ndot; k++)
4124 typedef decltype(this->fad)
FadType;
4129 auto ndot = this->ndot_;
4131 unsigned int lda =
n+4;
4132 unsigned int ldb = m+5;
4133 VectorType
A(lda*
n,ndot),
B1(ldb*n,ndot),
B2(ldb*n,ndot), B3(ldb*n,ndot);
4134 for (
unsigned int j=0; j<n; j++) {
4135 for (
unsigned int i=0;
i<lda;
i++) {
4136 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4137 for (
unsigned int k=0; k<ndot; k++)
4141 FadType alpha(ndot, this->urand.number());
4142 for (
unsigned int k=0; k<ndot; k++) {
4143 alpha.fastAccessDx(k) = this->urand.number();
4146 for (
unsigned int j=0; j<n; j++) {
4147 for (
unsigned int i=0;
i<ldb;
i++) {
4148 ScalarType
val = this->urand.number();
4152 for (
unsigned int k=0; k<ndot; k++) {
4153 val = this->urand.number();
4154 B1[
i+j*ldb].fastAccessDx(k) =
val;
4155 B2[
i+j*ldb].fastAccessDx(k) =
val;
4156 B3[
i+j*ldb].fastAccessDx(k) =
val;
4171 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4196 for (
unsigned int i=0;
i<n;
i++) {
4197 A[
i*lda+
i].val() = 1.0;
4198 for (
unsigned int k=0; k<ndot; k++)
4213 typedef decltype(this->fad)
FadType;
4218 auto ndot = this->ndot_;
4220 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4221 for (
unsigned int j=0; j<m; j++) {
4222 for (
unsigned int i=0;
i<m;
i++) {
4223 A[
i+j*m] =
FadType(ndot, this->urand.number());
4224 for (
unsigned int k=0; k<ndot; k++)
4228 ScalarType alpha = this->urand.number();
4230 for (
unsigned int j=0; j<n; j++) {
4231 for (
unsigned int i=0;
i<m;
i++) {
4232 ScalarType
val = this->urand.number();
4236 for (
unsigned int k=0; k<ndot; k++) {
4237 val = this->urand.number();
4238 B1[
i+j*m].fastAccessDx(k) =
val;
4239 B2[
i+j*m].fastAccessDx(k) =
val;
4240 B3[
i+j*m].fastAccessDx(k) =
val;
4255 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4280 for (
unsigned int i=0;
i<m;
i++) {
4281 A[
i*m+
i].val() = 1.0;
4282 for (
unsigned int k=0; k<ndot; k++)
4297 typedef decltype(this->fad)
FadType;
4302 auto ndot = this->ndot_;
4304 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4305 for (
unsigned int j=0; j<m; j++) {
4306 for (
unsigned int i=0;
i<m;
i++) {
4307 A[
i+j*m] =
FadType(ndot, this->urand.number());
4308 for (
unsigned int k=0; k<ndot; k++)
4312 FadType alpha(ndot, this->urand.number());
4313 for (
unsigned int k=0; k<ndot; k++) {
4314 alpha.fastAccessDx(k) = this->urand.number();
4317 for (
unsigned int j=0; j<n; j++) {
4318 for (
unsigned int i=0;
i<m;
i++) {
4319 ScalarType
val = this->urand.number();
4336 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4361 for (
unsigned int i=0;
i<m;
i++) {
4362 A[
i*m+
i].val() = 1.0;
4363 for (
unsigned int k=0; k<ndot; k++)
4378 typedef decltype(this->fad)
FadType;
4383 auto ndot = this->ndot_;
4385 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot),
4386 B4(m*n,ndot), B5(m*n,ndot);
4387 std::vector<ScalarType>
a(m*m);
4388 for (
unsigned int j=0; j<m; j++) {
4389 for (
unsigned int i=0;
i<m;
i++) {
4390 a[
i+j*m] = this->urand.number();
4391 A[
i+j*m] = a[
i+j*m];
4394 FadType alpha(ndot, this->urand.number());
4395 for (
unsigned int k=0; k<ndot; k++) {
4396 alpha.fastAccessDx(k) = this->urand.number();
4399 for (
unsigned int j=0; j<n; j++) {
4400 for (
unsigned int i=0;
i<m;
i++) {
4401 ScalarType
val = this->urand.number();
4407 for (
unsigned int k=0; k<ndot; k++) {
4408 val = this->urand.number();
4409 B1[
i+j*m].fastAccessDx(k) =
val;
4410 B2[
i+j*m].fastAccessDx(k) =
val;
4411 B3[
i+j*m].fastAccessDx(k) =
val;
4412 B4[
i+j*m].fastAccessDx(k) =
val;
4413 B5[
i+j*m].fastAccessDx(k) =
val;
4428 unsigned int sz = m*m + m*n*(1+ndot);
4475 for (
unsigned int i=0;
i<m;
i++) {
4476 A[
i*m+
i].val() = 1.0;
4477 for (
unsigned int k=0; k<ndot; k++)
4498 typedef decltype(this->fad)
FadType;
4503 auto ndot = this->ndot_;
4505 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4506 for (
unsigned int j=0; j<m; j++) {
4507 for (
unsigned int i=0;
i<m;
i++) {
4509 A[
i+j*m] =
FadType(ndot, this->urand.number());
4510 for (
unsigned int k=0; k<ndot; k++)
4514 FadType alpha(ndot, this->urand.number());
4515 for (
unsigned int k=0; k<ndot; k++) {
4516 alpha.fastAccessDx(k) = this->urand.number();
4520 for (
unsigned int j=0; j<n; j++) {
4521 for (
unsigned int i=0;
i<m;
i++) {
4522 ScalarType
val = this->urand.number();
4529 for (
unsigned int k=0; k<ndot; k++) {
4530 val = this->urand.number();
4531 B1[
i+j*m].fastAccessDx(k) =
val;
4532 B2[
i+j*m].fastAccessDx(k) =
val;
4533 B3[
i+j*m].fastAccessDx(k) =
val;
4548 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4573 for (
unsigned int i=0;
i<m;
i++) {
4574 A[
i*m+
i].val() = 1.0;
4575 for (
unsigned int k=0; k<ndot; k++)
4590 typedef decltype(this->fad)
FadType;
4595 auto ndot = this->ndot_;
4597 VectorType
A(
n*
n,ndot),
B1(m*n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4598 for (
unsigned int j=0; j<n; j++) {
4599 for (
unsigned int i=0;
i<n;
i++) {
4600 A[
i+j*n] =
FadType(ndot, this->urand.number());
4601 for (
unsigned int k=0; k<ndot; k++)
4605 FadType alpha(ndot, this->urand.number());
4606 for (
unsigned int k=0; k<ndot; k++) {
4607 alpha.fastAccessDx(k) = this->urand.number();
4610 for (
unsigned int j=0; j<n; j++) {
4611 for (
unsigned int i=0;
i<m;
i++) {
4612 ScalarType
val = this->urand.number();
4616 for (
unsigned int k=0; k<ndot; k++) {
4617 val = this->urand.number();
4618 B1[
i+j*m].fastAccessDx(k) =
val;
4619 B2[
i+j*m].fastAccessDx(k) =
val;
4620 B3[
i+j*m].fastAccessDx(k) =
val;
4635 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4660 for (
unsigned int i=0;
i<n;
i++) {
4661 A[
i*n+
i].val() = 1.0;
4662 for (
unsigned int k=0; k<ndot; k++)
4677 typedef decltype(this->fad)
FadType;
4682 auto ndot = this->ndot_;
4684 unsigned int lda = m+4;
4685 unsigned int ldb = m+5;
4686 VectorType
A(lda*m,ndot),
B1(ldb*
n,ndot),
B2(ldb*n,ndot), B3(ldb*n,ndot);
4687 for (
unsigned int j=0; j<m; j++) {
4688 for (
unsigned int i=0;
i<lda;
i++) {
4689 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4690 for (
unsigned int k=0; k<ndot; k++)
4694 FadType alpha(ndot, this->urand.number());
4695 for (
unsigned int k=0; k<ndot; k++) {
4696 alpha.fastAccessDx(k) = this->urand.number();
4699 for (
unsigned int j=0; j<n; j++) {
4700 for (
unsigned int i=0;
i<ldb;
i++) {
4701 ScalarType
val = this->urand.number();
4705 for (
unsigned int k=0; k<ndot; k++) {
4706 val = this->urand.number();
4707 B1[
i+j*ldb].fastAccessDx(k) =
val;
4708 B2[
i+j*ldb].fastAccessDx(k) =
val;
4709 B3[
i+j*ldb].fastAccessDx(k) =
val;
4724 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4749 for (
unsigned int i=0;
i<m;
i++) {
4750 A[
i*lda+
i].val() = 1.0;
4751 for (
unsigned int k=0; k<ndot; k++)
4766 typedef decltype(this->fad)
FadType;
4771 auto ndot = this->ndot_;
4773 unsigned int lda =
n+4;
4774 unsigned int ldb = m+5;
4775 VectorType
A(lda*
n,ndot),
B1(ldb*n,ndot),
B2(ldb*n,ndot), B3(ldb*n,ndot);
4776 for (
unsigned int j=0; j<n; j++) {
4777 for (
unsigned int i=0;
i<lda;
i++) {
4778 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4779 for (
unsigned int k=0; k<ndot; k++)
4783 FadType alpha(ndot, this->urand.number());
4784 for (
unsigned int k=0; k<ndot; k++) {
4785 alpha.fastAccessDx(k) = this->urand.number();
4788 for (
unsigned int j=0; j<n; j++) {
4789 for (
unsigned int i=0;
i<ldb;
i++) {
4790 ScalarType
val = this->urand.number();
4794 for (
unsigned int k=0; k<ndot; k++) {
4795 val = this->urand.number();
4796 B1[
i+j*ldb].fastAccessDx(k) =
val;
4797 B2[
i+j*ldb].fastAccessDx(k) =
val;
4798 B3[
i+j*ldb].fastAccessDx(k) =
val;
4813 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4838 for (
unsigned int i=0;
i<n;
i++) {
4839 A[
i*lda+
i].val() = 1.0;
4840 for (
unsigned int k=0; k<ndot; k++)
4855 typedef decltype(this->fad)
FadType;
4860 auto ndot = this->ndot_;
4862 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4863 for (
unsigned int j=0; j<m; j++) {
4864 for (
unsigned int i=0;
i<m;
i++) {
4865 A[
i+j*m] =
FadType(ndot, this->urand.number());
4866 for (
unsigned int k=0; k<ndot; k++)
4870 ScalarType alpha = this->urand.number();
4872 for (
unsigned int j=0; j<n; j++) {
4873 for (
unsigned int i=0;
i<m;
i++) {
4874 ScalarType
val = this->urand.number();
4878 for (
unsigned int k=0; k<ndot; k++) {
4879 val = this->urand.number();
4880 B1[
i+j*m].fastAccessDx(k) =
val;
4881 B2[
i+j*m].fastAccessDx(k) =
val;
4882 B3[
i+j*m].fastAccessDx(k) =
val;
4897 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4922 for (
unsigned int i=0;
i<m;
i++) {
4923 A[
i*m+
i].val() = 1.0;
4924 for (
unsigned int k=0; k<ndot; k++)
4939 typedef decltype(this->fad)
FadType;
4944 auto ndot = this->ndot_;
4946 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot);
4947 for (
unsigned int j=0; j<m; j++) {
4948 for (
unsigned int i=0;
i<m;
i++) {
4949 A[
i+j*m] =
FadType(ndot, this->urand.number());
4950 for (
unsigned int k=0; k<ndot; k++)
4954 FadType alpha(ndot, this->urand.number());
4955 for (
unsigned int k=0; k<ndot; k++) {
4956 alpha.fastAccessDx(k) = this->urand.number();
4959 for (
unsigned int j=0; j<n; j++) {
4960 for (
unsigned int i=0;
i<m;
i++) {
4961 ScalarType
val = this->urand.number();
4978 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
5003 for (
unsigned int i=0;
i<m;
i++) {
5004 A[
i*m+
i].val() = 1.0;
5005 for (
unsigned int k=0; k<ndot; k++)
5020 typedef decltype(this->fad)
FadType;
5025 auto ndot = this->ndot_;
5027 VectorType
A(m*m,ndot),
B1(m*
n,ndot),
B2(m*n,ndot), B3(m*n,ndot),
5028 B4(m*n,ndot), B5(m*n,ndot);
5029 std::vector<ScalarType>
a(m*m);
5030 for (
unsigned int j=0; j<m; j++) {
5031 for (
unsigned int i=0;
i<m;
i++) {
5032 a[
i+j*m] = this->urand.number();
5033 A[
i+j*m] = a[
i+j*m];
5036 FadType alpha(ndot, this->urand.number());
5037 for (
unsigned int k=0; k<ndot; k++) {
5038 alpha.fastAccessDx(k) = this->urand.number();
5041 for (
unsigned int j=0; j<n; j++) {
5042 for (
unsigned int i=0;
i<m;
i++) {
5043 ScalarType
val = this->urand.number();
5049 for (
unsigned int k=0; k<ndot; k++) {
5050 val = this->urand.number();
5051 B1[
i+j*m].fastAccessDx(k) =
val;
5052 B2[
i+j*m].fastAccessDx(k) =
val;
5053 B3[
i+j*m].fastAccessDx(k) =
val;
5054 B4[
i+j*m].fastAccessDx(k) =
val;
5055 B5[
i+j*m].fastAccessDx(k) =
val;
5070 unsigned int sz = m*m + m*n*(1+ndot);
5117 for (
unsigned int i=0;
i<m;
i++) {
5118 A[
i*m+
i].val() = 1.0;
5119 for (
unsigned int k=0; k<ndot; k++)
5224 #endif // FADBLASUNITTESTS_HPP
Sacado::Random< ScalarType > urand
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
REGISTER_TYPED_TEST_SUITE_P(FadBLASUnitTests, testSCAL1, testSCAL2, testSCAL3, testSCAL4, testCOPY1, testCOPY2, testCOPY3, testCOPY4, testAXPY1, testAXPY2, testAXPY3, testAXPY4, testDOT1, testDOT2, testDOT3, testDOT4, testNRM21, testNRM22, testGEMV1, testGEMV2, testGEMV3, testGEMV4, testGEMV5, testGEMV6, testGEMV7, testGEMV8, testGEMV9, testTRMV1, testTRMV2, testTRMV3, testTRMV4, testGER1, testGER2, testGER3, testGER4, testGER5, testGER6, testGER7, testGEMM1, testGEMM2, testGEMM3, testGEMM4, testGEMM5, testGEMM6, testGEMM7, testGEMM8, testGEMM9, testGEMM10, testSYMM1, testSYMM2, testSYMM3, testSYMM4, testSYMM5, testSYMM6, testSYMM7, testSYMM8, testSYMM9, testTRMM1, testTRMM2, testTRMM3, testTRMM4, testTRMM5, testTRMM6, testTRMM7, testTRSM1, testTRSM2, testTRSM3, testTRSM4, testTRSM5, testTRSM6, testTRSM7)
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
TYPED_TEST_P(FadBLASUnitTests, testSCAL1)
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Sacado::Random< double > real_urand
#define COMPARE_FAD_VECTORS(X1, X2, n)
Sacado::Fad::DFad< double > FadType
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sacado::ScalarType< FadType >::type ScalarType
Sacado::Fad::Vector< unsigned int, FadType > VectorType
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
TYPED_TEST_SUITE_P(FadBLASUnitTests)
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
A random number generator that generates random numbers uniformly distributed in the interval (a...
#define COMPARE_FADS(a, b)