10 #ifndef SACADO_FAD_BLAS_HPP
11 #define SACADO_FAD_BLAS_HPP
22 template <
typename OrdinalType,
typename FadType>
42 OrdinalType& n_dot, OrdinalType& inc_val,
47 OrdinalType lda, OrdinalType& n_dot,
48 OrdinalType& lda_val, OrdinalType& lda_dot,
55 OrdinalType& n_dot, OrdinalType& inc_val,
60 OrdinalType lda, OrdinalType& n_dot,
61 OrdinalType& lda_val, OrdinalType& lda_dot,
68 OrdinalType& n_dot, OrdinalType& inc_val,
73 OrdinalType lda, OrdinalType& n_dot,
74 OrdinalType& lda_val, OrdinalType& lda_dot,
77 void unpack(
FadType&
a, OrdinalType& n_dot, OrdinalType& final_n_dot,
81 OrdinalType& n_dot, OrdinalType& final_n_dot,
82 OrdinalType& inc_val, OrdinalType& inc_dot,
85 void unpack(
FadType*
A, OrdinalType m, OrdinalType n, OrdinalType lda,
86 OrdinalType& n_dot, OrdinalType& final_n_dot,
87 OrdinalType& lda_val, OrdinalType& lda_dot,
94 OrdinalType n_dot, OrdinalType inc_val, OrdinalType inc_dot,
98 OrdinalType lda, OrdinalType n_dot,
99 OrdinalType lda_val, OrdinalType lda_dot,
105 void free(
const FadType*
a, OrdinalType n, OrdinalType n_dot,
106 OrdinalType inc_val, OrdinalType inc_dot,
109 void free(
const FadType*
A, OrdinalType m, OrdinalType n,
110 OrdinalType n_dot, OrdinalType lda_val, OrdinalType lda_dot,
117 OrdinalType inc_val, OrdinalType inc_dot,
121 OrdinalType n_dot, OrdinalType lda_val, OrdinalType lda_dot,
128 OrdinalType inc_val, OrdinalType inc_dot,
132 OrdinalType n_dot, OrdinalType lda_val, OrdinalType lda_dot,
140 OrdinalType n_dot)
const;
161 template <
typename OrdinalType,
typename FadType>
176 bool use_dynamic =
true, OrdinalType static_workspace_size = 0);
204 const OrdinalType incx)
const;
209 const OrdinalType incy)
const;
//! Perform the operation: y <- y+alpha*x.
/*!
 * The scalar \c alpha and the input array \c x have their own template
 * types (\c alpha_type, \c x_type), independent of the \c FadType array
 * \c y that is updated through a non-const pointer.
 * NOTE(review): \c n, \c incx and \c incy presumably carry the usual BLAS
 * meaning (element count and strides of x and y) -- confirm against the
 * implementation.
 */
212 template <
typename alpha_type,
typename x_type>
213 void AXPY(
const OrdinalType n,
const alpha_type& alpha,
214 const x_type*
x,
const OrdinalType incx,
FadType*
y,
const OrdinalType incy)
const;
220 const OrdinalType incx)
const {
225 template <
typename x_type,
typename y_type>
227 const OrdinalType incx,
const y_type*
y,
228 const OrdinalType incy)
const;
232 const OrdinalType incx)
const;
236 const OrdinalType incx)
const {
250 template <
typename alpha_type,
typename A_type,
typename x_type,
254 const alpha_type& alpha,
const A_type*
A,
255 const OrdinalType lda,
const x_type*
x,
256 const OrdinalType incx,
const beta_type& beta,
257 FadType*
y,
const OrdinalType incy)
const;
264 template <
typename A_type>
267 const A_type*
A,
const OrdinalType lda,
FadType*
x,
268 const OrdinalType incx)
const;
//! Performs the rank 1 operation: A <- alpha*x*y'+A.
/*!
 * \c alpha, \c x and \c y each have their own template type
 * (\c alpha_type, \c x_type, \c y_type); \c x and \c y are read through
 * const pointers, while the \c FadType matrix \c A is updated through a
 * non-const pointer.
 * NOTE(review): \c m / \c n presumably give the dimensions of A, \c incx /
 * \c incy the strides of x and y, and \c lda the leading dimension of A,
 * as in standard BLAS -- confirm against the implementation.
 */
271 template <
typename alpha_type,
typename x_type,
typename y_type>
272 void GER(
const OrdinalType m,
const OrdinalType n,
273 const alpha_type& alpha,
274 const x_type*
x,
const OrdinalType incx,
275 const y_type*
y,
const OrdinalType incy,
276 FadType*
A,
const OrdinalType lda)
const;
289 template <
typename alpha_type,
typename A_type,
typename B_type,
292 const OrdinalType m,
const OrdinalType n,
const OrdinalType k,
293 const alpha_type& alpha,
const A_type*
A,
const OrdinalType lda,
294 const B_type*
B,
const OrdinalType ldb,
const beta_type& beta,
295 FadType*
C,
const OrdinalType ldc)
const;
303 template <
typename alpha_type,
typename A_type,
typename B_type,
307 const alpha_type& alpha,
const A_type*
A,
308 const OrdinalType lda,
const B_type*
B,
309 const OrdinalType ldb,
311 const OrdinalType ldc)
const;
319 template <
typename alpha_type,
typename A_type>
322 const OrdinalType m,
const OrdinalType n,
323 const alpha_type& alpha,
324 const A_type*
A,
const OrdinalType lda,
325 FadType*
B,
const OrdinalType ldb)
const;
334 template <
typename alpha_type,
typename A_type>
337 const OrdinalType m,
const OrdinalType n,
338 const alpha_type& alpha,
339 const A_type*
A,
const OrdinalType lda,
340 FadType*
B,
const OrdinalType ldb)
const;
364 template <
typename x_type,
typename y_type>
365 void Fad_DOT(
const OrdinalType n,
367 const OrdinalType incx,
368 const OrdinalType n_x_dot,
370 const OrdinalType incx_dot,
372 const OrdinalType incy,
373 const OrdinalType n_y_dot,
375 const OrdinalType incy_dot,
377 const OrdinalType n_z_dot,
381 template <
typename alpha_type,
typename A_type,
typename x_type,
386 const alpha_type& alpha,
387 const OrdinalType n_alpha_dot,
388 const alpha_type* alpha_dot,
390 const OrdinalType lda,
391 const OrdinalType n_A_dot,
393 const OrdinalType lda_dot,
395 const OrdinalType incx,
396 const OrdinalType n_x_dot,
398 const OrdinalType incx_dot,
399 const beta_type& beta,
400 const OrdinalType n_beta_dot,
401 const beta_type* beta_dot,
403 const OrdinalType incy,
404 const OrdinalType n_y_dot,
406 const OrdinalType incy_dot,
407 const OrdinalType n_dot)
const;
410 template <
typename alpha_type,
typename x_type,
typename y_type>
411 void Fad_GER(
const OrdinalType m,
413 const alpha_type& alpha,
414 const OrdinalType n_alpha_dot,
415 const alpha_type* alpha_dot,
417 const OrdinalType incx,
418 const OrdinalType n_x_dot,
420 const OrdinalType incx_dot,
422 const OrdinalType incy,
423 const OrdinalType n_y_dot,
425 const OrdinalType incy_dot,
427 const OrdinalType lda,
428 const OrdinalType n_A_dot,
430 const OrdinalType lda_dot,
431 const OrdinalType n_dot)
const;
434 template <
typename alpha_type,
typename A_type,
typename B_type,
441 const alpha_type& alpha,
442 const OrdinalType n_alpha_dot,
443 const alpha_type* alpha_dot,
445 const OrdinalType lda,
446 const OrdinalType n_A_dot,
448 const OrdinalType lda_dot,
450 const OrdinalType ldb,
451 const OrdinalType n_B_dot,
453 const OrdinalType ldb_dot,
454 const beta_type& beta,
455 const OrdinalType n_beta_dot,
456 const beta_type* beta_dot,
458 const OrdinalType ldc,
459 const OrdinalType n_C_dot,
461 const OrdinalType ldc_dot,
462 const OrdinalType n_dot)
const;
465 template <
typename alpha_type,
typename A_type,
typename B_type,
471 const alpha_type& alpha,
472 const OrdinalType n_alpha_dot,
473 const alpha_type* alpha_dot,
475 const OrdinalType lda,
476 const OrdinalType n_A_dot,
478 const OrdinalType lda_dot,
480 const OrdinalType ldb,
481 const OrdinalType n_B_dot,
483 const OrdinalType ldb_dot,
484 const beta_type& beta,
485 const OrdinalType n_beta_dot,
486 const beta_type* beta_dot,
488 const OrdinalType ldc,
489 const OrdinalType n_C_dot,
491 const OrdinalType ldc_dot,
492 const OrdinalType n_dot)
const;
495 template <
typename alpha_type,
typename A_type>
502 const alpha_type& alpha,
503 const OrdinalType n_alpha_dot,
504 const alpha_type* alpha_dot,
506 const OrdinalType lda,
507 const OrdinalType n_A_dot,
509 const OrdinalType lda_dot,
511 const OrdinalType ldb,
512 const OrdinalType n_B_dot,
514 const OrdinalType ldb_dot,
515 const OrdinalType n_dot)
const;
518 template <
typename alpha_type,
typename A_type>
525 const alpha_type& alpha,
526 const OrdinalType n_alpha_dot,
527 const alpha_type* alpha_dot,
529 const OrdinalType lda,
530 const OrdinalType n_A_dot,
532 const OrdinalType lda_dot,
534 const OrdinalType ldb,
535 const OrdinalType n_B_dot,
537 const OrdinalType ldb_dot,
538 const OrdinalType n_dot)
const;
551 #define TEUCHOS_BLAS_FAD_SPEC(FADTYPE) \
552 namespace Teuchos { \
553 template <typename OrdinalType, typename ValueT> \
554 class BLAS< OrdinalType, FADTYPE<ValueT> > : \
555 public Sacado::Fad::BLAS< OrdinalType, FADTYPE<ValueT> > { \
557 BLAS(bool use_default_impl = true, bool use_dynamic = true, \
558 OrdinalType static_workspace_size = 0) : \
559 Sacado::Fad::BLAS< OrdinalType, FADTYPE<ValueT> >( \
560 use_default_impl, use_dynamic,static_workspace_size) {} \
561 BLAS(const BLAS& x) : \
562 Sacado::Fad::BLAS< OrdinalType, FADTYPE<ValueT> >(x) {} \
568 template <typename ValueT> \
569 struct ArrayValueType< FADTYPE<ValueT> > { \
570 typedef ValueT type; \
574 #define TEUCHOS_BLAS_SFAD_SPEC(FADTYPE) \
575 namespace Teuchos { \
576 template <typename OrdinalType, typename ValueT, int Num> \
577 class BLAS< OrdinalType, FADTYPE<ValueT,Num> > : \
578 public Sacado::Fad::BLAS< OrdinalType, FADTYPE<ValueT,Num> > { \
580 BLAS(bool use_default_impl = true, bool use_dynamic = true, \
581 OrdinalType static_workspace_size = 0) : \
582 Sacado::Fad::BLAS< OrdinalType, FADTYPE<ValueT,Num> >( \
583 use_default_impl, use_dynamic, static_workspace_size) {} \
584 BLAS(const BLAS& x) : \
585 Sacado::Fad::BLAS< OrdinalType, FADTYPE<ValueT,Num> >(x) {} \
591 template <typename ValueT, int Num> \
592 struct ArrayValueType< FADTYPE<ValueT,Num> > { \
593 typedef ValueT type; \
606 #undef TEUCHOS_BLAS_FAD_SPEC
607 #undef TEUCHOS_BLAS_SFAD_SPEC
611 #endif // SACADO_FAD_BLAS_HPP
void TRSM(Teuchos::ESide side, Teuchos::EUplo uplo, Teuchos::ETransp transa, Teuchos::EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const A_type *A, const OrdinalType lda, FadType *B, const OrdinalType ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
void TRMV(Teuchos::EUplo uplo, Teuchos::ETransp trans, Teuchos::EDiag diag, const OrdinalType n, const A_type *A, const OrdinalType lda, FadType *x, const OrdinalType incx) const
Performs the matrix-vector operation: x <- A*x or x <- A'*x where A is a unit/non-unit n by n uppe...
void ROT(const OrdinalType n, FadType *dx, const OrdinalType incx, FadType *dy, const OrdinalType incy, MagnitudeType *c, FadType *s) const
Applies a Givens plane rotation.
void Fad_GEMM(Teuchos::ETransp transa, Teuchos::ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type &alpha, const OrdinalType n_alpha_dot, const alpha_type *alpha_dot, const A_type *A, const OrdinalType lda, const OrdinalType n_A_dot, const A_type *A_dot, const OrdinalType lda_dot, const B_type *B, const OrdinalType ldb, const OrdinalType n_B_dot, const B_type *B_dot, const OrdinalType ldb_dot, const beta_type &beta, const OrdinalType n_beta_dot, const beta_type *beta_dot, ValueType *C, const OrdinalType ldc, const OrdinalType n_C_dot, ValueType *C_dot, const OrdinalType ldc_dot, const OrdinalType n_dot) const
Implementation of GEMM.
Teuchos::DefaultBLASImpl< OrdinalType, FadType > BLASType
void GEMV(Teuchos::ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const A_type *A, const OrdinalType lda, const x_type *x, const OrdinalType incx, const beta_type &beta, FadType *y, const OrdinalType incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A'*x+beta*y where A is a...
void GER(const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const x_type *x, const OrdinalType incx, const y_type *y, const OrdinalType incy, FadType *A, const OrdinalType lda) const
Performs the rank 1 operation: A <- alpha*x*y'+A.
Sacado::dummy< ValueType, scalar_type >::type ScalarType
bool use_default_impl
Use custom or default implementation.
void ROTG(FadType *da, FadType *db, MagnitudeType *c, FadType *s) const
Computes a Givens plane rotation.
void Fad_TRSM(Teuchos::ESide side, Teuchos::EUplo uplo, Teuchos::ETransp transa, Teuchos::EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const OrdinalType n_alpha_dot, const alpha_type *alpha_dot, const A_type *A, const OrdinalType lda, const OrdinalType n_A_dot, const A_type *A_dot, const OrdinalType lda_dot, ValueType *B, const OrdinalType ldb, const OrdinalType n_B_dot, ValueType *B_dot, const OrdinalType ldb_dot, const OrdinalType n_dot) const
Implementation of TRSM.
std::vector< ValueType > gemv_Ax
Temporary array for GEMV.
bool is_array_contiguous(const FadType *a, OrdinalType n, OrdinalType n_dot) const
void ROT(const OrdinalType &n, ScalarType *dx, const OrdinalType &incx, ScalarType *dy, const OrdinalType &incy, MagnitudeType *c, ScalarType *s) const
FadType DOT(const OrdinalType n, const x_type *x, const OrdinalType incx, const y_type *y, const OrdinalType incy) const
Form the dot product of the vectors x and y.
virtual ~BLAS()
Destructor.
Fad specializations for Teuchos::BLAS wrappers.
void GEMM(Teuchos::ETransp transa, Teuchos::ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type &alpha, const A_type *A, const OrdinalType lda, const B_type *B, const OrdinalType ldb, const beta_type &beta, FadType *C, const OrdinalType ldc) const
Performs the matrix-matrix operation: C <- alpha*op(A)*op(B)+beta*C where op(A) is either A or A'...
OrdinalType IAMAX(const OrdinalType n, const FadType *x, const OrdinalType incx) const
Return the index of the element of x with the maximum magnitude.
GeneralFad< StaticStorage< T, Num > > SLFad
bool use_dynamic
Use dynamic memory allocation.
#define TEUCHOS_BLAS_SFAD_SPEC(FADTYPE)
ValueType * allocate_array(OrdinalType size) const
ArrayTraits< OrdinalType, FadType > arrayTraits
ArrayTraits for packing/unpacking value/derivative arrays.
Sacado::ScalarType< FadType >::type scalar_type
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Teuchos::ScalarTraits< FadType >::magnitudeType ASUM(const OrdinalType n, const FadType *x, const OrdinalType incx) const
Sum the absolute values of the entries of x.
#define TEUCHOS_BLAS_FAD_SPEC(FADTYPE)
void free(const ScalarType &a, OrdinalType n_dot, const ScalarType *dot) const
void TRMM(Teuchos::ESide side, Teuchos::EUplo uplo, Teuchos::ETransp transa, Teuchos::EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const A_type *A, const OrdinalType lda, FadType *B, const OrdinalType ldb) const
Performs the matrix-matrix operation: B <- alpha*op(A)*B or B <- alpha*B*op(A) where op...
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
ValueType * workspace_pointer
Pointer to current free entry in workspace.
expr expr1 expr1 expr1 c expr2 expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 c expr2 expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 c *expr2 expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 c expr2 expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr2 expr1 expr2 expr1 expr1 expr1 c
OrdinalType workspace_size
Size of static workspace.
std::vector< ValueType > gemm_AB
Temporary array for GEMM.
GeneralFad< DynamicStorage< T > > DFad
Teuchos::BLAS< OrdinalType, ValueType > blas
BLAS for values.
void Fad_SYMM(Teuchos::ESide side, Teuchos::EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const OrdinalType n_alpha_dot, const alpha_type *alpha_dot, const A_type *A, const OrdinalType lda, const OrdinalType n_A_dot, const A_type *A_dot, const OrdinalType lda_dot, const B_type *B, const OrdinalType ldb, const OrdinalType n_B_dot, const B_type *B_dot, const OrdinalType ldb_dot, const beta_type &beta, const OrdinalType n_beta_dot, const beta_type *beta_dot, ValueType *C, const OrdinalType ldc, const OrdinalType n_C_dot, ValueType *C_dot, const OrdinalType ldc_dot, const OrdinalType n_dot) const
Implementation of SYMM.
Sacado::dummy< ValueType, scalar_type >::type ScalarType
GeneralFad< VectorDynamicStorage< T > > DVFad
void free(const ValueType *A, OrdinalType m, OrdinalType n, OrdinalType n_dot, OrdinalType lda_val, OrdinalType lda_dot, const ValueType *val, const ValueType *dot) const
ArrayTraits(bool use_dynamic=true, OrdinalType workspace_size=0)
void AXPY(const OrdinalType n, const alpha_type &alpha, const x_type *x, const OrdinalType incx, FadType *y, const OrdinalType incy) const
Perform the operation: y <- y+alpha*x.
void COPY(const OrdinalType n, const FadType *x, const OrdinalType incx, FadType *y, const OrdinalType incy) const
Copy the vector x to the vector y.
Teuchos::ScalarTraits< FadType >::magnitudeType MagnitudeType
void SYMM(Teuchos::ESide side, Teuchos::EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const A_type *A, const OrdinalType lda, const B_type *B, const OrdinalType ldb, const beta_type &beta, FadType *C, const OrdinalType ldc) const
Performs the matrix-matrix operation: C <- alpha*A*B+beta*C or C <- alpha*B*A+beta*C where A is an m ...
void Fad_TRMM(Teuchos::ESide side, Teuchos::EUplo uplo, Teuchos::ETransp transa, Teuchos::EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const OrdinalType n_alpha_dot, const alpha_type *alpha_dot, const A_type *A, const OrdinalType lda, const OrdinalType n_A_dot, const A_type *A_dot, const OrdinalType lda_dot, ValueType *B, const OrdinalType ldb, const OrdinalType n_B_dot, ValueType *B_dot, const OrdinalType ldb_dot, const OrdinalType n_dot) const
Implementation of TRMM.
void free(const ScalarType *a, OrdinalType n, OrdinalType n_dot, OrdinalType inc_val, OrdinalType inc_dot, const ScalarType *val, const ScalarType *dot) const
void free(const ScalarType *A, OrdinalType m, OrdinalType n, OrdinalType n_dot, OrdinalType lda_val, OrdinalType lda_dot, const ScalarType *val, const ScalarType *dot) const
void free_array(const ValueType *ptr, OrdinalType size) const
MagnitudeType NRM2(const OrdinalType n, const FadType *x, const OrdinalType incx) const
Compute the 2-norm of the vector x.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
ValueType * workspace
Workspace for holding contiguous values/derivatives.
Sacado::ScalarType< FadType >::type scalar_type
void Fad_GEMV(Teuchos::ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const OrdinalType n_alpha_dot, const alpha_type *alpha_dot, const A_type *A, const OrdinalType lda, const OrdinalType n_A_dot, const A_type *A_dot, const OrdinalType lda_dot, const x_type *x, const OrdinalType incx, const OrdinalType n_x_dot, const x_type *x_dot, const OrdinalType incx_dot, const beta_type &beta, const OrdinalType n_beta_dot, const beta_type *beta_dot, ValueType *y, const OrdinalType incy, const OrdinalType n_y_dot, ValueType *y_dot, const OrdinalType incy_dot, const OrdinalType n_dot) const
Implementation of GEMV.
void free(const ValueType *a, OrdinalType n, OrdinalType n_dot, OrdinalType inc_val, OrdinalType inc_dot, const ValueType *val, const ValueType *dot) const
Sacado::ValueType< FadType >::type ValueType
void free(const ValueType &a, OrdinalType n_dot, const ValueType *dot) const
void SCAL(const OrdinalType n, const FadType &alpha, FadType *x, const OrdinalType incx) const
Scale the vector x by the constant alpha.
void Fad_DOT(const OrdinalType n, const x_type *x, const OrdinalType incx, const OrdinalType n_x_dot, const x_type *x_dot, const OrdinalType incx_dot, const y_type *y, const OrdinalType incy, const OrdinalType n_y_dot, const y_type *y_dot, const OrdinalType incy_dot, ValueType &z, const OrdinalType n_z_dot, ValueType *zdot) const
Implementation of DOT.
GeneralFad< StaticFixedStorage< T, Num > > SFad
Base template specification for ValueType.
Sacado::ValueType< FadType >::type ValueType
void Fad_GER(const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const OrdinalType n_alpha_dot, const alpha_type *alpha_dot, const x_type *x, const OrdinalType incx, const OrdinalType n_x_dot, const x_type *x_dot, const OrdinalType incx_dot, const y_type *y, const OrdinalType incy, const OrdinalType n_y_dot, const y_type *y_dot, const OrdinalType incy_dot, ValueType *A, const OrdinalType lda, const OrdinalType n_A_dot, ValueType *A_dot, const OrdinalType lda_dot, const OrdinalType n_dot) const
Implementation of GER.
BLAS(bool use_default_impl=true, bool use_dynamic=true, OrdinalType static_workspace_size=0)
Default constructor.