27 #ifndef _TEUCHOS_BLAS_HPP_
28 #define _TEUCHOS_BLAS_HPP_
41 #include "Teuchos_Assert.hpp"
77 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT
const char ESideChar[];
78 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT
const char ETranspChar[];
79 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT
const char EUploChar[];
80 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT
const char EDiagChar[];
81 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT
const char ETypeChar[];
85 template<
typename ScalarType,
96 template<
typename OrdinalType,
typename ScalarType>
122 typedef typename details::GivensRotator<ScalarType>::c_type
rotg_c_type;
125 void ROTG(ScalarType* da, ScalarType* db,
rotg_c_type* c, ScalarType* s)
const;
128 void ROT(
const OrdinalType& n, ScalarType* dx,
const OrdinalType& incx, ScalarType* dy,
const OrdinalType& incy, MagnitudeType* c, ScalarType* s)
const;
131 void SCAL(
const OrdinalType& n,
const ScalarType& alpha, ScalarType* x,
const OrdinalType& incx)
const;
134 void COPY(
const OrdinalType& n,
const ScalarType* x,
const OrdinalType& incx, ScalarType* y,
const OrdinalType& incy)
const;
137 template <
typename alpha_type,
typename x_type>
138 void AXPY(
const OrdinalType& n,
const alpha_type alpha,
const x_type* x,
const OrdinalType& incx, ScalarType* y,
const OrdinalType& incy)
const;
144 template <
typename x_type,
typename y_type>
145 ScalarType
DOT(
const OrdinalType& n,
const x_type* x,
const OrdinalType& incx,
const y_type* y,
const OrdinalType& incy)
const;
151 OrdinalType
IAMAX(
const OrdinalType& n,
const ScalarType* x,
const OrdinalType& incx)
const;
158 template <
typename alpha_type,
typename A_type,
typename x_type,
typename beta_type>
159 void GEMV(
ETransp trans,
const OrdinalType& m,
const OrdinalType& n,
const alpha_type alpha,
const A_type* A,
160 const OrdinalType& lda,
const x_type* x,
const OrdinalType& incx,
const beta_type beta, ScalarType* y,
const OrdinalType& incy)
const;
163 template <
typename A_type>
165 const OrdinalType& lda, ScalarType* x,
const OrdinalType& incx)
const;
169 template <
typename alpha_type,
typename x_type,
typename y_type>
170 void GER(
const OrdinalType& m,
const OrdinalType& n,
const alpha_type alpha,
const x_type* x,
const OrdinalType& incx,
171 const y_type* y,
const OrdinalType& incy, ScalarType* A,
const OrdinalType& lda)
const;
183 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
184 void GEMM(
ETransp transa,
ETransp transb,
const OrdinalType& m,
const OrdinalType& n,
const OrdinalType& k,
const alpha_type alpha,
const A_type* A,
const OrdinalType& lda,
const B_type* B,
const OrdinalType& ldb,
const beta_type beta, ScalarType* C,
const OrdinalType& ldc)
const;
188 SWAP (
const OrdinalType& n, ScalarType*
const x,
const OrdinalType& incx,
189 ScalarType*
const y,
const OrdinalType& incy)
const;
192 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
193 void SYMM(
ESide side,
EUplo uplo,
const OrdinalType& m,
const OrdinalType& n,
const alpha_type alpha,
const A_type* A,
const OrdinalType& lda,
const B_type* B,
const OrdinalType& ldb,
const beta_type beta, ScalarType* C,
const OrdinalType& ldc)
const;
196 template <
typename alpha_type,
typename A_type,
typename beta_type>
197 void SYRK(
EUplo uplo,
ETransp trans,
const OrdinalType& n,
const OrdinalType& k,
const alpha_type alpha,
const A_type* A,
const OrdinalType& lda,
const beta_type beta, ScalarType* C,
const OrdinalType& ldc)
const;
200 template <
typename alpha_type,
typename A_type>
202 const alpha_type alpha,
const A_type* A,
const OrdinalType& lda, ScalarType* B,
const OrdinalType& ldb)
const;
205 template <
typename alpha_type,
typename A_type>
207 const alpha_type alpha,
const A_type* A,
const OrdinalType& lda, ScalarType* B,
const OrdinalType& ldb)
const;
211 template<
typename OrdinalType,
typename ScalarType>
246 template<
typename ScalarType,
bool isComplex>
254 template<
typename ScalarType>
255 class MagValue<ScalarType, true> {
262 template<
typename ScalarType>
263 class MagValue<ScalarType, false> {
266 blas_dabs1(
const ScalarType* a, ScalarType* ret)
const;
269 template<
typename ScalarType,
bool isComplex>
270 class GivensRotator {};
273 template<
typename ScalarType>
274 class GivensRotator<ScalarType, true> {
278 ROTG (ScalarType* ca,
281 ScalarType* s)
const;
285 template<
typename ScalarType>
286 class GivensRotator<ScalarType, false> {
288 typedef ScalarType c_type;
290 ROTG (ScalarType* da,
293 ScalarType* s)
const;
307 ScalarType SIGN (
const ScalarType& x,
const ScalarType& y)
const {
308 typedef ScalarTraits<ScalarType> STS;
310 if (y > STS::zero()) {
311 return STS::magnitude (x);
312 }
else if (y < STS::zero()) {
313 return -STS::magnitude (x);
326 ScalarType signedInfinity = STS::one() / y;
327 if (signedInfinity > STS::zero()) {
328 return STS::magnitude (x);
333 return -STS::magnitude (x);
340 template<
typename ScalarType>
342 GivensRotator<ScalarType, true>::
343 ROTG (ScalarType* ca,
348 typedef ScalarTraits<ScalarType> STS;
349 typedef typename STS::magnitudeType MagnitudeType;
350 typedef ScalarTraits<MagnitudeType> STM;
369 MagnitudeType norm, scale;
371 if (STS::magnitude (*ca) == STM::zero()) {
376 scale = STS::magnitude (*ca) + STS::magnitude (*cb);
380 const MagnitudeType ca_scaled =
381 STS::magnitude (*ca / ScalarType(scale, STM::zero()));
382 const MagnitudeType cb_scaled =
383 STS::magnitude (*cb / ScalarType(scale, STM::zero()));
385 STM::squareroot (ca_scaled*ca_scaled + cb_scaled*cb_scaled);
387 alpha = *ca / STS::magnitude (*ca);
388 *c = STS::magnitude (*ca) / norm;
389 *s = alpha * STS::conjugate (*cb) / norm;
395 template<
typename ScalarType>
397 GivensRotator<ScalarType, false>::
398 ROTG (ScalarType* da,
403 typedef ScalarTraits<ScalarType> STS;
426 ScalarType r, roe, scale, z;
429 if (STS::magnitude (*da) > STS::magnitude (*db)) {
432 scale = STS::magnitude (*da) + STS::magnitude (*db);
433 if (scale == STS::zero()) {
442 const ScalarType da_scaled = *da / scale;
443 const ScalarType db_scaled = *db / scale;
444 r = scale * STS::squareroot (da_scaled*da_scaled + db_scaled*db_scaled);
445 r = SIGN (STS::one(), roe) * r;
449 if (STS::magnitude (*da) > STS::magnitude (*db)) {
452 if (STS::magnitude (*db) >= STS::magnitude (*da) && *c != STS::zero()) {
462 template<
typename ScalarType>
464 MagValue<ScalarType, false>::
465 blas_dabs1(
const ScalarType* a, ScalarType* ret)
const
471 template<
typename ScalarType>
473 MagValue<ScalarType, true>::
476 *ret = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->real());
477 *ret += ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->imag());
482 template<
typename OrdinalType,
typename ScalarType>
490 details::GivensRotator<ScalarType> rotator;
491 rotator.ROTG (da, db, c, s);
494 template<
typename OrdinalType,
typename ScalarType>
500 if (incx==1 && incy==1) {
501 for(OrdinalType i=0; i<n; ++i) {
502 ScalarType temp = *c*dx[i] + *s*dy[i];
503 dy[i] = *c*dy[i] - sconj*dx[i];
508 OrdinalType ix = 0, iy = 0;
509 if (incx < izero) ix = (-n+1)*incx;
510 if (incy < izero) iy = (-n+1)*incy;
511 for(OrdinalType i=0; i<n; ++i) {
512 ScalarType temp = *c*dx[ix] + *s*dy[iy];
513 dy[iy] = *c*dy[iy] - sconj*dx[ix];
521 template<
typename OrdinalType,
typename ScalarType>
526 OrdinalType i, ix = izero;
528 if ( n < ione || incx < ione )
532 for(i = izero; i < n; i++)
539 template<
typename OrdinalType,
typename ScalarType>
544 OrdinalType i, ix = izero, iy = izero;
547 if (incx < izero) { ix = (-n+ione)*incx; }
548 if (incy < izero) { iy = (-n+ione)*incy; }
550 for(i = izero; i < n; i++)
559 template<
typename OrdinalType,
typename ScalarType>
560 template <
typename alpha_type,
typename x_type>
565 OrdinalType i, ix = izero, iy = izero;
569 if (incx < izero) { ix = (-n+ione)*incx; }
570 if (incy < izero) { iy = (-n+ione)*incy; }
572 for(i = izero; i < n; i++)
574 y[iy] += alpha * x[ix];
581 template<
typename OrdinalType,
typename ScalarType>
588 OrdinalType i, ix = izero;
590 if ( n < ione || incx < ione )
593 details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
594 for (i = izero; i < n; i++)
596 mval.blas_dabs1( &x[ix], &temp );
604 template<
typename OrdinalType,
typename ScalarType>
605 template <
typename x_type,
typename y_type>
611 OrdinalType i, ix = izero, iy = izero;
615 if (incx < izero) { ix = (-n+ione)*incx; }
616 if (incy < izero) { iy = (-n+ione)*incy; }
618 for(i = izero; i < n; i++)
628 template<
typename OrdinalType,
typename ScalarType>
635 OrdinalType i, ix = izero;
637 if ( n < ione || incx < ione )
640 for(i = izero; i < n; i++)
649 template<
typename OrdinalType,
typename ScalarType>
654 OrdinalType result = izero, ix = izero, i;
660 if ( n < ione || incx < ione )
663 details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
665 mval.blas_dabs1( &x[ix], &maxval );
667 for(i = ione; i < n; i++)
669 mval.blas_dabs1( &x[ix], &curval );
684 template<
typename OrdinalType,
typename ScalarType>
685 template <
typename alpha_type,
typename A_type,
typename x_type,
typename beta_type>
686 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(
ETransp trans,
const OrdinalType& m,
const OrdinalType& n,
const alpha_type alpha,
const A_type* A,
const OrdinalType& lda,
const x_type* x,
const OrdinalType& incx,
const beta_type beta, ScalarType* y,
const OrdinalType& incy)
const
696 bool BadArgument =
false;
699 if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){
return; }
703 std::cout <<
"BLAS::GEMV Error: M == " << m << std::endl;
707 std::cout <<
"BLAS::GEMV Error: N == " << n << std::endl;
711 std::cout <<
"BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;
714 if( incx == izero ) {
715 std::cout <<
"BLAS::GEMV Error: INCX == 0"<< std::endl;
718 if( incy == izero ) {
719 std::cout <<
"BLAS::GEMV Error: INCY == 0"<< std::endl;
724 OrdinalType i, j, lenx, leny, ix, iy, jx, jy;
725 OrdinalType kx = izero, ky = izero;
729 if(ETranspChar[trans] ==
'N') {
738 noConj = (ETranspChar[trans] ==
'T');
741 if (incx < izero ) { kx = (ione - lenx)*incx; }
742 if (incy < izero ) { ky = (ione - leny)*incy; }
746 if(beta != beta_one) {
748 if (beta == beta_zero) {
749 for(i = izero; i < leny; i++) { y[i] = y_zero; }
751 for(i = izero; i < leny; i++) { y[i] *= beta; }
754 if (beta == beta_zero) {
755 for(i = izero; i < leny; i++) {
760 for(i = izero; i < leny; i++) {
769 if(alpha == alpha_zero) {
return; }
771 if( ETranspChar[trans] ==
'N' ) {
775 for(j = izero; j < n; j++) {
776 if (x[jx] != x_zero) {
778 for(i = izero; i < m; i++) {
779 y[i] += temp*A[j*lda + i];
785 for(j = izero; j < n; j++) {
786 if (x[jx] != x_zero) {
789 for(i = izero; i < m; i++) {
790 y[iy] += temp*A[j*lda + i];
800 for(j = izero; j < n; j++) {
803 for(i = izero; i < m; i++) {
804 temp += A[j*lda + i]*x[i];
807 for(i = izero; i < m; i++) {
815 for(j = izero; j < n; j++) {
819 for (i = izero; i < m; i++) {
820 temp += A[j*lda + i]*x[ix];
824 for (i = izero; i < m; i++) {
837 template<
typename OrdinalType,
typename ScalarType>
838 template <
typename A_type>
844 bool BadArgument =
false;
848 if( n == izero ){
return; }
852 std::cout <<
"BLAS::TRMV Error: N == " << n << std::endl;
856 std::cout <<
"BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;
859 if( incx == izero ) {
860 std::cout <<
"BLAS::TRMV Error: INCX == 0"<< std::endl;
865 OrdinalType i, j, ix, jx, kx = izero;
867 bool noUnit = (EDiagChar[diag] ==
'N');
870 noConj = (ETranspChar[trans] ==
'T');
873 if (incx < izero) { kx = (-n+ione)*incx; }
876 if (ETranspChar[trans] ==
'N') {
878 if (EUploChar[uplo] ==
'U') {
881 for (j=izero; j<n; j++) {
884 for (i=izero; i<j; i++) {
885 x[i] += temp*A[j*lda + i];
888 x[j] *= A[j*lda + j];
893 for (j=izero; j<n; j++) {
897 for (i=izero; i<j; i++) {
898 x[ix] += temp*A[j*lda + i];
902 x[jx] *= A[j*lda + j];
909 for (j=n-ione; j>-ione; j--) {
912 for (i=n-ione; i>j; i--) {
913 x[i] += temp*A[j*lda + i];
916 x[j] *= A[j*lda + j];
922 for (j=n-ione; j>-ione; j--) {
926 for (i=n-ione; i>j; i--) {
927 x[ix] += temp*A[j*lda + i];
931 x[jx] *= A[j*lda + j];
939 if (EUploChar[uplo]==
'U') {
942 for (j=n-ione; j>-ione; j--) {
946 temp *= A[j*lda + j];
947 for (i=j-ione; i>-ione; i--) {
948 temp += A[j*lda + i]*x[i];
953 for (i=j-ione; i>-ione; i--) {
960 jx = kx + (n-ione)*incx;
961 for (j=n-ione; j>-ione; j--) {
966 temp *= A[j*lda + j];
967 for (i=j-ione; i>-ione; i--) {
969 temp += A[j*lda + i]*x[ix];
974 for (i=j-ione; i>-ione; i--) {
986 for (j=izero; j<n; j++) {
990 temp *= A[j*lda + j];
991 for (i=j+ione; i<n; i++) {
992 temp += A[j*lda + i]*x[i];
997 for (i=j+ione; i<n; i++) {
1005 for (j=izero; j<n; j++) {
1010 temp *= A[j*lda + j];
1011 for (i=j+ione; i<n; i++) {
1013 temp += A[j*lda + i]*x[ix];
1018 for (i=j+ione; i<n; i++) {
1032 template<
typename OrdinalType,
typename ScalarType>
1033 template <
typename alpha_type,
typename x_type,
typename y_type>
1034 void DefaultBLASImpl<OrdinalType, ScalarType>::GER(
const OrdinalType& m,
const OrdinalType& n,
const alpha_type alpha,
const x_type* x,
const OrdinalType& incx,
const y_type* y,
const OrdinalType& incy, ScalarType* A,
const OrdinalType& lda)
const
1040 bool BadArgument =
false;
1043 if( m == izero || n == izero || alpha == alpha_zero ){
return; }
1047 std::cout <<
"BLAS::GER Error: M == " << m << std::endl;
1051 std::cout <<
"BLAS::GER Error: N == " << n << std::endl;
1055 std::cout <<
"BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;
1059 std::cout <<
"BLAS::GER Error: INCX == 0"<< std::endl;
1063 std::cout <<
"BLAS::GER Error: INCY == 0"<< std::endl;
1068 OrdinalType i, j, ix, jy = izero, kx = izero;
1072 if (incx < izero) { kx = (-m+ione)*incx; }
1073 if (incy < izero) { jy = (-n+ione)*incy; }
1076 if( incx == ione ) {
1077 for( j=izero; j<n; j++ ) {
1078 if ( y[jy] != y_zero ) {
1080 for ( i=izero; i<m; i++ ) {
1081 A[j*lda + i] += x[i]*temp;
1088 for( j=izero; j<n; j++ ) {
1089 if ( y[jy] != y_zero ) {
1092 for( i=izero; i<m; i++ ) {
1093 A[j*lda + i] += x[ix]*temp;
1107 template<
typename OrdinalType,
typename ScalarType>
1108 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
1109 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(
ETransp transa,
ETransp transb,
const OrdinalType& m,
const OrdinalType& n,
const OrdinalType& k,
const alpha_type alpha,
const A_type* A,
const OrdinalType& lda,
const B_type* B,
const OrdinalType& ldb,
const beta_type beta, ScalarType* C,
const OrdinalType& ldc)
const
1118 OrdinalType i, j, p;
1119 OrdinalType NRowA = m, NRowB = k;
1121 bool BadArgument =
false;
1122 bool conjA =
false, conjB =
false;
1125 if( !(ETranspChar[transa]==
'N') ) {
1128 if( !(ETranspChar[transb]==
'N') ) {
1133 if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){
return; }
1135 std::cout <<
"BLAS::GEMM Error: M == " << m << std::endl;
1139 std::cout <<
"BLAS::GEMM Error: N == " << n << std::endl;
1143 std::cout <<
"BLAS::GEMM Error: K == " << k << std::endl;
1147 std::cout <<
"BLAS::GEMM Error: LDA < "<<NRowA<<std::endl;
1151 std::cout <<
"BLAS::GEMM Error: LDB < "<<NRowB<<std::endl;
1155 std::cout <<
"BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;
1162 conjA = (ETranspChar[transa] ==
'C');
1163 conjB = (ETranspChar[transb] ==
'C');
1166 if( alpha == alpha_zero ) {
1167 if( beta == beta_zero ) {
1168 for (j=izero; j<n; j++) {
1169 for (i=izero; i<m; i++) {
1170 C[j*ldc + i] = C_zero;
1174 for (j=izero; j<n; j++) {
1175 for (i=izero; i<m; i++) {
1176 C[j*ldc + i] *= beta;
1185 if ( ETranspChar[transb]==
'N' ) {
1186 if ( ETranspChar[transa]==
'N' ) {
1188 for (j=izero; j<n; j++) {
1189 if( beta == beta_zero ) {
1190 for (i=izero; i<m; i++){
1191 C[j*ldc + i] = C_zero;
1193 }
else if( beta != beta_one ) {
1194 for (i=izero; i<m; i++){
1195 C[j*ldc + i] *= beta;
1198 for (p=izero; p<k; p++){
1199 if (B[j*ldb + p] != B_zero ){
1200 temp = alpha*B[j*ldb + p];
1201 for (i=izero; i<m; i++) {
1202 C[j*ldc + i] += temp*A[p*lda + i];
1207 }
else if ( conjA ) {
1209 for (j=izero; j<n; j++) {
1210 for (i=izero; i<m; i++) {
1212 for (p=izero; p<k; p++) {
1215 if (beta == beta_zero) {
1216 C[j*ldc + i] = alpha*temp;
1218 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1224 for (j=izero; j<n; j++) {
1225 for (i=izero; i<m; i++) {
1227 for (p=izero; p<k; p++) {
1228 temp += A[i*lda + p]*B[j*ldb + p];
1230 if (beta == beta_zero) {
1231 C[j*ldc + i] = alpha*temp;
1233 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1238 }
else if ( ETranspChar[transa]==
'N' ) {
1241 for (j=izero; j<n; j++) {
1242 if (beta == beta_zero) {
1243 for (i=izero; i<m; i++) {
1244 C[j*ldc + i] = C_zero;
1246 }
else if ( beta != beta_one ) {
1247 for (i=izero; i<m; i++) {
1248 C[j*ldc + i] *= beta;
1251 for (p=izero; p<k; p++) {
1252 if (B[p*ldb + j] != B_zero) {
1254 for (i=izero; i<m; i++) {
1255 C[j*ldc + i] += temp*A[p*lda + i];
1262 for (j=izero; j<n; j++) {
1263 if (beta == beta_zero) {
1264 for (i=izero; i<m; i++) {
1265 C[j*ldc + i] = C_zero;
1267 }
else if ( beta != beta_one ) {
1268 for (i=izero; i<m; i++) {
1269 C[j*ldc + i] *= beta;
1272 for (p=izero; p<k; p++) {
1273 if (B[p*ldb + j] != B_zero) {
1274 temp = alpha*B[p*ldb + j];
1275 for (i=izero; i<m; i++) {
1276 C[j*ldc + i] += temp*A[p*lda + i];
1282 }
else if ( conjA ) {
1285 for (j=izero; j<n; j++) {
1286 for (i=izero; i<m; i++) {
1288 for (p=izero; p<k; p++) {
1292 if (beta == beta_zero) {
1293 C[j*ldc + i] = alpha*temp;
1295 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1301 for (j=izero; j<n; j++) {
1302 for (i=izero; i<m; i++) {
1304 for (p=izero; p<k; p++) {
1308 if (beta == beta_zero) {
1309 C[j*ldc + i] = alpha*temp;
1311 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1319 for (j=izero; j<n; j++) {
1320 for (i=izero; i<m; i++) {
1322 for (p=izero; p<k; p++) {
1323 temp += A[i*lda + p]
1326 if (beta == beta_zero) {
1327 C[j*ldc + i] = alpha*temp;
1329 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1335 for (j=izero; j<n; j++) {
1336 for (i=izero; i<m; i++) {
1338 for (p=izero; p<k; p++) {
1339 temp += A[i*lda + p]*B[p*ldb + j];
1341 if (beta == beta_zero) {
1342 C[j*ldc + i] = alpha*temp;
1344 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1354 template<
typename OrdinalType,
typename ScalarType>
1356 SWAP (
const OrdinalType& n, ScalarType*
const x,
const OrdinalType& incx,
1357 ScalarType*
const y,
const OrdinalType& incy)
const
1363 if (incx == 1 && incy == 1) {
1364 for (
int i = 0; i < n; ++i) {
1365 ScalarType tmp = x[i];
1375 ix = (1-n) * incx + 1;
1378 iy = (1-n) * incy + 1;
1381 for (
int i = 1; i <= n; ++i) {
1382 ScalarType tmp = x[ix - 1];
1383 x[ix - 1] = y[iy - 1];
1391 template<
typename OrdinalType,
typename ScalarType>
1392 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
1393 void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(
ESide side,
EUplo uplo,
const OrdinalType& m,
const OrdinalType& n,
const alpha_type alpha,
const A_type* A,
const OrdinalType& lda,
const B_type* B,
const OrdinalType& ldb,
const beta_type beta, ScalarType* C,
const OrdinalType& ldc)
const
1401 OrdinalType i, j, k, NRowA = m;
1402 ScalarType temp1, temp2;
1403 bool BadArgument =
false;
1404 bool Upper = (EUploChar[uplo] ==
'U');
1405 if (ESideChar[side] ==
'R') { NRowA = n; }
1408 if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) {
return; }
1410 std::cout <<
"BLAS::SYMM Error: M == "<< m << std::endl;
1411 BadArgument =
true; }
1413 std::cout <<
"BLAS::SYMM Error: N == "<< n << std::endl;
1414 BadArgument =
true; }
1416 std::cout <<
"BLAS::SYMM Error: LDA < "<<NRowA<<std::endl;
1417 BadArgument =
true; }
1419 std::cout <<
"BLAS::SYMM Error: LDB < MAX(1,M)"<<std::endl;
1420 BadArgument =
true; }
1422 std::cout <<
"BLAS::SYMM Error: LDC < MAX(1,M)"<<std::endl;
1423 BadArgument =
true; }
1428 if (alpha == alpha_zero) {
1429 if (beta == beta_zero ) {
1430 for (j=izero; j<n; j++) {
1431 for (i=izero; i<m; i++) {
1432 C[j*ldc + i] = C_zero;
1436 for (j=izero; j<n; j++) {
1437 for (i=izero; i<m; i++) {
1438 C[j*ldc + i] *= beta;
1445 if ( ESideChar[side] ==
'L') {
1450 for (j=izero; j<n; j++) {
1451 for (i=izero; i<m; i++) {
1452 temp1 = alpha*B[j*ldb + i];
1454 for (k=izero; k<i; k++) {
1455 C[j*ldc + k] += temp1*A[i*lda + k];
1456 temp2 += B[j*ldb + k]*A[i*lda + k];
1458 if (beta == beta_zero) {
1459 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1461 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1467 for (j=izero; j<n; j++) {
1468 for (i=m-ione; i>-ione; i--) {
1469 temp1 = alpha*B[j*ldb + i];
1471 for (k=i+ione; k<m; k++) {
1472 C[j*ldc + k] += temp1*A[i*lda + k];
1473 temp2 += B[j*ldb + k]*A[i*lda + k];
1475 if (beta == beta_zero) {
1476 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1478 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1485 for (j=izero; j<n; j++) {
1486 temp1 = alpha*A[j*lda + j];
1487 if (beta == beta_zero) {
1488 for (i=izero; i<m; i++) {
1489 C[j*ldc + i] = temp1*B[j*ldb + i];
1492 for (i=izero; i<m; i++) {
1493 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
1496 for (k=izero; k<j; k++) {
1498 temp1 = alpha*A[j*lda + k];
1500 temp1 = alpha*A[k*lda + j];
1502 for (i=izero; i<m; i++) {
1503 C[j*ldc + i] += temp1*B[k*ldb + i];
1506 for (k=j+ione; k<n; k++) {
1508 temp1 = alpha*A[k*lda + j];
1510 temp1 = alpha*A[j*lda + k];
1512 for (i=izero; i<m; i++) {
1513 C[j*ldc + i] += temp1*B[k*ldb + i];
1521 template<
typename OrdinalType,
typename ScalarType>
1522 template <
typename alpha_type,
typename A_type,
typename beta_type>
1523 void DefaultBLASImpl<OrdinalType, ScalarType>::SYRK(
EUplo uplo,
ETransp trans,
const OrdinalType& n,
const OrdinalType& k,
const alpha_type alpha,
const A_type* A,
const OrdinalType& lda,
const beta_type beta, ScalarType* C,
const OrdinalType& ldc)
const
1534 OrdinalType i, j, l, NRowA = n;
1535 bool BadArgument =
false;
1536 bool Upper = (EUploChar[uplo] ==
'U');
1542 "Teuchos::BLAS<"<<OTNT::name()<<
","<<STNT::name()<<
">::SYRK()"
1543 " does not support CONJ_TRANS for complex data types."
1547 if( !(ETranspChar[trans]==
'N') ) {
1552 if ( n==izero ) {
return; }
1553 if ( ( (alpha==alpha_zero) || (k==izero) ) && (beta==beta_one) ) {
return; }
1555 std::cout <<
"BLAS::SYRK Error: N == "<< n <<std::endl;
1556 BadArgument =
true; }
1558 std::cout <<
"BLAS::SYRK Error: K == "<< k <<std::endl;
1559 BadArgument =
true; }
1561 std::cout <<
"BLAS::SYRK Error: LDA < "<<NRowA<<std::endl;
1562 BadArgument =
true; }
1564 std::cout <<
"BLAS::SYRK Error: LDC < MAX(1,N)"<<std::endl;
1565 BadArgument =
true; }
1570 if (alpha == alpha_zero) {
1572 if (beta==beta_zero) {
1573 for (j=izero; j<n; j++) {
1574 for (i=izero; i<=j; i++) {
1575 C[j*ldc + i] = C_zero;
1580 for (j=izero; j<n; j++) {
1581 for (i=izero; i<=j; i++) {
1582 C[j*ldc + i] *= beta;
1588 if (beta==beta_zero) {
1589 for (j=izero; j<n; j++) {
1590 for (i=j; i<n; i++) {
1591 C[j*ldc + i] = C_zero;
1596 for (j=izero; j<n; j++) {
1597 for (i=j; i<n; i++) {
1598 C[j*ldc + i] *= beta;
1608 if ( ETranspChar[trans]==
'N' ) {
1612 for (j=izero; j<n; j++) {
1613 if (beta==beta_zero) {
1614 for (i=izero; i<=j; i++) {
1615 C[j*ldc + i] = C_zero;
1618 else if (beta!=beta_one) {
1619 for (i=izero; i<=j; i++) {
1620 C[j*ldc + i] *= beta;
1623 for (l=izero; l<k; l++) {
1624 if (A[l*lda + j] != A_zero) {
1625 temp = alpha*A[l*lda + j];
1626 for (i = izero; i <=j; i++) {
1627 C[j*ldc + i] += temp*A[l*lda + i];
1634 for (j=izero; j<n; j++) {
1635 if (beta==beta_zero) {
1636 for (i=j; i<n; i++) {
1637 C[j*ldc + i] = C_zero;
1640 else if (beta!=beta_one) {
1641 for (i=j; i<n; i++) {
1642 C[j*ldc + i] *= beta;
1645 for (l=izero; l<k; l++) {
1646 if (A[l*lda + j] != A_zero) {
1647 temp = alpha*A[l*lda + j];
1648 for (i=j; i<n; i++) {
1649 C[j*ldc + i] += temp*A[l*lda + i];
1660 for (j=izero; j<n; j++) {
1661 for (i=izero; i<=j; i++) {
1663 for (l=izero; l<k; l++) {
1664 temp += A[i*lda + l]*A[j*lda + l];
1666 if (beta==beta_zero) {
1667 C[j*ldc + i] = alpha*temp;
1670 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1676 for (j=izero; j<n; j++) {
1677 for (i=j; i<n; i++) {
1679 for (l=izero; l<k; ++l) {
1680 temp += A[i*lda + l]*A[j*lda + l];
1682 if (beta==beta_zero) {
1683 C[j*ldc + i] = alpha*temp;
1686 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1695 template<
typename OrdinalType,
typename ScalarType>
1696 template <
typename alpha_type,
typename A_type>
1697 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const OrdinalType& m,
const OrdinalType& n,
const alpha_type alpha,
const A_type* A,
const OrdinalType& lda, ScalarType* B,
const OrdinalType& ldb)
const
1705 OrdinalType i, j, k, NRowA = m;
1707 bool BadArgument =
false;
1708 bool LSide = (ESideChar[side] ==
'L');
1709 bool noUnit = (EDiagChar[diag] ==
'N');
1710 bool Upper = (EUploChar[uplo] ==
'U');
1711 bool noConj = (ETranspChar[transa] ==
'T');
1713 if(!LSide) { NRowA = n; }
1716 if (n==izero || m==izero) {
return; }
1718 std::cout <<
"BLAS::TRMM Error: M == "<< m <<std::endl;
1719 BadArgument =
true; }
1721 std::cout <<
"BLAS::TRMM Error: N == "<< n <<std::endl;
1722 BadArgument =
true; }
1724 std::cout <<
"BLAS::TRMM Error: LDA < "<<NRowA<<std::endl;
1725 BadArgument =
true; }
1727 std::cout <<
"BLAS::TRMM Error: LDB < MAX(1,M)"<<std::endl;
1728 BadArgument =
true; }
1733 if( alpha == alpha_zero ) {
1734 for( j=izero; j<n; j++ ) {
1735 for( i=izero; i<m; i++ ) {
1736 B[j*ldb + i] = B_zero;
1746 if ( ETranspChar[transa]==
'N' ) {
1751 for( j=izero; j<n; j++ ) {
1752 for( k=izero; k<m; k++) {
1753 if ( B[j*ldb + k] != B_zero ) {
1754 temp = alpha*B[j*ldb + k];
1755 for( i=izero; i<k; i++ ) {
1756 B[j*ldb + i] += temp*A[k*lda + i];
1759 temp *=A[k*lda + k];
1760 B[j*ldb + k] = temp;
1766 for( j=izero; j<n; j++ ) {
1767 for( k=m-ione; k>-ione; k-- ) {
1768 if( B[j*ldb + k] != B_zero ) {
1769 temp = alpha*B[j*ldb + k];
1770 B[j*ldb + k] = temp;
1772 B[j*ldb + k] *= A[k*lda + k];
1773 for( i=k+ione; i<m; i++ ) {
1774 B[j*ldb + i] += temp*A[k*lda + i];
1783 for( j=izero; j<n; j++ ) {
1784 for( i=m-ione; i>-ione; i-- ) {
1785 temp = B[j*ldb + i];
1788 temp *= A[i*lda + i];
1789 for( k=izero; k<i; k++ ) {
1790 temp += A[i*lda + k]*B[j*ldb + k];
1795 for( k=izero; k<i; k++ ) {
1799 B[j*ldb + i] = alpha*temp;
1803 for( j=izero; j<n; j++ ) {
1804 for( i=izero; i<m; i++ ) {
1805 temp = B[j*ldb + i];
1808 temp *= A[i*lda + i];
1809 for( k=i+ione; k<m; k++ ) {
1810 temp += A[i*lda + k]*B[j*ldb + k];
1815 for( k=i+ione; k<m; k++ ) {
1819 B[j*ldb + i] = alpha*temp;
1827 if( ETranspChar[transa] ==
'N' ) {
1832 for( j=n-ione; j>-ione; j-- ) {
1835 temp *= A[j*lda + j];
1836 for( i=izero; i<m; i++ ) {
1837 B[j*ldb + i] *= temp;
1839 for( k=izero; k<j; k++ ) {
1840 if( A[j*lda + k] != A_zero ) {
1841 temp = alpha*A[j*lda + k];
1842 for( i=izero; i<m; i++ ) {
1843 B[j*ldb + i] += temp*B[k*ldb + i];
1850 for( j=izero; j<n; j++ ) {
1853 temp *= A[j*lda + j];
1854 for( i=izero; i<m; i++ ) {
1855 B[j*ldb + i] *= temp;
1857 for( k=j+ione; k<n; k++ ) {
1858 if( A[j*lda + k] != A_zero ) {
1859 temp = alpha*A[j*lda + k];
1860 for( i=izero; i<m; i++ ) {
1861 B[j*ldb + i] += temp*B[k*ldb + i];
1871 for( k=izero; k<n; k++ ) {
1872 for( j=izero; j<k; j++ ) {
1873 if( A[k*lda + j] != A_zero ) {
1875 temp = alpha*A[k*lda + j];
1878 for( i=izero; i<m; i++ ) {
1879 B[j*ldb + i] += temp*B[k*ldb + i];
1886 temp *= A[k*lda + k];
1891 for( i=izero; i<m; i++ ) {
1892 B[k*ldb + i] *= temp;
1897 for( k=n-ione; k>-ione; k-- ) {
1898 for( j=k+ione; j<n; j++ ) {
1899 if( A[k*lda + j] != A_zero ) {
1901 temp = alpha*A[k*lda + j];
1904 for( i=izero; i<m; i++ ) {
1905 B[j*ldb + i] += temp*B[k*ldb + i];
1912 temp *= A[k*lda + k];
1917 for( i=izero; i<m; i++ ) {
1918 B[k*ldb + i] *= temp;
1928 template<
typename OrdinalType,
typename ScalarType>
1929 template <
typename alpha_type,
typename A_type>
1930 void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const OrdinalType& m,
const OrdinalType& n,
const alpha_type alpha,
const A_type* A,
const OrdinalType& lda, ScalarType* B,
const OrdinalType& ldb)
const
1940 OrdinalType NRowA = m;
1941 bool BadArgument =
false;
1942 bool noUnit = (EDiagChar[diag]==
'N');
1943 bool noConj = (ETranspChar[transa] ==
'T');
1945 if (!(ESideChar[side] ==
'L')) { NRowA = n; }
1948 if (n == izero || m == izero) {
return; }
1950 std::cout <<
"BLAS::TRSM Error: M == "<<m<<std::endl;
1951 BadArgument =
true; }
1953 std::cout <<
"BLAS::TRSM Error: N == "<<n<<std::endl;
1954 BadArgument =
true; }
1956 std::cout <<
"BLAS::TRSM Error: LDA < "<<NRowA<<std::endl;
1957 BadArgument =
true; }
1959 std::cout <<
"BLAS::TRSM Error: LDB < MAX(1,M)"<<std::endl;
1960 BadArgument =
true; }
1966 if(alpha == alpha_zero) {
1967 for(j = izero; j < n; j++) {
1968 for( i = izero; i < m; i++) {
1969 B[j*ldb+i] = B_zero;
1975 if(ESideChar[side] ==
'L') {
1979 if(ETranspChar[transa] ==
'N') {
1983 if(EUploChar[uplo] ==
'U') {
1985 for(j = izero; j < n; j++) {
1987 if(alpha != alpha_one) {
1988 for( i = izero; i < m; i++) {
1989 B[j*ldb+i] *= alpha;
1993 for(k = (m - ione); k > -ione; k--) {
1995 if (B[j*ldb + k] != B_zero) {
1997 B[j*ldb + k] /= A[k*lda + k];
1999 for(i = izero; i < k; i++) {
2000 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2008 for(j = izero; j < n; j++) {
2010 if(alpha != alpha_one) {
2011 for( i = izero; i < m; i++) {
2012 B[j*ldb+i] *= alpha;
2016 for(k = izero; k < m; k++) {
2018 if (B[j*ldb + k] != B_zero) {
2020 B[j*ldb + k] /= A[k*lda + k];
2022 for(i = k+ione; i < m; i++) {
2023 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2035 if(EUploChar[uplo] ==
'U') {
2037 for(j = izero; j < n; j++) {
2038 for( i = izero; i < m; i++) {
2039 temp = alpha*B[j*ldb+i];
2041 for(k = izero; k < i; k++) {
2042 temp -= A[i*lda + k] * B[j*ldb + k];
2045 temp /= A[i*lda + i];
2048 for(k = izero; k < i; k++) {
2056 B[j*ldb + i] = temp;
2062 for(j = izero; j < n; j++) {
2063 for(i = (m - ione) ; i > -ione; i--) {
2064 temp = alpha*B[j*ldb+i];
2066 for(k = i+ione; k < m; k++) {
2067 temp -= A[i*lda + k] * B[j*ldb + k];
2070 temp /= A[i*lda + i];
2073 for(k = i+ione; k < m; k++) {
2081 B[j*ldb + i] = temp;
2092 if (ETranspChar[transa] ==
'N') {
2096 if(EUploChar[uplo] ==
'U') {
2099 for(j = izero; j < n; j++) {
2101 if(alpha != alpha_one) {
2102 for( i = izero; i < m; i++) {
2103 B[j*ldb+i] *= alpha;
2106 for(k = izero; k < j; k++) {
2108 if (A[j*lda + k] != A_zero) {
2109 for(i = izero; i < m; i++) {
2110 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2115 temp = B_one/A[j*lda + j];
2116 for(i = izero; i < m; i++) {
2117 B[j*ldb + i] *= temp;
2124 for(j = (n - ione); j > -ione; j--) {
2126 if(alpha != alpha_one) {
2127 for( i = izero; i < m; i++) {
2128 B[j*ldb+i] *= alpha;
2132 for(k = j+ione; k < n; k++) {
2134 if (A[j*lda + k] != A_zero) {
2135 for(i = izero; i < m; i++) {
2136 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2141 temp = B_one/A[j*lda + j];
2142 for(i = izero; i < m; i++) {
2143 B[j*ldb + i] *= temp;
2154 if(EUploChar[uplo] ==
'U') {
2156 for(k = (n - ione); k > -ione; k--) {
2159 temp = B_one/A[k*lda + k];
2162 for(i = izero; i < m; i++) {
2163 B[k*ldb + i] *= temp;
2166 for(j = izero; j < k; j++) {
2167 if (A[k*lda + j] != A_zero) {
2169 temp = A[k*lda + j];
2172 for(i = izero; i < m; i++) {
2173 B[j*ldb + i] -= temp*B[k*ldb + i];
2177 if (alpha != alpha_one) {
2178 for (i = izero; i < m; i++) {
2179 B[k*ldb + i] *= alpha;
2186 for(k = izero; k < n; k++) {
2189 temp = B_one/A[k*lda + k];
2192 for (i = izero; i < m; i++) {
2193 B[k*ldb + i] *= temp;
2196 for(j = k+ione; j < n; j++) {
2197 if(A[k*lda + j] != A_zero) {
2199 temp = A[k*lda + j];
2202 for(i = izero; i < m; i++) {
2203 B[j*ldb + i] -= temp*B[k*ldb + i];
2207 if (alpha != alpha_one) {
2208 for (i = izero; i < m; i++) {
2209 B[k*ldb + i] *= alpha;
2223 class TEUCHOSNUMERICS_LIB_DLL_EXPORT
BLAS<int, float>
2226 inline BLAS(
void) {}
2227 inline BLAS(
const BLAS<int, float>& ) {}
2228 inline virtual ~BLAS(
void) {}
2229 void ROTG(
float* da,
float* db,
float* c,
float* s)
const;
2230 void ROT(
const int& n,
float* dx,
const int& incx,
float* dy,
const int& incy,
float* c,
float* s)
const;
2231 float ASUM(
const int& n,
const float* x,
const int& incx)
const;
2232 void AXPY(
const int& n,
const float& alpha,
const float* x,
const int& incx,
float* y,
const int& incy)
const;
2233 void COPY(
const int& n,
const float* x,
const int& incx,
float* y,
const int& incy)
const;
2234 float DOT(
const int& n,
const float* x,
const int& incx,
const float* y,
const int& incy)
const;
2235 float NRM2(
const int& n,
const float* x,
const int& incx)
const;
2236 void SCAL(
const int& n,
const float& alpha,
float* x,
const int& incx)
const;
2237 int IAMAX(
const int& n,
const float* x,
const int& incx)
const;
2238 void GEMV(
ETransp trans,
const int& m,
const int& n,
const float& alpha,
const float* A,
const int& lda,
const float* x,
const int& incx,
const float& beta,
float* y,
const int& incy)
const;
2239 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int& n,
const float* A,
const int& lda,
float* x,
const int& incx)
const;
2240 void GER(
const int& m,
const int& n,
const float& alpha,
const float* x,
const int& incx,
const float* y,
const int& incy,
float* A,
const int& lda)
const;
2241 void GEMM(
ETransp transa,
ETransp transb,
const int& m,
const int& n,
const int& k,
const float& alpha,
const float* A,
const int& lda,
const float* B,
const int& ldb,
const float& beta,
float* C,
const int& ldc)
const;
2242 void SWAP(
const int& n,
float*
const x,
const int& incx,
float*
const y,
const int& incy)
const;
2243 void SYMM(
ESide side,
EUplo uplo,
const int& m,
const int& n,
const float& alpha,
const float* A,
const int& lda,
const float* B,
const int& ldb,
const float& beta,
float* C,
const int& ldc)
const;
2244 void SYRK(
EUplo uplo,
ETransp trans,
const int& n,
const int& k,
const float& alpha,
const float* A,
const int& lda,
const float& beta,
float* C,
const int& ldc)
const;
2245 void HERK(
EUplo uplo,
ETransp trans,
const int& n,
const int& k,
const float& alpha,
const float* A,
const int& lda,
const float& beta,
float* C,
const int& ldc)
const;
2246 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int& n,
const float& alpha,
const float* A,
const int& lda,
float* B,
const int& ldb)
const;
2247 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int& n,
const float& alpha,
const float* A,
const int& lda,
float* B,
const int& ldb)
const;
2253 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, double>
2256 inline BLAS(
void) {}
2257 inline BLAS(
const BLAS<int, double>& ) {}
2258 inline virtual ~BLAS(
void) {}
2259 void ROTG(
double* da,
double* db,
double* c,
double* s)
const;
2260 void ROT(
const int& n,
double* dx,
const int& incx,
double* dy,
const int& incy,
double* c,
double* s)
const;
2261 double ASUM(
const int& n,
const double* x,
const int& incx)
const;
2262 void AXPY(
const int& n,
const double& alpha,
const double* x,
const int& incx,
double* y,
const int& incy)
const;
2263 void COPY(
const int& n,
const double* x,
const int& incx,
double* y,
const int& incy)
const;
2264 double DOT(
const int& n,
const double* x,
const int& incx,
const double* y,
const int& incy)
const;
2265 double NRM2(
const int& n,
const double* x,
const int& incx)
const;
2266 void SCAL(
const int& n,
const double& alpha,
double* x,
const int& incx)
const;
2267 int IAMAX(
const int& n,
const double* x,
const int& incx)
const;
2268 void GEMV(
ETransp trans,
const int& m,
const int& n,
const double& alpha,
const double* A,
const int& lda,
const double* x,
const int& incx,
const double& beta,
double* y,
const int& incy)
const;
2269 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int& n,
const double* A,
const int& lda,
double* x,
const int& incx)
const;
2270 void GER(
const int& m,
const int& n,
const double& alpha,
const double* x,
const int& incx,
const double* y,
const int& incy,
double* A,
const int& lda)
const;
2271 void GEMM(
ETransp transa,
ETransp transb,
const int& m,
const int& n,
const int& k,
const double& alpha,
const double* A,
const int& lda,
const double* B,
const int& ldb,
const double& beta,
double* C,
const int& ldc)
const;
2272 void SWAP(
const int& n,
double*
const x,
const int& incx,
double*
const y,
const int& incy)
const;
2273 void SYMM(
ESide side,
EUplo uplo,
const int& m,
const int& n,
const double& alpha,
const double* A,
const int& lda,
const double* B,
const int& ldb,
const double& beta,
double* C,
const int& ldc)
const;
2274 void SYRK(
EUplo uplo,
ETransp trans,
const int& n,
const int& k,
const double& alpha,
const double* A,
const int& lda,
const double& beta,
double* C,
const int& ldc)
const;
2275 void HERK(
EUplo uplo,
ETransp trans,
const int& n,
const int& k,
const double& alpha,
const double* A,
const int& lda,
const double& beta,
double* C,
const int& ldc)
const;
2276 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int& n,
const double& alpha,
const double* A,
const int& lda,
double* B,
const int& ldb)
const;
2277 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int& n,
const double& alpha,
const double* A,
const int& lda,
double* B,
const int& ldb)
const;
2283 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<float> >
2286 inline BLAS(
void) {}
2287 inline BLAS(
const BLAS<
int, std::complex<float> >& ) {}
2288 inline virtual ~BLAS(
void) {}
2289 void ROTG(std::complex<float>* da, std::complex<float>* db,
float* c, std::complex<float>* s)
const;
2290 void ROT(
const int& n, std::complex<float>* dx,
const int& incx, std::complex<float>* dy,
const int& incy,
float* c, std::complex<float>* s)
const;
2291 float ASUM(
const int& n,
const std::complex<float>* x,
const int& incx)
const;
2292 void AXPY(
const int& n,
const std::complex<float> alpha,
const std::complex<float>* x,
const int& incx, std::complex<float>* y,
const int& incy)
const;
2293 void COPY(
const int& n,
const std::complex<float>* x,
const int& incx, std::complex<float>* y,
const int& incy)
const;
2294 std::complex<float> DOT(
const int& n,
const std::complex<float>* x,
const int& incx,
const std::complex<float>* y,
const int& incy)
const;
2295 float NRM2(
const int& n,
const std::complex<float>* x,
const int& incx)
const;
2296 void SCAL(
const int& n,
const std::complex<float> alpha, std::complex<float>* x,
const int& incx)
const;
2297 int IAMAX(
const int& n,
const std::complex<float>* x,
const int& incx)
const;
2298 void GEMV(
ETransp trans,
const int& m,
const int& n,
const std::complex<float> alpha,
const std::complex<float>* A,
const int& lda,
const std::complex<float>* x,
const int& incx,
const std::complex<float> beta, std::complex<float>* y,
const int& incy)
const;
2299 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int& n,
const std::complex<float>* A,
const int& lda, std::complex<float>* x,
const int& incx)
const;
2300 void GER(
const int& m,
const int& n,
const std::complex<float> alpha,
const std::complex<float>* x,
const int& incx,
const std::complex<float>* y,
const int& incy, std::complex<float>* A,
const int& lda)
const;
2301 void GEMM(
ETransp transa,
ETransp transb,
const int& m,
const int& n,
const int& k,
const std::complex<float> alpha,
const std::complex<float>* A,
const int& lda,
const std::complex<float>* B,
const int& ldb,
const std::complex<float> beta, std::complex<float>* C,
const int& ldc)
const;
2302 void SWAP(
const int& n, std::complex<float>*
const x,
const int& incx, std::complex<float>*
const y,
const int& incy)
const;
2303 void SYMM(
ESide side,
EUplo uplo,
const int& m,
const int& n,
const std::complex<float> alpha,
const std::complex<float>* A,
const int& lda,
const std::complex<float> *B,
const int& ldb,
const std::complex<float> beta, std::complex<float> *C,
const int& ldc)
const;
2304 void SYRK(
EUplo uplo,
ETransp trans,
const int& n,
const int& k,
const std::complex<float> alpha,
const std::complex<float>* A,
const int& lda,
const std::complex<float> beta, std::complex<float>* C,
const int& ldc)
const;
2305 void HERK(
EUplo uplo,
ETransp trans,
const int& n,
const int& k,
const std::complex<float> alpha,
const std::complex<float>* A,
const int& lda,
const std::complex<float> beta, std::complex<float>* C,
const int& ldc)
const;
2306 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int& n,
const std::complex<float> alpha,
const std::complex<float>* A,
const int& lda, std::complex<float>* B,
const int& ldb)
const;
2307 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int& n,
const std::complex<float> alpha,
const std::complex<float>* A,
const int& lda, std::complex<float>* B,
const int& ldb)
const;
2313 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<double> >
2316 inline BLAS(
void) {}
2317 inline BLAS(
const BLAS<
int, std::complex<double> >& ) {}
2318 inline virtual ~BLAS(
void) {}
2319 void ROTG(std::complex<double>* da, std::complex<double>* db,
double* c, std::complex<double>* s)
const;
2320 void ROT(
const int& n, std::complex<double>* dx,
const int& incx, std::complex<double>* dy,
const int& incy,
double* c, std::complex<double>* s)
const;
2321 double ASUM(
const int& n,
const std::complex<double>* x,
const int& incx)
const;
2322 void AXPY(
const int& n,
const std::complex<double> alpha,
const std::complex<double>* x,
const int& incx, std::complex<double>* y,
const int& incy)
const;
2323 void COPY(
const int& n,
const std::complex<double>* x,
const int& incx, std::complex<double>* y,
const int& incy)
const;
2324 std::complex<double> DOT(
const int& n,
const std::complex<double>* x,
const int& incx,
const std::complex<double>* y,
const int& incy)
const;
2325 double NRM2(
const int& n,
const std::complex<double>* x,
const int& incx)
const;
2326 void SCAL(
const int& n,
const std::complex<double> alpha, std::complex<double>* x,
const int& incx)
const;
2327 int IAMAX(
const int& n,
const std::complex<double>* x,
const int& incx)
const;
2328 void GEMV(
ETransp trans,
const int& m,
const int& n,
const std::complex<double> alpha,
const std::complex<double>* A,
const int& lda,
const std::complex<double>* x,
const int& incx,
const std::complex<double> beta, std::complex<double>* y,
const int& incy)
const;
2329 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int& n,
const std::complex<double>* A,
const int& lda, std::complex<double>* x,
const int& incx)
const;
2330 void GER(
const int& m,
const int& n,
const std::complex<double> alpha,
const std::complex<double>* x,
const int& incx,
const std::complex<double>* y,
const int& incy, std::complex<double>* A,
const int& lda)
const;
2331 void GEMM(
ETransp transa,
ETransp transb,
const int& m,
const int& n,
const int& k,
const std::complex<double> alpha,
const std::complex<double>* A,
const int& lda,
const std::complex<double>* B,
const int& ldb,
const std::complex<double> beta, std::complex<double>* C,
const int& ldc)
const;
2332 void SWAP(
const int& n, std::complex<double>*
const x,
const int& incx, std::complex<double>*
const y,
const int& incy)
const;
2333 void SYMM(
ESide side,
EUplo uplo,
const int& m,
const int& n,
const std::complex<double> alpha,
const std::complex<double>* A,
const int& lda,
const std::complex<double> *B,
const int& ldb,
const std::complex<double> beta, std::complex<double> *C,
const int& ldc)
const;
2334 void SYRK(
EUplo uplo,
ETransp trans,
const int& n,
const int& k,
const std::complex<double> alpha,
const std::complex<double>* A,
const int& lda,
const std::complex<double> beta, std::complex<double>* C,
const int& ldc)
const;
2335 void HERK(
EUplo uplo,
ETransp trans,
const int& n,
const int& k,
const std::complex<double> alpha,
const std::complex<double>* A,
const int& lda,
const std::complex<double> beta, std::complex<double>* C,
const int& ldc)
const;
2336 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int& n,
const std::complex<double> alpha,
const std::complex<double>* A,
const int& lda, std::complex<double>* B,
const int& ldb)
const;
2337 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int& n,
const std::complex<double> alpha,
const std::complex<double>* A,
const int& lda, std::complex<double>* B,
const int& ldb)
const;
2342 #endif // _TEUCHOS_BLAS_HPP_
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A <- alpha*x*y'+A.
static T one()
Returns representation of one for this ordinal type.
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Perform the operation: y <- y+alpha*x.
BLAS(const BLAS< OrdinalType, ScalarType > &)
Copy constructor.
Enumerated types for BLAS input characters.
virtual ~DefaultBLASImpl(void)
Destructor.
T magnitudeType
Mandatory typedef for result of magnitude.
virtual ~BLAS(void)
Destructor.
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
Performs the matrix-vector operation: x <- A*x or x <- A'*x where A is a unit/non-unit n by n upper/low...
static T zero()
Returns representation of zero for this ordinal type.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A'*x+beta*y where A is a gene...
void ROT(const OrdinalType &n, ScalarType *dx, const OrdinalType &incx, ScalarType *dy, const OrdinalType &incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
BLAS(void)
Default constructor.
DefaultBLASImpl(void)
Default constructor.
Teuchos header file which uses auto-configuration information to include necessary C++ headers...
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
details::GivensRotator< ScalarType >::c_type rotg_c_type
The type used for c in ROTG.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
General matrix-matrix multiply.
This structure defines some basic traits for a scalar field type.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sum the absolute values of the entries of x.
DefaultBLASImpl(const DefaultBLASImpl< OrdinalType, ScalarType > &)
Copy constructor.
static T conjugate(T a)
Returns the conjugate of the scalar type a.
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Copy the vector x to the vector y.
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
Form the dot product of the vectors x and y.
static magnitudeType magnitude(T a)
Returns the magnitudeType of the scalar type a.
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the matrix-matrix operation: C <- alpha*A*B+beta*C or C <- alpha*B*A+beta*C where A is an m ...
Defines basic traits for the ordinal field type.
Default traits class that just returns typeid(T).name().
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Performs the matrix-matrix operation: B <- alpha*op(A)*B or B <- alpha*B*op(A) where op(A) is an unit...
Default implementation for BLAS routines.
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
Defines basic traits for the scalar field type.
static T zero()
Returns representation of zero for this scalar type.
void SYRK(EUplo uplo, ETransp trans, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the symmetric rank k operation: C <- alpha*A*A'+beta*C or C <- alpha*A'*A+beta*C, where alpha and beta are scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case or k by n matrix in the second case.
static T one()
Returns representation of one for this scalar type.
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.