59 #ifndef _TEUCHOS_BLAS_HPP_
60 #define _TEUCHOS_BLAS_HPP_
117 template<
typename ScalarType,
128 template<
typename OrdinalType,
typename ScalarType>
157 void ROTG(ScalarType* da, ScalarType* db,
rotg_c_type* c, ScalarType* s)
const;
160 void ROT(
const OrdinalType&
n, ScalarType* dx,
const OrdinalType& incx, ScalarType* dy,
const OrdinalType& incy,
MagnitudeType* c, ScalarType* s)
const;
163 void SCAL(
const OrdinalType&
n,
const ScalarType& alpha, ScalarType* x,
const OrdinalType& incx)
const;
166 void COPY(
const OrdinalType&
n,
const ScalarType* x,
const OrdinalType& incx, ScalarType* y,
const OrdinalType& incy)
const;
169 template <
typename alpha_type,
typename x_type>
170 void AXPY(
const OrdinalType&
n,
const alpha_type alpha,
const x_type* x,
const OrdinalType& incx, ScalarType* y,
const OrdinalType& incy)
const;
176 template <
typename x_type,
typename y_type>
177 ScalarType
DOT(
const OrdinalType&
n,
const x_type* x,
const OrdinalType& incx,
const y_type* y,
const OrdinalType& incy)
const;
183 OrdinalType
IAMAX(
const OrdinalType&
n,
const ScalarType* x,
const OrdinalType& incx)
const;
190 template <
typename alpha_type,
typename A_type,
typename x_type,
typename beta_type>
191 void GEMV(
ETransp trans,
const OrdinalType& m,
const OrdinalType&
n,
const alpha_type alpha,
const A_type*
A,
192 const OrdinalType& lda,
const x_type* x,
const OrdinalType& incx,
const beta_type beta, ScalarType* y,
const OrdinalType& incy)
const;
195 template <
typename A_type>
197 const OrdinalType& lda, ScalarType* x,
const OrdinalType& incx)
const;
201 template <
typename alpha_type,
typename x_type,
typename y_type>
202 void GER(
const OrdinalType& m,
const OrdinalType&
n,
const alpha_type alpha,
const x_type* x,
const OrdinalType& incx,
203 const y_type* y,
const OrdinalType& incy, ScalarType*
A,
const OrdinalType& lda)
const;
215 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
216 void GEMM(
ETransp transa,
ETransp transb,
const OrdinalType& m,
const OrdinalType&
n,
const OrdinalType& k,
const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda,
const B_type*
B,
const OrdinalType& ldb,
const beta_type beta, ScalarType*
C,
const OrdinalType& ldc)
const;
220 SWAP (
const OrdinalType&
n, ScalarType*
const x,
const OrdinalType& incx,
221 ScalarType*
const y,
const OrdinalType& incy)
const;
224 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
225 void SYMM(
ESide side,
EUplo uplo,
const OrdinalType& m,
const OrdinalType&
n,
const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda,
const B_type*
B,
const OrdinalType& ldb,
const beta_type beta, ScalarType*
C,
const OrdinalType& ldc)
const;
228 template <
typename alpha_type,
typename A_type,
typename beta_type>
229 void SYRK(
EUplo uplo,
ETransp trans,
const OrdinalType&
n,
const OrdinalType& k,
const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda,
const beta_type beta, ScalarType*
C,
const OrdinalType& ldc)
const;
232 template <
typename alpha_type,
typename A_type>
234 const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda, ScalarType*
B,
const OrdinalType& ldb)
const;
237 template <
typename alpha_type,
typename A_type>
239 const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda, ScalarType*
B,
const OrdinalType& ldb)
const;
243 template<
typename OrdinalType,
typename ScalarType>
278 template<
typename ScalarType,
bool isComplex>
286 template<
typename ScalarType>
294 template<
typename ScalarType>
298 blas_dabs1(
const ScalarType* a, ScalarType* ret)
const;
301 template<
typename ScalarType,
bool isComplex>
305 template<
typename ScalarType>
310 ROTG (ScalarType* ca,
313 ScalarType* s)
const;
317 template<
typename ScalarType>
322 ROTG (ScalarType* da,
325 ScalarType* s)
const;
339 ScalarType
SIGN (
const ScalarType& x,
const ScalarType& y)
const {
342 if (y > STS::zero()) {
343 return STS::magnitude (x);
344 }
else if (y < STS::zero()) {
345 return -STS::magnitude (x);
358 ScalarType signedInfinity = STS::one() / y;
359 if (signedInfinity > STS::zero()) {
360 return STS::magnitude (x);
365 return -STS::magnitude (x);
372 template<
typename ScalarType>
381 typedef typename STS::magnitudeType MagnitudeType;
401 MagnitudeType norm, scale;
403 if (STS::magnitude (*ca) == STM::zero()) {
408 scale = STS::magnitude (*ca) + STS::magnitude (*cb);
412 const MagnitudeType ca_scaled =
413 STS::magnitude (*ca / ScalarType(scale, STM::zero()));
414 const MagnitudeType cb_scaled =
415 STS::magnitude (*cb / ScalarType(scale, STM::zero()));
417 STM::squareroot (ca_scaled*ca_scaled + cb_scaled*cb_scaled);
419 alpha = *ca / STS::magnitude (*ca);
420 *c = STS::magnitude (*ca) / norm;
421 *s = alpha * STS::conjugate (*cb) / norm;
427 template<
typename ScalarType>
458 ScalarType r, roe, scale, z;
461 if (STS::magnitude (*da) > STS::magnitude (*db)) {
464 scale = STS::magnitude (*da) + STS::magnitude (*db);
465 if (scale == STS::zero()) {
474 const ScalarType da_scaled = *da / scale;
475 const ScalarType db_scaled = *db / scale;
476 r = scale * STS::squareroot (da_scaled*da_scaled + db_scaled*db_scaled);
477 r = SIGN (STS::one(), roe) * r;
481 if (STS::magnitude (*da) > STS::magnitude (*db)) {
484 if (STS::magnitude (*db) >= STS::magnitude (*da) && *c != STS::zero()) {
494 template<
typename ScalarType>
503 template<
typename ScalarType>
514 template<
typename OrdinalType,
typename ScalarType>
523 rotator.ROTG (da, db, c, s);
526 template<
typename OrdinalType,
typename ScalarType>
532 if (incx==1 && incy==1) {
533 for(OrdinalType i=0; i<
n; ++i) {
534 ScalarType temp = *c*dx[i] + sconj*dy[i];
535 dy[i] = *c*dy[i] - sconj*dx[i];
540 OrdinalType ix = 0, iy = 0;
541 if (incx < izero) ix = (-n+1)*incx;
542 if (incy < izero) iy = (-n+1)*incy;
543 for(OrdinalType i=0; i<
n; ++i) {
544 ScalarType temp = *c*dx[ix] + sconj*dy[iy];
545 dy[iy] = *c*dy[iy] - sconj*dx[ix];
553 template<
typename OrdinalType,
typename ScalarType>
558 OrdinalType i, ix = izero;
560 if ( n < ione || incx < ione )
564 for(i = izero; i <
n; i++)
571 template<
typename OrdinalType,
typename ScalarType>
576 OrdinalType i, ix = izero, iy = izero;
579 if (incx < izero) { ix = (-n+ione)*incx; }
580 if (incy < izero) { iy = (-n+ione)*incy; }
582 for(i = izero; i <
n; i++)
591 template<
typename OrdinalType,
typename ScalarType>
592 template <
typename alpha_type,
typename x_type>
597 OrdinalType i, ix = izero, iy = izero;
601 if (incx < izero) { ix = (-n+ione)*incx; }
602 if (incy < izero) { iy = (-n+ione)*incy; }
604 for(i = izero; i <
n; i++)
606 y[iy] += alpha * x[ix];
613 template<
typename OrdinalType,
typename ScalarType>
620 OrdinalType i, ix = izero;
622 if ( n < ione || incx < ione )
626 for (i = izero; i <
n; i++)
636 template<
typename OrdinalType,
typename ScalarType>
637 template <
typename x_type,
typename y_type>
643 OrdinalType i, ix = izero, iy = izero;
647 if (incx < izero) { ix = (-n+ione)*incx; }
648 if (incy < izero) { iy = (-n+ione)*incy; }
650 for(i = izero; i <
n; i++)
660 template<
typename OrdinalType,
typename ScalarType>
667 OrdinalType i, ix = izero;
669 if ( n < ione || incx < ione )
672 for(i = izero; i <
n; i++)
681 template<
typename OrdinalType,
typename ScalarType>
686 OrdinalType result = izero, ix = izero, i;
692 if ( n < ione || incx < ione )
699 for(i = ione; i <
n; i++)
716 template<
typename OrdinalType,
typename ScalarType>
717 template <
typename alpha_type,
typename A_type,
typename x_type,
typename beta_type>
718 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(
ETransp trans,
const OrdinalType& m,
const OrdinalType&
n,
const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda,
const x_type* x,
const OrdinalType& incx,
const beta_type beta, ScalarType* y,
const OrdinalType& incy)
const
728 bool BadArgument =
false;
731 if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){
return; }
735 std::cout <<
"BLAS::GEMV Error: M == " << m << std::endl;
739 std::cout <<
"BLAS::GEMV Error: N == " << n << std::endl;
743 std::cout <<
"BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;
746 if( incx == izero ) {
747 std::cout <<
"BLAS::GEMV Error: INCX == 0"<< std::endl;
750 if( incy == izero ) {
751 std::cout <<
"BLAS::GEMV Error: INCY == 0"<< std::endl;
756 OrdinalType i, j, lenx, leny, ix, iy, jx, jy;
757 OrdinalType kx = izero, ky = izero;
773 if (incx < izero ) { kx = (ione - lenx)*incx; }
774 if (incy < izero ) { ky = (ione - leny)*incy; }
778 if(beta != beta_one) {
780 if (beta == beta_zero) {
781 for(i = izero; i < leny; i++) { y[i] = y_zero; }
783 for(i = izero; i < leny; i++) { y[i] *= beta; }
786 if (beta == beta_zero) {
787 for(i = izero; i < leny; i++) {
792 for(i = izero; i < leny; i++) {
801 if(alpha == alpha_zero) {
return; }
807 for(j = izero; j <
n; j++) {
808 if (x[jx] != x_zero) {
810 for(i = izero; i < m; i++) {
811 y[i] += temp*A[j*lda + i];
817 for(j = izero; j <
n; j++) {
818 if (x[jx] != x_zero) {
821 for(i = izero; i < m; i++) {
822 y[iy] += temp*A[j*lda + i];
832 for(j = izero; j <
n; j++) {
835 for(i = izero; i < m; i++) {
836 temp += A[j*lda + i]*x[i];
839 for(i = izero; i < m; i++) {
847 for(j = izero; j <
n; j++) {
851 for (i = izero; i < m; i++) {
852 temp += A[j*lda + i]*x[ix];
856 for (i = izero; i < m; i++) {
869 template<
typename OrdinalType,
typename ScalarType>
870 template <
typename A_type>
876 bool BadArgument =
false;
880 if( n == izero ){
return; }
884 std::cout <<
"BLAS::TRMV Error: N == " << n << std::endl;
888 std::cout <<
"BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;
891 if( incx == izero ) {
892 std::cout <<
"BLAS::TRMV Error: INCX == 0"<< std::endl;
897 OrdinalType i, j, ix, jx, kx = izero;
905 if (incx < izero) { kx = (-n+ione)*incx; }
913 for (j=izero; j<
n; j++) {
916 for (i=izero; i<j; i++) {
917 x[i] += temp*A[j*lda + i];
920 x[j] *= A[j*lda + j];
925 for (j=izero; j<
n; j++) {
929 for (i=izero; i<j; i++) {
930 x[ix] += temp*A[j*lda + i];
934 x[jx] *= A[j*lda + j];
941 for (j=n-ione; j>-ione; j--) {
944 for (i=n-ione; i>j; i--) {
945 x[i] += temp*A[j*lda + i];
948 x[j] *= A[j*lda + j];
954 for (j=n-ione; j>-ione; j--) {
958 for (i=n-ione; i>j; i--) {
959 x[ix] += temp*A[j*lda + i];
963 x[jx] *= A[j*lda + j];
974 for (j=n-ione; j>-ione; j--) {
978 temp *= A[j*lda + j];
979 for (i=j-ione; i>-ione; i--) {
980 temp += A[j*lda + i]*x[i];
985 for (i=j-ione; i>-ione; i--) {
992 jx = kx + (n-ione)*incx;
993 for (j=n-ione; j>-ione; j--) {
998 temp *= A[j*lda + j];
999 for (i=j-ione; i>-ione; i--) {
1001 temp += A[j*lda + i]*x[ix];
1006 for (i=j-ione; i>-ione; i--) {
1018 for (j=izero; j<
n; j++) {
1022 temp *= A[j*lda + j];
1023 for (i=j+ione; i<
n; i++) {
1024 temp += A[j*lda + i]*x[i];
1029 for (i=j+ione; i<
n; i++) {
1037 for (j=izero; j<
n; j++) {
1042 temp *= A[j*lda + j];
1043 for (i=j+ione; i<
n; i++) {
1045 temp += A[j*lda + i]*x[ix];
1050 for (i=j+ione; i<
n; i++) {
1064 template<
typename OrdinalType,
typename ScalarType>
1065 template <
typename alpha_type,
typename x_type,
typename y_type>
1072 bool BadArgument =
false;
1075 if( m == izero || n == izero || alpha == alpha_zero ){
return; }
1079 std::cout <<
"BLAS::GER Error: M == " << m << std::endl;
1083 std::cout <<
"BLAS::GER Error: N == " << n << std::endl;
1087 std::cout <<
"BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;
1091 std::cout <<
"BLAS::GER Error: INCX == 0"<< std::endl;
1095 std::cout <<
"BLAS::GER Error: INCY == 0"<< std::endl;
1100 OrdinalType i, j, ix, jy = izero, kx = izero;
1104 if (incx < izero) { kx = (-m+ione)*incx; }
1105 if (incy < izero) { jy = (-n+ione)*incy; }
1108 if( incx == ione ) {
1109 for( j=izero; j<
n; j++ ) {
1110 if ( y[jy] != y_zero ) {
1112 for ( i=izero; i<m; i++ ) {
1113 A[j*lda + i] += x[i]*temp;
1120 for( j=izero; j<
n; j++ ) {
1121 if ( y[jy] != y_zero ) {
1124 for( i=izero; i<m; i++ ) {
1125 A[j*lda + i] += x[ix]*temp;
1139 template<
typename OrdinalType,
typename ScalarType>
1140 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
1141 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(
ETransp transa,
ETransp transb,
const OrdinalType& m,
const OrdinalType&
n,
const OrdinalType& k,
const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda,
const B_type*
B,
const OrdinalType& ldb,
const beta_type beta, ScalarType*
C,
const OrdinalType& ldc)
const
1150 OrdinalType i, j, p;
1151 OrdinalType NRowA = m, NRowB = k;
1153 bool BadArgument =
false;
1154 bool conjA =
false, conjB =
false;
1165 if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){
return; }
1167 std::cout <<
"BLAS::GEMM Error: M == " << m << std::endl;
1171 std::cout <<
"BLAS::GEMM Error: N == " << n << std::endl;
1175 std::cout <<
"BLAS::GEMM Error: K == " << k << std::endl;
1179 std::cout <<
"BLAS::GEMM Error: LDA < "<<NRowA<<std::endl;
1183 std::cout <<
"BLAS::GEMM Error: LDB < "<<NRowB<<std::endl;
1187 std::cout <<
"BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;
1198 if( alpha == alpha_zero ) {
1199 if( beta == beta_zero ) {
1200 for (j=izero; j<
n; j++) {
1201 for (i=izero; i<m; i++) {
1202 C[j*ldc + i] = C_zero;
1206 for (j=izero; j<
n; j++) {
1207 for (i=izero; i<m; i++) {
1208 C[j*ldc + i] *= beta;
1220 for (j=izero; j<
n; j++) {
1221 if( beta == beta_zero ) {
1222 for (i=izero; i<m; i++){
1223 C[j*ldc + i] = C_zero;
1225 }
else if( beta != beta_one ) {
1226 for (i=izero; i<m; i++){
1227 C[j*ldc + i] *= beta;
1230 for (p=izero; p<k; p++){
1231 if (B[j*ldb + p] != B_zero ){
1232 temp = alpha*B[j*ldb + p];
1233 for (i=izero; i<m; i++) {
1234 C[j*ldc + i] += temp*A[p*lda + i];
1239 }
else if ( conjA ) {
1241 for (j=izero; j<
n; j++) {
1242 for (i=izero; i<m; i++) {
1244 for (p=izero; p<k; p++) {
1247 if (beta == beta_zero) {
1248 C[j*ldc + i] = alpha*temp;
1250 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1256 for (j=izero; j<
n; j++) {
1257 for (i=izero; i<m; i++) {
1259 for (p=izero; p<k; p++) {
1260 temp += A[i*lda + p]*B[j*ldb + p];
1262 if (beta == beta_zero) {
1263 C[j*ldc + i] = alpha*temp;
1265 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1273 for (j=izero; j<
n; j++) {
1274 if (beta == beta_zero) {
1275 for (i=izero; i<m; i++) {
1276 C[j*ldc + i] = C_zero;
1278 }
else if ( beta != beta_one ) {
1279 for (i=izero; i<m; i++) {
1280 C[j*ldc + i] *= beta;
1283 for (p=izero; p<k; p++) {
1284 if (B[p*ldb + j] != B_zero) {
1286 for (i=izero; i<m; i++) {
1287 C[j*ldc + i] += temp*A[p*lda + i];
1294 for (j=izero; j<
n; j++) {
1295 if (beta == beta_zero) {
1296 for (i=izero; i<m; i++) {
1297 C[j*ldc + i] = C_zero;
1299 }
else if ( beta != beta_one ) {
1300 for (i=izero; i<m; i++) {
1301 C[j*ldc + i] *= beta;
1304 for (p=izero; p<k; p++) {
1305 if (B[p*ldb + j] != B_zero) {
1306 temp = alpha*B[p*ldb + j];
1307 for (i=izero; i<m; i++) {
1308 C[j*ldc + i] += temp*A[p*lda + i];
1314 }
else if ( conjA ) {
1317 for (j=izero; j<
n; j++) {
1318 for (i=izero; i<m; i++) {
1320 for (p=izero; p<k; p++) {
1324 if (beta == beta_zero) {
1325 C[j*ldc + i] = alpha*temp;
1327 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1333 for (j=izero; j<
n; j++) {
1334 for (i=izero; i<m; i++) {
1336 for (p=izero; p<k; p++) {
1340 if (beta == beta_zero) {
1341 C[j*ldc + i] = alpha*temp;
1343 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1351 for (j=izero; j<
n; j++) {
1352 for (i=izero; i<m; i++) {
1354 for (p=izero; p<k; p++) {
1355 temp += A[i*lda + p]
1358 if (beta == beta_zero) {
1359 C[j*ldc + i] = alpha*temp;
1361 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1367 for (j=izero; j<
n; j++) {
1368 for (i=izero; i<m; i++) {
1370 for (p=izero; p<k; p++) {
1371 temp += A[i*lda + p]*B[p*ldb + j];
1373 if (beta == beta_zero) {
1374 C[j*ldc + i] = alpha*temp;
1376 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1386 template<
typename OrdinalType,
typename ScalarType>
1388 SWAP (
const OrdinalType&
n, ScalarType*
const x,
const OrdinalType& incx,
1389 ScalarType*
const y,
const OrdinalType& incy)
const
1395 if (incx == 1 && incy == 1) {
1396 for (
int i = 0; i <
n; ++i) {
1397 ScalarType tmp = x[i];
1407 ix = (1-
n) * incx + 1;
1410 iy = (1-
n) * incy + 1;
1413 for (
int i = 1; i <=
n; ++i) {
1414 ScalarType tmp = x[ix - 1];
1415 x[ix - 1] = y[iy - 1];
1423 template<
typename OrdinalType,
typename ScalarType>
1424 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
1425 void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(
ESide side,
EUplo uplo,
const OrdinalType& m,
const OrdinalType&
n,
const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda,
const B_type*
B,
const OrdinalType& ldb,
const beta_type beta, ScalarType*
C,
const OrdinalType& ldc)
const
1433 OrdinalType i, j, k, NRowA = m;
1434 ScalarType temp1, temp2;
1435 bool BadArgument =
false;
1440 if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) {
return; }
1442 std::cout <<
"BLAS::SYMM Error: M == "<< m << std::endl;
1443 BadArgument =
true; }
1445 std::cout <<
"BLAS::SYMM Error: N == "<< n << std::endl;
1446 BadArgument =
true; }
1448 std::cout <<
"BLAS::SYMM Error: LDA < "<<NRowA<<std::endl;
1449 BadArgument =
true; }
1451 std::cout <<
"BLAS::SYMM Error: LDB < MAX(1,M)"<<std::endl;
1452 BadArgument =
true; }
1454 std::cout <<
"BLAS::SYMM Error: LDC < MAX(1,M)"<<std::endl;
1455 BadArgument =
true; }
1460 if (alpha == alpha_zero) {
1461 if (beta == beta_zero ) {
1462 for (j=izero; j<
n; j++) {
1463 for (i=izero; i<m; i++) {
1464 C[j*ldc + i] = C_zero;
1468 for (j=izero; j<
n; j++) {
1469 for (i=izero; i<m; i++) {
1470 C[j*ldc + i] *= beta;
1482 for (j=izero; j<
n; j++) {
1483 for (i=izero; i<m; i++) {
1484 temp1 = alpha*B[j*ldb + i];
1486 for (k=izero; k<i; k++) {
1487 C[j*ldc + k] += temp1*A[i*lda + k];
1488 temp2 += B[j*ldb + k]*A[i*lda + k];
1490 if (beta == beta_zero) {
1491 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1493 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1499 for (j=izero; j<
n; j++) {
1500 for (i=m-ione; i>-ione; i--) {
1501 temp1 = alpha*B[j*ldb + i];
1503 for (k=i+ione; k<m; k++) {
1504 C[j*ldc + k] += temp1*A[i*lda + k];
1505 temp2 += B[j*ldb + k]*A[i*lda + k];
1507 if (beta == beta_zero) {
1508 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1510 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1517 for (j=izero; j<
n; j++) {
1518 temp1 = alpha*A[j*lda + j];
1519 if (beta == beta_zero) {
1520 for (i=izero; i<m; i++) {
1521 C[j*ldc + i] = temp1*B[j*ldb + i];
1524 for (i=izero; i<m; i++) {
1525 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
1528 for (k=izero; k<j; k++) {
1530 temp1 = alpha*A[j*lda + k];
1532 temp1 = alpha*A[k*lda + j];
1534 for (i=izero; i<m; i++) {
1535 C[j*ldc + i] += temp1*B[k*ldb + i];
1538 for (k=j+ione; k<
n; k++) {
1540 temp1 = alpha*A[k*lda + j];
1542 temp1 = alpha*A[j*lda + k];
1544 for (i=izero; i<m; i++) {
1545 C[j*ldc + i] += temp1*B[k*ldb + i];
1553 template<
typename OrdinalType,
typename ScalarType>
1554 template <
typename alpha_type,
typename A_type,
typename beta_type>
1555 void DefaultBLASImpl<OrdinalType, ScalarType>::SYRK(
EUplo uplo,
ETransp trans,
const OrdinalType&
n,
const OrdinalType& k,
const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda,
const beta_type beta, ScalarType*
C,
const OrdinalType& ldc)
const
1566 OrdinalType i, j, l, NRowA =
n;
1567 bool BadArgument =
false;
1574 "Teuchos::BLAS<"<<OTNT::name()<<
","<<STNT::name()<<
">::SYRK()"
1575 " does not support CONJ_TRANS for complex data types."
1584 if ( n==izero ) {
return; }
1585 if ( ( (alpha==alpha_zero) || (k==izero) ) && (beta==beta_one) ) {
return; }
1587 std::cout <<
"BLAS::SYRK Error: N == "<< n <<std::endl;
1588 BadArgument =
true; }
1590 std::cout <<
"BLAS::SYRK Error: K == "<< k <<std::endl;
1591 BadArgument =
true; }
1593 std::cout <<
"BLAS::SYRK Error: LDA < "<<NRowA<<std::endl;
1594 BadArgument =
true; }
1596 std::cout <<
"BLAS::SYRK Error: LDC < MAX(1,N)"<<std::endl;
1597 BadArgument =
true; }
1602 if (alpha == alpha_zero) {
1604 if (beta==beta_zero) {
1605 for (j=izero; j<
n; j++) {
1606 for (i=izero; i<=j; i++) {
1607 C[j*ldc + i] = C_zero;
1612 for (j=izero; j<
n; j++) {
1613 for (i=izero; i<=j; i++) {
1614 C[j*ldc + i] *= beta;
1620 if (beta==beta_zero) {
1621 for (j=izero; j<
n; j++) {
1622 for (i=j; i<
n; i++) {
1623 C[j*ldc + i] = C_zero;
1628 for (j=izero; j<
n; j++) {
1629 for (i=j; i<
n; i++) {
1630 C[j*ldc + i] *= beta;
1644 for (j=izero; j<
n; j++) {
1645 if (beta==beta_zero) {
1646 for (i=izero; i<=j; i++) {
1647 C[j*ldc + i] = C_zero;
1650 else if (beta!=beta_one) {
1651 for (i=izero; i<=j; i++) {
1652 C[j*ldc + i] *= beta;
1655 for (l=izero; l<k; l++) {
1656 if (A[l*lda + j] != A_zero) {
1657 temp = alpha*A[l*lda + j];
1658 for (i = izero; i <=j; i++) {
1659 C[j*ldc + i] += temp*A[l*lda + i];
1666 for (j=izero; j<
n; j++) {
1667 if (beta==beta_zero) {
1668 for (i=j; i<
n; i++) {
1669 C[j*ldc + i] = C_zero;
1672 else if (beta!=beta_one) {
1673 for (i=j; i<
n; i++) {
1674 C[j*ldc + i] *= beta;
1677 for (l=izero; l<k; l++) {
1678 if (A[l*lda + j] != A_zero) {
1679 temp = alpha*A[l*lda + j];
1680 for (i=j; i<
n; i++) {
1681 C[j*ldc + i] += temp*A[l*lda + i];
1692 for (j=izero; j<
n; j++) {
1693 for (i=izero; i<=j; i++) {
1695 for (l=izero; l<k; l++) {
1696 temp += A[i*lda + l]*A[j*lda + l];
1698 if (beta==beta_zero) {
1699 C[j*ldc + i] = alpha*temp;
1702 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1708 for (j=izero; j<
n; j++) {
1709 for (i=j; i<
n; i++) {
1711 for (l=izero; l<k; ++l) {
1712 temp += A[i*lda + l]*A[j*lda + l];
1714 if (beta==beta_zero) {
1715 C[j*ldc + i] = alpha*temp;
1718 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1727 template<
typename OrdinalType,
typename ScalarType>
1728 template <
typename alpha_type,
typename A_type>
1729 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const OrdinalType& m,
const OrdinalType&
n,
const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda, ScalarType*
B,
const OrdinalType& ldb)
const
1737 OrdinalType i, j, k, NRowA = m;
1739 bool BadArgument =
false;
1745 if(!LSide) { NRowA =
n; }
1748 if (n==izero || m==izero) {
return; }
1750 std::cout <<
"BLAS::TRMM Error: M == "<< m <<std::endl;
1751 BadArgument =
true; }
1753 std::cout <<
"BLAS::TRMM Error: N == "<< n <<std::endl;
1754 BadArgument =
true; }
1756 std::cout <<
"BLAS::TRMM Error: LDA < "<<NRowA<<std::endl;
1757 BadArgument =
true; }
1759 std::cout <<
"BLAS::TRMM Error: LDB < MAX(1,M)"<<std::endl;
1760 BadArgument =
true; }
1765 if( alpha == alpha_zero ) {
1766 for( j=izero; j<
n; j++ ) {
1767 for( i=izero; i<m; i++ ) {
1768 B[j*ldb + i] = B_zero;
1783 for( j=izero; j<
n; j++ ) {
1784 for( k=izero; k<m; k++) {
1785 if ( B[j*ldb + k] != B_zero ) {
1786 temp = alpha*B[j*ldb + k];
1787 for( i=izero; i<k; i++ ) {
1788 B[j*ldb + i] += temp*A[k*lda + i];
1791 temp *=A[k*lda + k];
1792 B[j*ldb + k] = temp;
1798 for( j=izero; j<
n; j++ ) {
1799 for( k=m-ione; k>-ione; k-- ) {
1800 if( B[j*ldb + k] != B_zero ) {
1801 temp = alpha*B[j*ldb + k];
1802 B[j*ldb + k] = temp;
1804 B[j*ldb + k] *= A[k*lda + k];
1805 for( i=k+ione; i<m; i++ ) {
1806 B[j*ldb + i] += temp*A[k*lda + i];
1815 for( j=izero; j<
n; j++ ) {
1816 for( i=m-ione; i>-ione; i-- ) {
1817 temp = B[j*ldb + i];
1820 temp *= A[i*lda + i];
1821 for( k=izero; k<i; k++ ) {
1822 temp += A[i*lda + k]*B[j*ldb + k];
1827 for( k=izero; k<i; k++ ) {
1831 B[j*ldb + i] = alpha*temp;
1835 for( j=izero; j<
n; j++ ) {
1836 for( i=izero; i<m; i++ ) {
1837 temp = B[j*ldb + i];
1840 temp *= A[i*lda + i];
1841 for( k=i+ione; k<m; k++ ) {
1842 temp += A[i*lda + k]*B[j*ldb + k];
1847 for( k=i+ione; k<m; k++ ) {
1851 B[j*ldb + i] = alpha*temp;
1864 for( j=n-ione; j>-ione; j-- ) {
1867 temp *= A[j*lda + j];
1868 for( i=izero; i<m; i++ ) {
1869 B[j*ldb + i] *= temp;
1871 for( k=izero; k<j; k++ ) {
1872 if( A[j*lda + k] != A_zero ) {
1873 temp = alpha*A[j*lda + k];
1874 for( i=izero; i<m; i++ ) {
1875 B[j*ldb + i] += temp*B[k*ldb + i];
1882 for( j=izero; j<
n; j++ ) {
1885 temp *= A[j*lda + j];
1886 for( i=izero; i<m; i++ ) {
1887 B[j*ldb + i] *= temp;
1889 for( k=j+ione; k<
n; k++ ) {
1890 if( A[j*lda + k] != A_zero ) {
1891 temp = alpha*A[j*lda + k];
1892 for( i=izero; i<m; i++ ) {
1893 B[j*ldb + i] += temp*B[k*ldb + i];
1903 for( k=izero; k<
n; k++ ) {
1904 for( j=izero; j<k; j++ ) {
1905 if( A[k*lda + j] != A_zero ) {
1907 temp = alpha*A[k*lda + j];
1910 for( i=izero; i<m; i++ ) {
1911 B[j*ldb + i] += temp*B[k*ldb + i];
1918 temp *= A[k*lda + k];
1923 for( i=izero; i<m; i++ ) {
1924 B[k*ldb + i] *= temp;
1929 for( k=n-ione; k>-ione; k-- ) {
1930 for( j=k+ione; j<
n; j++ ) {
1931 if( A[k*lda + j] != A_zero ) {
1933 temp = alpha*A[k*lda + j];
1936 for( i=izero; i<m; i++ ) {
1937 B[j*ldb + i] += temp*B[k*ldb + i];
1944 temp *= A[k*lda + k];
1949 for( i=izero; i<m; i++ ) {
1950 B[k*ldb + i] *= temp;
1960 template<
typename OrdinalType,
typename ScalarType>
1961 template <
typename alpha_type,
typename A_type>
1962 void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const OrdinalType& m,
const OrdinalType&
n,
const alpha_type alpha,
const A_type*
A,
const OrdinalType& lda, ScalarType*
B,
const OrdinalType& ldb)
const
1972 OrdinalType NRowA = m;
1973 bool BadArgument =
false;
1977 if (!(
ESideChar[side] ==
'L')) { NRowA =
n; }
1980 if (n == izero || m == izero) {
return; }
1982 std::cout <<
"BLAS::TRSM Error: M == "<<m<<std::endl;
1983 BadArgument =
true; }
1985 std::cout <<
"BLAS::TRSM Error: N == "<<n<<std::endl;
1986 BadArgument =
true; }
1988 std::cout <<
"BLAS::TRSM Error: LDA < "<<NRowA<<std::endl;
1989 BadArgument =
true; }
1991 std::cout <<
"BLAS::TRSM Error: LDB < MAX(1,M)"<<std::endl;
1992 BadArgument =
true; }
1998 if(alpha == alpha_zero) {
1999 for(j = izero; j <
n; j++) {
2000 for( i = izero; i < m; i++) {
2001 B[j*ldb+i] = B_zero;
2017 for(j = izero; j <
n; j++) {
2019 if(alpha != alpha_one) {
2020 for( i = izero; i < m; i++) {
2021 B[j*ldb+i] *= alpha;
2025 for(k = (m - ione); k > -ione; k--) {
2027 if (B[j*ldb + k] != B_zero) {
2029 B[j*ldb + k] /= A[k*lda + k];
2031 for(i = izero; i < k; i++) {
2032 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2040 for(j = izero; j <
n; j++) {
2042 if(alpha != alpha_one) {
2043 for( i = izero; i < m; i++) {
2044 B[j*ldb+i] *= alpha;
2048 for(k = izero; k < m; k++) {
2050 if (B[j*ldb + k] != B_zero) {
2052 B[j*ldb + k] /= A[k*lda + k];
2054 for(i = k+ione; i < m; i++) {
2055 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2069 for(j = izero; j <
n; j++) {
2070 for( i = izero; i < m; i++) {
2071 temp = alpha*B[j*ldb+i];
2073 for(k = izero; k < i; k++) {
2074 temp -= A[i*lda + k] * B[j*ldb + k];
2077 temp /= A[i*lda + i];
2080 for(k = izero; k < i; k++) {
2088 B[j*ldb + i] = temp;
2094 for(j = izero; j <
n; j++) {
2095 for(i = (m - ione) ; i > -ione; i--) {
2096 temp = alpha*B[j*ldb+i];
2098 for(k = i+ione; k < m; k++) {
2099 temp -= A[i*lda + k] * B[j*ldb + k];
2102 temp /= A[i*lda + i];
2105 for(k = i+ione; k < m; k++) {
2113 B[j*ldb + i] = temp;
2131 for(j = izero; j <
n; j++) {
2133 if(alpha != alpha_one) {
2134 for( i = izero; i < m; i++) {
2135 B[j*ldb+i] *= alpha;
2138 for(k = izero; k < j; k++) {
2140 if (A[j*lda + k] != A_zero) {
2141 for(i = izero; i < m; i++) {
2142 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2147 temp = B_one/A[j*lda + j];
2148 for(i = izero; i < m; i++) {
2149 B[j*ldb + i] *= temp;
2156 for(j = (n - ione); j > -ione; j--) {
2158 if(alpha != alpha_one) {
2159 for( i = izero; i < m; i++) {
2160 B[j*ldb+i] *= alpha;
2164 for(k = j+ione; k <
n; k++) {
2166 if (A[j*lda + k] != A_zero) {
2167 for(i = izero; i < m; i++) {
2168 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2173 temp = B_one/A[j*lda + j];
2174 for(i = izero; i < m; i++) {
2175 B[j*ldb + i] *= temp;
2188 for(k = (n - ione); k > -ione; k--) {
2191 temp = B_one/A[k*lda + k];
2194 for(i = izero; i < m; i++) {
2195 B[k*ldb + i] *= temp;
2198 for(j = izero; j < k; j++) {
2199 if (A[k*lda + j] != A_zero) {
2201 temp = A[k*lda + j];
2204 for(i = izero; i < m; i++) {
2205 B[j*ldb + i] -= temp*B[k*ldb + i];
2209 if (alpha != alpha_one) {
2210 for (i = izero; i < m; i++) {
2211 B[k*ldb + i] *= alpha;
2218 for(k = izero; k <
n; k++) {
2221 temp = B_one/A[k*lda + k];
2224 for (i = izero; i < m; i++) {
2225 B[k*ldb + i] *= temp;
2228 for(j = k+ione; j <
n; j++) {
2229 if(A[k*lda + j] != A_zero) {
2231 temp = A[k*lda + j];
2234 for(i = izero; i < m; i++) {
2235 B[j*ldb + i] -= temp*B[k*ldb + i];
2239 if (alpha != alpha_one) {
2240 for (i = izero; i < m; i++) {
2241 B[k*ldb + i] *= alpha;
2261 void ROTG(
float* da,
float* db,
float* c,
float* s)
const;
2262 void ROT(
const int&
n,
float* dx,
const int& incx,
float* dy,
const int& incy,
float* c,
float* s)
const;
2263 float ASUM(
const int&
n,
const float* x,
const int& incx)
const;
2264 void AXPY(
const int&
n,
const float& alpha,
const float* x,
const int& incx,
float* y,
const int& incy)
const;
2265 void COPY(
const int&
n,
const float* x,
const int& incx,
float* y,
const int& incy)
const;
2266 float DOT(
const int&
n,
const float* x,
const int& incx,
const float* y,
const int& incy)
const;
2267 float NRM2(
const int&
n,
const float* x,
const int& incx)
const;
2268 void SCAL(
const int&
n,
const float& alpha,
float* x,
const int& incx)
const;
2269 int IAMAX(
const int&
n,
const float* x,
const int& incx)
const;
2270 void GEMV(
ETransp trans,
const int& m,
const int&
n,
const float& alpha,
const float*
A,
const int& lda,
const float* x,
const int& incx,
const float& beta,
float* y,
const int& incy)
const;
2271 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int&
n,
const float*
A,
const int& lda,
float* x,
const int& incx)
const;
2272 void GER(
const int& m,
const int&
n,
const float& alpha,
const float* x,
const int& incx,
const float* y,
const int& incy,
float*
A,
const int& lda)
const;
2273 void GEMM(
ETransp transa,
ETransp transb,
const int& m,
const int&
n,
const int& k,
const float& alpha,
const float*
A,
const int& lda,
const float*
B,
const int& ldb,
const float& beta,
float*
C,
const int& ldc)
const;
2274 void SWAP(
const int&
n,
float*
const x,
const int& incx,
float*
const y,
const int& incy)
const;
2275 void SYMM(
ESide side,
EUplo uplo,
const int& m,
const int&
n,
const float& alpha,
const float*
A,
const int& lda,
const float*
B,
const int& ldb,
const float& beta,
float*
C,
const int& ldc)
const;
2276 void SYRK(
EUplo uplo,
ETransp trans,
const int&
n,
const int& k,
const float& alpha,
const float*
A,
const int& lda,
const float& beta,
float*
C,
const int& ldc)
const;
2277 void HERK(
EUplo uplo,
ETransp trans,
const int&
n,
const int& k,
const float& alpha,
const float*
A,
const int& lda,
const float& beta,
float*
C,
const int& ldc)
const;
2278 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int&
n,
const float& alpha,
const float*
A,
const int& lda,
float*
B,
const int& ldb)
const;
2279 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int&
n,
const float& alpha,
const float*
A,
const int& lda,
float*
B,
const int& ldb)
const;
2291 void ROTG(
double* da,
double* db,
double* c,
double* s)
const;
2292 void ROT(
const int&
n,
double* dx,
const int& incx,
double* dy,
const int& incy,
double* c,
double* s)
const;
2293 double ASUM(
const int&
n,
const double* x,
const int& incx)
const;
2294 void AXPY(
const int&
n,
const double& alpha,
const double* x,
const int& incx,
double* y,
const int& incy)
const;
2295 void COPY(
const int&
n,
const double* x,
const int& incx,
double* y,
const int& incy)
const;
2296 double DOT(
const int&
n,
const double* x,
const int& incx,
const double* y,
const int& incy)
const;
2297 double NRM2(
const int&
n,
const double* x,
const int& incx)
const;
2298 void SCAL(
const int&
n,
const double& alpha,
double* x,
const int& incx)
const;
2299 int IAMAX(
const int&
n,
const double* x,
const int& incx)
const;
2300 void GEMV(
ETransp trans,
const int& m,
const int&
n,
const double& alpha,
const double*
A,
const int& lda,
const double* x,
const int& incx,
const double& beta,
double* y,
const int& incy)
const;
2301 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int&
n,
const double*
A,
const int& lda,
double* x,
const int& incx)
const;
2302 void GER(
const int& m,
const int&
n,
const double& alpha,
const double* x,
const int& incx,
const double* y,
const int& incy,
double*
A,
const int& lda)
const;
2303 void GEMM(
ETransp transa,
ETransp transb,
const int& m,
const int&
n,
const int& k,
const double& alpha,
const double*
A,
const int& lda,
const double*
B,
const int& ldb,
const double& beta,
double*
C,
const int& ldc)
const;
2304 void SWAP(
const int&
n,
double*
const x,
const int& incx,
double*
const y,
const int& incy)
const;
2305 void SYMM(
ESide side,
EUplo uplo,
const int& m,
const int&
n,
const double& alpha,
const double*
A,
const int& lda,
const double*
B,
const int& ldb,
const double& beta,
double*
C,
const int& ldc)
const;
2306 void SYRK(
EUplo uplo,
ETransp trans,
const int&
n,
const int& k,
const double& alpha,
const double*
A,
const int& lda,
const double& beta,
double*
C,
const int& ldc)
const;
2307 void HERK(
EUplo uplo,
ETransp trans,
const int&
n,
const int& k,
const double& alpha,
const double*
A,
const int& lda,
const double& beta,
double*
C,
const int& ldc)
const;
2308 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int&
n,
const double& alpha,
const double*
A,
const int& lda,
double*
B,
const int& ldb)
const;
2309 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int&
n,
const double& alpha,
const double*
A,
const int& lda,
double*
B,
const int& ldb)
const;
2319 inline BLAS(
const BLAS<
int, std::complex<float> >& ) {}
2321 void ROTG(std::complex<float>* da, std::complex<float>* db,
float* c, std::complex<float>* s)
const;
2322 void ROT(
const int&
n, std::complex<float>* dx,
const int& incx, std::complex<float>* dy,
const int& incy,
float* c, std::complex<float>* s)
const;
2323 float ASUM(
const int&
n,
const std::complex<float>* x,
const int& incx)
const;
2324 void AXPY(
const int&
n,
const std::complex<float> alpha,
const std::complex<float>* x,
const int& incx, std::complex<float>* y,
const int& incy)
const;
2325 void COPY(
const int&
n,
const std::complex<float>* x,
const int& incx, std::complex<float>* y,
const int& incy)
const;
2326 std::complex<float> DOT(
const int&
n,
const std::complex<float>* x,
const int& incx,
const std::complex<float>* y,
const int& incy)
const;
2327 float NRM2(
const int&
n,
const std::complex<float>* x,
const int& incx)
const;
2328 void SCAL(
const int&
n,
const std::complex<float> alpha, std::complex<float>* x,
const int& incx)
const;
2329 int IAMAX(
const int&
n,
const std::complex<float>* x,
const int& incx)
const;
2330 void GEMV(
ETransp trans,
const int& m,
const int&
n,
const std::complex<float> alpha,
const std::complex<float>*
A,
const int& lda,
const std::complex<float>* x,
const int& incx,
const std::complex<float> beta, std::complex<float>* y,
const int& incy)
const;
2331 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int&
n,
const std::complex<float>*
A,
const int& lda, std::complex<float>* x,
const int& incx)
const;
2332 void GER(
const int& m,
const int&
n,
const std::complex<float> alpha,
const std::complex<float>* x,
const int& incx,
const std::complex<float>* y,
const int& incy, std::complex<float>*
A,
const int& lda)
const;
2333 void GEMM(
ETransp transa,
ETransp transb,
const int& m,
const int&
n,
const int& k,
const std::complex<float> alpha,
const std::complex<float>*
A,
const int& lda,
const std::complex<float>*
B,
const int& ldb,
const std::complex<float> beta, std::complex<float>*
C,
const int& ldc)
const;
2334 void SWAP(
const int&
n, std::complex<float>*
const x,
const int& incx, std::complex<float>*
const y,
const int& incy)
const;
2335 void SYMM(
ESide side,
EUplo uplo,
const int& m,
const int&
n,
const std::complex<float> alpha,
const std::complex<float>*
A,
const int& lda,
const std::complex<float> *
B,
const int& ldb,
const std::complex<float> beta, std::complex<float> *
C,
const int& ldc)
const;
2336 void SYRK(
EUplo uplo,
ETransp trans,
const int&
n,
const int& k,
const std::complex<float> alpha,
const std::complex<float>*
A,
const int& lda,
const std::complex<float> beta, std::complex<float>*
C,
const int& ldc)
const;
2337 void HERK(
EUplo uplo,
ETransp trans,
const int&
n,
const int& k,
const std::complex<float> alpha,
const std::complex<float>*
A,
const int& lda,
const std::complex<float> beta, std::complex<float>*
C,
const int& ldc)
const;
2338 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int&
n,
const std::complex<float> alpha,
const std::complex<float>*
A,
const int& lda, std::complex<float>*
B,
const int& ldb)
const;
2339 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int&
n,
const std::complex<float> alpha,
const std::complex<float>*
A,
const int& lda, std::complex<float>*
B,
const int& ldb)
const;
2349 inline BLAS(
const BLAS<
int, std::complex<double> >& ) {}
2351 void ROTG(std::complex<double>* da, std::complex<double>* db,
double* c, std::complex<double>* s)
const;
2352 void ROT(
const int&
n, std::complex<double>* dx,
const int& incx, std::complex<double>* dy,
const int& incy,
double* c, std::complex<double>* s)
const;
2353 double ASUM(
const int&
n,
const std::complex<double>* x,
const int& incx)
const;
2354 void AXPY(
const int&
n,
const std::complex<double> alpha,
const std::complex<double>* x,
const int& incx, std::complex<double>* y,
const int& incy)
const;
2355 void COPY(
const int&
n,
const std::complex<double>* x,
const int& incx, std::complex<double>* y,
const int& incy)
const;
2356 std::complex<double> DOT(
const int&
n,
const std::complex<double>* x,
const int& incx,
const std::complex<double>* y,
const int& incy)
const;
2357 double NRM2(
const int&
n,
const std::complex<double>* x,
const int& incx)
const;
2358 void SCAL(
const int&
n,
const std::complex<double> alpha, std::complex<double>* x,
const int& incx)
const;
2359 int IAMAX(
const int&
n,
const std::complex<double>* x,
const int& incx)
const;
2360 void GEMV(
ETransp trans,
const int& m,
const int&
n,
const std::complex<double> alpha,
const std::complex<double>*
A,
const int& lda,
const std::complex<double>* x,
const int& incx,
const std::complex<double> beta, std::complex<double>* y,
const int& incy)
const;
2361 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int&
n,
const std::complex<double>*
A,
const int& lda, std::complex<double>* x,
const int& incx)
const;
2362 void GER(
const int& m,
const int&
n,
const std::complex<double> alpha,
const std::complex<double>* x,
const int& incx,
const std::complex<double>* y,
const int& incy, std::complex<double>*
A,
const int& lda)
const;
2363 void GEMM(
ETransp transa,
ETransp transb,
const int& m,
const int&
n,
const int& k,
const std::complex<double> alpha,
const std::complex<double>*
A,
const int& lda,
const std::complex<double>*
B,
const int& ldb,
const std::complex<double> beta, std::complex<double>*
C,
const int& ldc)
const;
2364 void SWAP(
const int&
n, std::complex<double>*
const x,
const int& incx, std::complex<double>*
const y,
const int& incy)
const;
2365 void SYMM(
ESide side,
EUplo uplo,
const int& m,
const int&
n,
const std::complex<double> alpha,
const std::complex<double>*
A,
const int& lda,
const std::complex<double> *
B,
const int& ldb,
const std::complex<double> beta, std::complex<double> *
C,
const int& ldc)
const;
2366 void SYRK(
EUplo uplo,
ETransp trans,
const int&
n,
const int& k,
const std::complex<double> alpha,
const std::complex<double>*
A,
const int& lda,
const std::complex<double> beta, std::complex<double>*
C,
const int& ldc)
const;
2367 void HERK(
EUplo uplo,
ETransp trans,
const int&
n,
const int& k,
const std::complex<double> alpha,
const std::complex<double>*
A,
const int& lda,
const std::complex<double> beta, std::complex<double>*
C,
const int& ldc)
const;
2368 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int&
n,
const std::complex<double> alpha,
const std::complex<double>*
A,
const int& lda, std::complex<double>*
B,
const int& ldb)
const;
2369 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int& m,
const int&
n,
const std::complex<double> alpha,
const std::complex<double>*
A,
const int& lda, std::complex<double>*
B,
const int& ldb)
const;
2374 #endif // _TEUCHOS_BLAS_HPP_
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A <- alpha*x*y'+A.
static T one()
Returns representation of one for this ordinal type.
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Perform the operation: y <- y+alpha*x.
BLAS(const BLAS< OrdinalType, ScalarType > &)
Copy constructor.
Enumerated types for BLAS input characters.
virtual ~DefaultBLASImpl(void)
Destructor.
virtual ~BLAS(void)
Destructor.
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
Performs the matrix-vector operation: x <- A*x or x <- A'*x where A is a unit/non-unit n by n upper/low...
static T zero()
Returns representation of zero for this ordinal type.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A'*x+beta*y where A is a gene...
void ROT(const OrdinalType &n, ScalarType *dx, const OrdinalType &incx, ScalarType *dy, const OrdinalType &incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
ScalarType SIGN(const ScalarType &x, const ScalarType &y) const
Teuchos::ScalarTraits< ScalarType >::magnitudeType MagnitudeType
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
BLAS(void)
Default constructor.
DefaultBLASImpl(void)
Default constructor.
Teuchos header file which uses auto-configuration information to include necessary C++ headers...
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
details::GivensRotator< ScalarType >::c_type rotg_c_type
The type used for c in ROTG.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
General matrix-matrix multiply.
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EDiagChar[]
This structure defines some basic traits for a scalar field type.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sum the absolute values of the entries of x.
#define TEUCHOSNUMERICS_LIB_DLL_EXPORT
DefaultBLASImpl(const DefaultBLASImpl< OrdinalType, ScalarType > &)
Copy constructor.
static T conjugate(T a)
Returns the conjugate of the scalar type a.
BLAS(const BLAS< int, std::complex< double > > &)
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Copy the vector x to the vector y.
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EUploChar[]
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETypeChar[]
ScalarTraits< ScalarType >::magnitudeType c_type
void blas_dabs1(const ScalarType *a, typename ScalarTraits< ScalarType >::magnitudeType *ret) const
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
Form the dot product of the vectors x and y.
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ESideChar[]
BLAS(const BLAS< int, std::complex< float > > &)
static magnitudeType magnitude(T a)
Returns the magnitudeType of the scalar type a.
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the matrix-matrix operation: C <- alpha*A*B+beta*C or C <- alpha*B*A+beta*C where A is an m ...
Defines basic traits for the ordinal field type.
Default traits class that just returns typeid(T).name().
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Performs the matrix-matrix operation: B <- alpha*op(A)*B or B <- alpha*B*op(A) where op(A) is an unit...
Default implementation for BLAS routines.
Teuchos::ScalarTraits< ScalarType >::magnitudeType MagnitudeType
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
Defines basic traits for the scalar field type.
static T zero()
Returns representation of zero for this scalar type.
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETranspChar[]
void SYRK(EUplo uplo, ETransp trans, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the symmetric rank k operation: C <- alpha*A*A'+beta*C or C <- alpha*A'*A+beta*C, where alpha and beta are scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case or k by n matrix in the second case.
static T one()
Returns representation of one for this scalar type.
BLAS(const BLAS< int, double > &)
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.
BLAS(const BLAS< int, float > &)