11 #ifdef HAVE_TEUCHOSCORE_QUADMATH
13 #endif // HAVE_TEUCHOSCORE_QUADMATH
16 #ifdef HAVE_TEUCHOSCORE_QUADMATH
22 GETRF (
const int M,
const int N, __float128 A[],
23 const int LDA,
int IPIV[],
int* INFO)
const
37 }
else if (LDA < std::max (1, M)) {
45 if (M == 0 || N == 0) {
52 const __float128 sfmin = FLT128_MIN;
53 const __float128 zero = 0.0;
54 const __float128 one = 1.0;
56 const int j_upperBound = std::min (M, N);
57 for (
int j = 1; j <= j_upperBound; ++j) {
61 __float128*
const A_jj = A + (j-1)*LDA + (j-1);
64 const int jp = (j - 1) + blas.
IAMAX (M - j + 1, A_jj, 1);
67 const __float128* A_jp_j = A + jp + LDA*j;
68 if (*A_jp_j != zero) {
70 __float128*
const A_j1 = A + (j - 1);
71 __float128*
const A_jp_1 = A + (jp - 1);
74 blas.
SWAP (N, A_j1, LDA, A_jp_1, LDA);
79 __float128*
const A_j1_j = A + j + (j-1)*LDA;
81 if (fabsq (*A_jj) >= sfmin) {
82 blas.
SCAL (M-j, one / *A_jj, A_j1_j, 1);
84 for (
int i = 1; i <= M-j; ++i) {
85 __float128*
const A_jpi_j = A + (j+i-1) + (j-1)*LDA;
90 }
else if (*INFO == 0) {
94 if (j < std::min (M, N)) {
98 const __float128* A_j1_j = A + j + (j-1)*LDA;
99 const __float128* A_j_j1 = A + (j-1) + j*LDA;
100 __float128* A_j1_j1 = A + j + j*LDA;
101 blas.
GER (M-j, N-j, -one, A_j1_j, 1, A_j_j1, LDA, A_j1_j1, LDA);
108 LASWP (
const int N, __float128 A[],
const int LDA,
const int K1,
109 const int K2,
const int IPIV[],
const int INCX)
const
111 int i, i1, i2, inc, ip, ix, ix0, j, k, n32;
121 }
else if (INCX < 0) {
122 ix0 = 1 + (1 - K2)*INCX;
135 for (j = 1; j <= n32; j += 32) {
140 for (i = i1; (inc > 0) ? (i <= i2) : (i >= i2); i += inc) {
143 for (k = j; k <= j+31; ++k) {
144 temp = A[(i-1) + (k-1)*LDA];
145 A[(i-1) + (k-1)*LDA] = A[(ip-1) + (k-1)*LDA];
146 A[(ip-1) + (k-1)*LDA] = temp;
160 for (i = i1; (inc > 0) ? (i <= i2) : (i >= i2); i += inc) {
163 for (k = n32; k <= N; ++k) {
164 temp = A[(i-1) + (k-1)*LDA];
165 A[(i-1) + (k-1)*LDA] = A[(ip-1) + (k-1)*LDA];
166 A[(ip-1) + (k-1)*LDA] = temp;
176 GETRI (
const int , __float128 [],
const int ,
177 int [], __float128 [],
const int ,
181 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GETRI: Not implemented yet.");
187 GETRS (
const char TRANS,
const int N,
const int NRHS,
188 const __float128 A[],
const int LDA,
const int IPIV[],
189 __float128 B[],
const int LDB,
int* INFO)
const
199 const bool notran = (TRANS ==
'N' || TRANS ==
'n');
201 && ! (TRANS ==
'T' || TRANS ==
't')
202 && ! (TRANS ==
'C' || TRANS ==
'c')) {
211 else if (LDA < std::max (1, N)) {
214 else if (LDB < std::max (1, N)) {
221 const __float128 one = 1.0;
235 LASWP (NRHS, B, LDB, 1, N, IPIV, 1);
239 one, A, LDA, B, LDB);
243 one, A, LDA, B, LDB);
252 one, A, LDA, B, LDB);
256 one, A, LDA, B, LDB);
259 LASWP (NRHS, B, LDB, 1, N, IPIV, -1);
267 LAPY2 (
const __float128& x,
const __float128& y)
const
269 const __float128 xabs = fabsq (x);
270 const __float128 yabs = fabsq (y);
271 const __float128 w = fmaxq (xabs, yabs);
272 const __float128 z = fminq (xabs, yabs);
277 const __float128 one = 1.0;
278 const __float128 z_div_w = z / w;
279 return w * sqrtq (one + z_div_w * z_div_w);
285 ORM2R (
const char side,
const char trans,
286 const int m,
const int n,
const int k,
287 const __float128 A[],
const int lda,
288 const __float128*
const tau,
289 __float128 C[],
const int ldc,
290 __float128 work[],
int*
const info)
const
298 ILADLC (
const int m,
const int n,
const __float128 A[],
const int lda)
300 const __float128 zero = 0.0;
305 }
else if (A[0 + (n-1)*lda] != zero || A[(m-1) + (n-1)*lda] != zero) {
309 for (
int j = n; j > 0; --j) {
310 for (
int i = 1; i <= m; ++i) {
311 if (A[(i-1) + (j-1)*lda] != zero) {
321 ILADLR (
const int m,
const int n,
const __float128 A[],
const int lda)
323 const __float128 zero = 0.0;
328 }
else if (A[(m-1) + 0*lda] != zero || A[(m-1) + (n-1)*lda] != zero) {
333 for (
int j = 1; j <= n; ++j) {
335 while (A[(std::max (i, 1) - 1) + (j - 1)*lda] == zero && i >= 1) {
338 lastZeroRow = std::max (lastZeroRow, i);
347 LARF (
const char side,
350 const __float128 v[],
352 const __float128 tau,
355 __float128 work[])
const
357 const __float128 zero = 0.0;
358 const __float128 one = 1.0;
360 const bool applyLeft = (side ==
'L');
373 i = 1 + (lastv - 1) * incv;
378 while (lastv > 0 && v[i-1] == zero) {
384 lastc = ILADLC (lastv, n, C, ldc);
387 lastc = ILADLR (m, lastv, C, ldc);
400 blas.
GER (lastv, lastc, -tau, v, incv, work, 1, C, ldc);
408 v, incv, zero, work, 1);
410 blas.
GER (lastc, lastv, -tau, work, 1, v, incv, C, ldc);
418 LARFG (
const int N, __float128*
const ALPHA,
419 __float128 X[],
const int INCX, __float128*
const TAU)
const
423 const __float128 zero = 0.0;
424 const __float128 one = 1.0;
425 const __float128 two = 2.0;
432 __float128 xnorm = blas.
NRM2 (N-1, X, INCX);
436 if (*ALPHA >= zero) {
444 for (
int j = 0; j < N; ++j) {
451 __float128 beta = copysignq (LAPY2 (*ALPHA, xnorm), *ALPHA);
452 const __float128 smlnum = FLT128_MIN / FLT128_EPSILON;
455 if (fabsq (beta) < smlnum) {
458 __float128 bignum = one / smlnum;
461 blas.
SCAL (N-1, bignum, X, INCX);
463 *ALPHA = *ALPHA*bignum;
464 }
while (fabsq(beta) < smlnum);
467 xnorm = blas.
NRM2 (N-1, X, INCX);
468 beta = copysignq (LAPY2 (*ALPHA, xnorm), *ALPHA);
471 __float128 savealpha = *ALPHA;
472 *ALPHA = *ALPHA + beta;
475 *TAU = -*ALPHA / beta;
477 *ALPHA = xnorm * (xnorm / *ALPHA);
478 *TAU = *ALPHA / beta;
482 if (fabsq (*TAU) <= smlnum) {
491 if (savealpha >= zero) {
495 for (
int j = 0; j < N; ++j) {
502 blas.
SCAL (N-1, one / *ALPHA, X, INCX);
505 for (
int j = 1; j <= knt; ++j) {
523 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GEQR2: Not implemented yet.");
535 int*
const INFO)
const
543 WORK[0] =
static_cast<__float128
> (N);
546 GEQR2 (M, N, A, LDA, TAU, WORK, INFO);
563 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GEQR2: Not implemented yet.");
579 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GEQR2: Not implemented yet.");
584 LASCL (
const char TYPE,
587 const __float128 cfrom,
588 const __float128 cto,
596 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::LASCL: Not implemented yet.");
611 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GBTRF: Not implemented yet.");
616 GBTRS (
const char TRANS,
629 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GBTRS: Not implemented yet.");
634 #endif // HAVE_TEUCHOSCORE_QUADMATH
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A <- alpha*x*y'+A.
Declaration and definition of Teuchos::Details::Lapack128, a partial implementation of Teuchos::LAPAC...
Templated interface class to BLAS routines.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A'*x+beta*y where A is a gene...
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.