43 #ifdef HAVE_TEUCHOSCORE_QUADMATH
45 #endif // HAVE_TEUCHOSCORE_QUADMATH
48 #ifdef HAVE_TEUCHOSCORE_QUADMATH
54 GETRF (
const int M,
const int N, __float128
A[],
55 const int LDA,
int IPIV[],
int* INFO)
const
69 }
else if (LDA < std::max (1, M)) {
77 if (M == 0 || N == 0) {
84 const __float128 sfmin = FLT128_MIN;
85 const __float128 zero = 0.0;
86 const __float128 one = 1.0;
88 const int j_upperBound = std::min (M, N);
89 for (
int j = 1; j <= j_upperBound; ++j) {
93 __float128*
const A_jj = A + (j-1)*LDA + (j-1);
96 const int jp = (j - 1) + blas.
IAMAX (M - j + 1, A_jj, 1);
99 const __float128* A_jp_j = A + jp + LDA*j;
100 if (*A_jp_j != zero) {
102 __float128*
const A_j1 = A + (j - 1);
103 __float128*
const A_jp_1 = A + (jp - 1);
106 blas.
SWAP (N, A_j1, LDA, A_jp_1, LDA);
111 __float128*
const A_j1_j = A + j + (j-1)*LDA;
113 if (fabsq (*A_jj) >= sfmin) {
114 blas.
SCAL (M-j, one / *A_jj, A_j1_j, 1);
116 for (
int i = 1; i <= M-j; ++i) {
117 __float128*
const A_jpi_j = A + (j+i-1) + (j-1)*LDA;
122 }
else if (*INFO == 0) {
126 if (j < std::min (M, N)) {
130 const __float128* A_j1_j = A + j + (j-1)*LDA;
131 const __float128* A_j_j1 = A + (j-1) + j*LDA;
132 __float128* A_j1_j1 = A + j + j*LDA;
133 blas.
GER (M-j, N-j, -one, A_j1_j, 1, A_j_j1, LDA, A_j1_j1, LDA);
140 LASWP (
const int N, __float128 A[],
const int LDA,
const int K1,
141 const int K2,
const int IPIV[],
const int INCX)
const
143 int i, i1, i2, inc, ip, ix, ix0, j, k, n32;
153 }
else if (INCX < 0) {
154 ix0 = 1 + (1 - K2)*INCX;
167 for (j = 1; j <= n32; j += 32) {
172 for (i = i1; (inc > 0) ? (i <= i2) : (i >= i2); i += inc) {
175 for (k = j; k <= j+31; ++k) {
176 temp = A[(i-1) + (k-1)*LDA];
177 A[(i-1) + (k-1)*LDA] = A[(ip-1) + (k-1)*LDA];
178 A[(ip-1) + (k-1)*LDA] = temp;
192 for (i = i1; (inc > 0) ? (i <= i2) : (i >= i2); i += inc) {
195 for (k = n32; k <= N; ++k) {
196 temp = A[(i-1) + (k-1)*LDA];
197 A[(i-1) + (k-1)*LDA] = A[(ip-1) + (k-1)*LDA];
198 A[(ip-1) + (k-1)*LDA] = temp;
208 GETRI (
const int , __float128 [],
const int ,
209 int [], __float128 [],
const int ,
213 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GETRI: Not implemented yet.");
219 GETRS (
const char TRANS,
const int N,
const int NRHS,
220 const __float128 A[],
const int LDA,
const int IPIV[],
221 __float128
B[],
const int LDB,
int* INFO)
const
231 const bool notran = (TRANS ==
'N' || TRANS ==
'n');
233 && ! (TRANS ==
'T' || TRANS ==
't')
234 && ! (TRANS ==
'C' || TRANS ==
'c')) {
243 else if (LDA < std::max (1, N)) {
246 else if (LDB < std::max (1, N)) {
253 const __float128 one = 1.0;
267 LASWP (NRHS, B, LDB, 1, N, IPIV, 1);
271 one, A, LDA, B, LDB);
275 one, A, LDA, B, LDB);
284 one, A, LDA, B, LDB);
288 one, A, LDA, B, LDB);
291 LASWP (NRHS, B, LDB, 1, N, IPIV, -1);
299 LAPY2 (
const __float128& x,
const __float128& y)
const
301 const __float128 xabs = fabsq (x);
302 const __float128 yabs = fabsq (y);
303 const __float128 w = fmaxq (xabs, yabs);
304 const __float128 z = fminq (xabs, yabs);
309 const __float128 one = 1.0;
310 const __float128 z_div_w = z / w;
311 return w * sqrtq (one + z_div_w * z_div_w);
317 ORM2R (
const char side,
const char trans,
318 const int m,
const int n,
const int k,
319 const __float128 A[],
const int lda,
320 const __float128*
const tau,
321 __float128
C[],
const int ldc,
322 __float128 work[],
int*
const info)
const
330 ILADLC (
const int m,
const int n,
const __float128 A[],
const int lda)
332 const __float128 zero = 0.0;
337 }
else if (A[0 + (n-1)*lda] != zero || A[(m-1) + (n-1)*lda] != zero) {
341 for (
int j = n; j > 0; --j) {
342 for (
int i = 1; i <= m; ++i) {
343 if (A[(i-1) + (j-1)*lda] != zero) {
353 ILADLR (
const int m,
const int n,
const __float128 A[],
const int lda)
355 const __float128 zero = 0.0;
360 }
else if (A[(m-1) + 0*lda] != zero || A[(m-1) + (n-1)*lda] != zero) {
365 for (
int j = 1; j <=
n; ++j) {
367 while (A[(std::max (i, 1) - 1) + (j - 1)*lda] == zero && i >= 1) {
370 lastZeroRow = std::max (lastZeroRow, i);
379 LARF (
const char side,
382 const __float128 v[],
384 const __float128 tau,
387 __float128 work[])
const
389 const __float128 zero = 0.0;
390 const __float128 one = 1.0;
392 const bool applyLeft = (side ==
'L');
405 i = 1 + (lastv - 1) * incv;
410 while (lastv > 0 && v[i-1] == zero) {
416 lastc = ILADLC (lastv, n, C, ldc);
419 lastc = ILADLR (m, lastv, C, ldc);
432 blas.
GER (lastv, lastc, -tau, v, incv, work, 1, C, ldc);
440 v, incv, zero, work, 1);
442 blas.
GER (lastc, lastv, -tau, work, 1, v, incv, C, ldc);
450 LARFG (
const int N, __float128*
const ALPHA,
451 __float128 X[],
const int INCX, __float128*
const TAU)
const
455 const __float128 zero = 0.0;
456 const __float128 one = 1.0;
457 const __float128 two = 2.0;
464 __float128 xnorm = blas.
NRM2 (N-1, X, INCX);
468 if (*ALPHA >= zero) {
476 for (
int j = 0; j < N; ++j) {
483 __float128 beta = copysignq (LAPY2 (*ALPHA, xnorm), *ALPHA);
484 const __float128 smlnum = FLT128_MIN / FLT128_EPSILON;
487 if (fabsq (beta) < smlnum) {
490 __float128 bignum = one / smlnum;
493 blas.
SCAL (N-1, bignum, X, INCX);
495 *ALPHA = *ALPHA*bignum;
496 }
while (fabsq(beta) < smlnum);
499 xnorm = blas.
NRM2 (N-1, X, INCX);
500 beta = copysignq (LAPY2 (*ALPHA, xnorm), *ALPHA);
503 __float128 savealpha = *ALPHA;
504 *ALPHA = *ALPHA + beta;
507 *TAU = -*ALPHA / beta;
509 *ALPHA = xnorm * (xnorm / *ALPHA);
510 *TAU = *ALPHA / beta;
514 if (fabsq (*TAU) <= smlnum) {
523 if (savealpha >= zero) {
527 for (
int j = 0; j < N; ++j) {
534 blas.
SCAL (N-1, one / *ALPHA, X, INCX);
537 for (
int j = 1; j <= knt; ++j) {
555 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GEQR2: Not implemented yet.");
567 int*
const INFO)
const
575 WORK[0] =
static_cast<__float128
> (N);
578 GEQR2 (M, N, A, LDA, TAU, WORK, INFO);
595 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GEQR2: Not implemented yet.");
611 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GEQR2: Not implemented yet.");
616 LASCL (
const char TYPE,
619 const __float128 cfrom,
620 const __float128 cto,
628 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::LASCL: Not implemented yet.");
643 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GBTRF: Not implemented yet.");
648 GBTRS (
const char TRANS,
661 (
true, std::logic_error,
"Teuchos::LAPACK<int, __float128>::GBTRS: Not implemented yet.");
666 #endif // HAVE_TEUCHOSCORE_QUADMATH
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A <- alpha*x*y'+A.
Declaration and definition of Teuchos::Details::Lapack128, a partial implementation of Teuchos::LAPAC...
Templated interface class to BLAS routines.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A'*x+beta*y where A is a gene...
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.