Teuchos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_Details_Lapack128.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
43 #ifdef HAVE_TEUCHOSCORE_QUADMATH
44 # include "Teuchos_BLAS.hpp"
45 #endif // HAVE_TEUCHOSCORE_QUADMATH
46 
47 
48 #ifdef HAVE_TEUCHOSCORE_QUADMATH
49 namespace Teuchos {
50 namespace Details {
51 
52 void
53 Lapack128::
54 GETRF (const int M, const int N, __float128 A[],
55  const int LDA, int IPIV[], int* INFO) const
56 {
57  //std::cerr << "GETRF: N = " << N << std::endl;
58 
60 
61  // NOTE (mfh 05 Sep 2015) This is a direct translation of LAPACK
62  // 3.5.0's DGETF2 routine. LAPACK is under a BSD license.
63 
64  *INFO = 0;
65  if (M < 0) {
66  *INFO = -1;
67  } else if (N < 0) {
68  *INFO = -2;
69  } else if (LDA < std::max (1, M)) {
70  *INFO = -4;
71  }
72  if (*INFO != 0) {
73  return;
74  }
75 
76  // Quick return if possible
77  if (M == 0 || N == 0) {
78  return;
79  }
80 
81  // Compute machine safe minimum sfmin (such that 1/sfmin does
82  // not overflow). LAPACK 3.1 just returns for this the smallest
83  // normalized number.
84  const __float128 sfmin = FLT128_MIN;
85  const __float128 zero = 0.0;
86  const __float128 one = 1.0;
87 
88  const int j_upperBound = std::min (M, N);
89  for (int j = 1; j <= j_upperBound; ++j) {
90  //std::cerr << " j = " << j << std::endl;
91 
92  // Find pivot and test for singularity.
93  __float128* const A_jj = A + (j-1)*LDA + (j-1);
94 
95  //std::cerr << " CALLING IAMAX" << std::endl;
96  const int jp = (j - 1) + blas.IAMAX (M - j + 1, A_jj, 1);
97  IPIV[j - 1] = jp;
98 
99  const __float128* A_jp_j = A + jp + LDA*j;
100  if (*A_jp_j != zero) {
101  // Apply the interchange to columns 1:N.
102  __float128* const A_j1 = A + (j - 1);
103  __float128* const A_jp_1 = A + (jp - 1);
104 
105  if (jp != j) {
106  blas.SWAP (N, A_j1, LDA, A_jp_1, LDA);
107  }
108 
109  // Compute elements J+1:M of J-th column.
110  if (j < M) {
111  __float128* const A_j1_j = A + j + (j-1)*LDA;
112 
113  if (fabsq (*A_jj) >= sfmin) {
114  blas.SCAL (M-j, one / *A_jj, A_j1_j, 1);
115  } else {
116  for (int i = 1; i <= M-j; ++i) {
117  __float128* const A_jpi_j = A + (j+i-1) + (j-1)*LDA;
118  *A_jpi_j /= *A_jj;
119  }
120  }
121  }
122  } else if (*INFO == 0) {
123  *INFO = j;
124  }
125 
126  if (j < std::min (M, N)) {
127  //std::cerr << " UPDATE TRAILING SUBMATRIX" << std::endl;
128 
129  // Update trailing submatrix.
130  const __float128* A_j1_j = A + j + (j-1)*LDA;
131  const __float128* A_j_j1 = A + (j-1) + j*LDA;
132  __float128* A_j1_j1 = A + j + j*LDA;
133  blas.GER (M-j, N-j, -one, A_j1_j, 1, A_j_j1, LDA, A_j1_j1, LDA);
134  }
135  }
136 }
137 
138 void
139 Lapack128::
140 LASWP (const int N, __float128 A[], const int LDA, const int K1,
141  const int K2, const int IPIV[], const int INCX) const
142 {
143  int i, i1, i2, inc, ip, ix, ix0, j, k, n32;
144  __float128 temp;
145 
146  // Interchange row I with row IPIV(I) for each of rows K1 through K2.
147 
148  if (INCX > 0) {
149  ix0 = K1;
150  i1 = K1;
151  i2 = K2;
152  inc = 1;
153  } else if (INCX < 0) {
154  ix0 = 1 + (1 - K2)*INCX;
155  i1 = K2;
156  i2 = K1;
157  inc = -1;
158  } else { // INCX == 0
159  return;
160  }
161 
162  // The LAPACK 3.5.0 source code does 32 entries at a time,
163  // presumably for better vectorization or cache line usage.
164  n32 = (N / 32) * 32;
165 
166  if (n32 != 0) {
167  for (j = 1; j <= n32; j += 32) {
168  ix = ix0;
169  // C and C++ lack Fortran's convenient range specifier,
170  // which can iterate over a range in either direction
171  // without particular fuss about the end condition.
172  for (i = i1; (inc > 0) ? (i <= i2) : (i >= i2); i += inc) {
173  ip = IPIV[ix-1];
174  if (ip != i) {
175  for (k = j; k <= j+31; ++k) {
176  temp = A[(i-1) + (k-1)*LDA]; // temp = a( i, k )
177  A[(i-1) + (k-1)*LDA] = A[(ip-1) + (k-1)*LDA]; // a( i, k ) = a( ip, k )
178  A[(ip-1) + (k-1)*LDA] = temp; // a( ip, k ) = temp
179  }
180  }
181  ix = ix + INCX;
182  }
183  }
184  }
185 
186  if (n32 != N) {
187  n32 = n32 + 1;
188  ix = ix0;
189  // C and C++ lack Fortran's convenient range specifier,
190  // which can iterate over a range in either direction
191  // without particular fuss about the end condition.
192  for (i = i1; (inc > 0) ? (i <= i2) : (i >= i2); i += inc) {
193  ip = IPIV[ix-1];
194  if (ip != i) {
195  for (k = n32; k <= N; ++k) {
196  temp = A[(i-1) + (k-1)*LDA]; // temp = a( i, k )
197  A[(i-1) + (k-1)*LDA] = A[(ip-1) + (k-1)*LDA]; // a( i, k ) = a( ip, k )
198  A[(ip-1) + (k-1)*LDA] = temp; // a( ip, k ) = temp
199  }
200  }
201  ix = ix + INCX;
202  }
203  }
204 }
205 
206 void
207 Lapack128::
208 GETRI (const int /* N */, __float128 /* A */ [], const int /* LDA */,
209  int /* IPIV */ [], __float128 /* WORK */ [], const int /* LWORK */,
210  int* /* INFO */) const
211 {
213  (true, std::logic_error, "Teuchos::LAPACK<int, __float128>::GETRI: Not implemented yet.");
214 }
215 
216 
217 void
218 Lapack128::
219 GETRS (const char TRANS, const int N, const int NRHS,
220  const __float128 A[], const int LDA, const int IPIV[],
221  __float128 B[], const int LDB, int* INFO) const
222 {
223  //std::cerr << "GETRS: N = " << N << std::endl;
224 
226 
227  // NOTE (mfh 05 Sep 2015) This is a direct translation of LAPACK
228  // 3.5.0's DGETRS routine. LAPACK is under a BSD license.
229 
230  *INFO = 0;
231  const bool notran = (TRANS == 'N' || TRANS == 'n');
232  if (! notran
233  && ! (TRANS == 'T' || TRANS == 't')
234  && ! (TRANS == 'C' || TRANS == 'c')) {
235  *INFO = -1; // invalid TRANS argument
236  }
237  else if (N < 0) {
238  *INFO = -2; // invalid N (negative)
239  }
240  else if (NRHS < 0) {
241  *INFO = -3; // invalid NRHS (negative)
242  }
243  else if (LDA < std::max (1, N)) {
244  *INFO = -5; // invalid LDA (too small)
245  }
246  else if (LDB < std::max (1, N)) {
247  *INFO = -8; // invalid LDB (too small)
248  }
249  if (*INFO != 0) {
250  return;
251  }
252 
253  const __float128 one = 1.0;
254 
255  using Teuchos::LEFT_SIDE;
256  using Teuchos::LOWER_TRI;
257  using Teuchos::UPPER_TRI;
258  using Teuchos::NO_TRANS;
259  using Teuchos::TRANS;
260  using Teuchos::CONJ_TRANS;
261  using Teuchos::UNIT_DIAG;
263 
264  if (notran) { // No transpose; solve AX=B
265  // Apply row interchanges to the right-hand sides.
266  //std::cerr << "CALLING LASWP" << std::endl;
267  LASWP (NRHS, B, LDB, 1, N, IPIV, 1);
268  // Solve L*X = B, overwriting B with X.
269  //std::cerr << "CALLING TRSM (1)" << std::endl;
270  blas.TRSM (LEFT_SIDE, LOWER_TRI, NO_TRANS, UNIT_DIAG, N, NRHS,
271  one, A, LDA, B, LDB);
272  // Solve U*X = B, overwriting B with X.
273  //std::cerr << "CALLING TRSM (2)" << std::endl;
274  blas.TRSM (LEFT_SIDE, UPPER_TRI, NO_TRANS, NON_UNIT_DIAG, N, NRHS,
275  one, A, LDA, B, LDB);
276  }
277  else { // Transpose or conjugate transpose: solve A^{T,H}X = B.
278  const Teuchos::ETransp transposeMode = (TRANS == 'T' || TRANS == 't') ?
279  TRANS : CONJ_TRANS;
280 
281  // Solve U^{T,H}*X = B, overwriting B with X.
282  //std::cerr << "CALLING TRSM (1)" << std::endl;
283  blas.TRSM (LEFT_SIDE, UPPER_TRI, transposeMode, NON_UNIT_DIAG, N, NRHS,
284  one, A, LDA, B, LDB);
285  // Solve L^{T,H}*X = B, overwriting B with X.
286  //std::cerr << "CALLING TRSM (2)" << std::endl;
287  blas.TRSM (LEFT_SIDE, LOWER_TRI, transposeMode, UNIT_DIAG, N, NRHS,
288  one, A, LDA, B, LDB);
289  //std::cerr << "CALLING LASWP" << std::endl;
290  // Apply row interchanges to the solution vectors.
291  LASWP (NRHS, B, LDB, 1, N, IPIV, -1);
292  }
293 
294  //std::cerr << "DONE WITH GETRS" << std::endl;
295 }
296 
297 __float128
298 Lapack128::
299 LAPY2 (const __float128& x, const __float128& y) const
300 {
301  const __float128 xabs = fabsq (x);
302  const __float128 yabs = fabsq (y);
303  const __float128 w = fmaxq (xabs, yabs);
304  const __float128 z = fminq (xabs, yabs);
305 
306  if (z == 0.0) {
307  return w;
308  } else {
309  const __float128 one = 1.0;
310  const __float128 z_div_w = z / w;
311  return w * sqrtq (one + z_div_w * z_div_w);
312  }
313 }
314 
315 void
316 Lapack128::
317 ORM2R (const char side, const char trans,
318  const int m, const int n, const int k,
319  const __float128 A[], const int lda,
320  const __float128* const tau,
321  __float128 C[], const int ldc,
322  __float128 work[], int* const info) const
323 {
324  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Not implemented");
325 }
326 
327 namespace { // (anonymous)
328 
329  int
330  ILADLC (const int m, const int n, const __float128 A[], const int lda)
331  {
332  const __float128 zero = 0.0;
333 
334  // Quick test for the common case where one corner is non-zero.
335  if (n == 0) {
336  return n;
337  } else if (A[0 + (n-1)*lda] != zero || A[(m-1) + (n-1)*lda] != zero) {
338  return n;
339  } else {
340  // Now scan each column from the end, returning with the first non-zero.
341  for (int j = n; j > 0; --j) {
342  for (int i = 1; i <= m; ++i) {
343  if (A[(i-1) + (j-1)*lda] != zero) {
344  return j;
345  }
346  }
347  }
348  return 0;
349  }
350  }
351 
352  int
353  ILADLR (const int m, const int n, const __float128 A[], const int lda)
354  {
355  const __float128 zero = 0.0;
356 
357  // Quick test for the common case where one corner is non-zero.
358  if (m == 0) {
359  return m;
360  } else if (A[(m-1) + 0*lda] != zero || A[(m-1) + (n-1)*lda] != zero) {
361  return m;
362  } else {
363  // Scan up each column tracking the last zero row seen.
364  int lastZeroRow = 0;
365  for (int j = 1; j <= n; ++j) {
366  int i = m;
367  while (A[(std::max (i, 1) - 1) + (j - 1)*lda] == zero && i >= 1) {
368  i--;
369  }
370  lastZeroRow = std::max (lastZeroRow, i);
371  }
372  return lastZeroRow;
373  }
374  }
375 } // namespace (anonymous)
376 
377 void
378 Lapack128::
379 LARF (const char side,
380  const int m,
381  const int n,
382  const __float128 v[],
383  const int incv,
384  const __float128 tau,
385  __float128 C[],
386  const int ldc,
387  __float128 work[]) const
388 {
389  const __float128 zero = 0.0;
390  const __float128 one = 1.0;
392  const bool applyLeft = (side == 'L');
393  int lastv = 0;
394  int lastc = 0;
395  int i = 0;
396 
397  if (tau != zero) {
398  // Set up variables for scanning V. LASTV begins pointing to the end of V.
399  if (applyLeft) {
400  lastv = m;
401  } else {
402  lastv = n;
403  }
404  if (incv > 0) {
405  i = 1 + (lastv - 1) * incv;
406  } else {
407  i = 1;
408  }
409  // Look for the last non-zero row in V.
410  while (lastv > 0 && v[i-1] == zero) {
411  lastv = lastv - 1;
412  i = i - incv;
413  }
414  if (applyLeft) {
415  // Scan for the last non-zero column in C(1:lastv,:).
416  lastc = ILADLC (lastv, n, C, ldc);
417  } else {
418  // Scan for the last non-zero row in C(:,1:lastv).
419  lastc = ILADLR (m, lastv, C, ldc);
420  }
421  }
422 
423  // Note that lastc == 0 renders the BLAS operations null; no special
424  // case is needed at this level.
425  if (applyLeft) {
426  // Form H * C
427  if (lastv > 0) {
428  // w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1)
429  blas.GEMV (Teuchos::TRANS, lastv, lastc, one, C, ldc, v, incv,
430  zero, work, 1);
431  // C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**T
432  blas.GER (lastv, lastc, -tau, v, incv, work, 1, C, ldc);
433  }
434  }
435  else {
436  // Form C * H
437  if (lastv > 0) {
438  // w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1)
439  blas.GEMV (Teuchos::NO_TRANS, lastc, lastv, one, C, ldc,
440  v, incv, zero, work, 1);
441  // C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**T
442  blas.GER (lastc, lastv, -tau, work, 1, v, incv, C, ldc);
443  }
444  }
445 }
446 
447 
448 void
449 Lapack128::
450 LARFG (const int N, __float128* const ALPHA,
451  __float128 X[], const int INCX, __float128* const TAU) const
452 {
453  // This is actually LARFGP.
454 
455  const __float128 zero = 0.0;
456  const __float128 one = 1.0;
457  const __float128 two = 2.0;
459 
460  if (N <= 0) {
461  *TAU = zero;
462  return;
463  }
464  __float128 xnorm = blas.NRM2 (N-1, X, INCX);
465 
466  if (xnorm == zero) {
467  // H = [+/-1, 0; I], sign chosen so *ALPHA >= 0
468  if (*ALPHA >= zero) {
469  // When TAU.eq.ZERO, the vector is special-cased to be all zeros
470  // in the application routines. We do not need to clear it.
471  *TAU = zero;
472  } else {
473  // However, the application routines rely on explicit
474  // zero checks when TAU.ne.ZERO, and we must clear X.
475  *TAU = two;
476  for (int j = 0; j < N; ++j) {
477  X[j * INCX] = 0.0;
478  }
479  *ALPHA = -*ALPHA;
480  }
481  } else { // general case (norm of x is nonzero)
482  // This implements Fortran's two-argument SIGN intrinsic.
483  __float128 beta = copysignq (LAPY2 (*ALPHA, xnorm), *ALPHA);
484  const __float128 smlnum = FLT128_MIN / FLT128_EPSILON;
485  int knt = 0;
486 
487  if (fabsq (beta) < smlnum) {
488  // XNORM, BETA may be inaccurate; scale X and recompute them
489 
490  __float128 bignum = one / smlnum;
491  do {
492  knt = knt + 1;
493  blas.SCAL (N-1, bignum, X, INCX);
494  beta = beta*bignum;
495  *ALPHA = *ALPHA*bignum;
496  } while (fabsq(beta) < smlnum);
497 
498  // New BETA is at most 1, at least SMLNUM
499  xnorm = blas.NRM2 (N-1, X, INCX);
500  beta = copysignq (LAPY2 (*ALPHA, xnorm), *ALPHA);
501  }
502 
503  __float128 savealpha = *ALPHA;
504  *ALPHA = *ALPHA + beta;
505  if (beta < zero) {
506  beta = -beta;
507  *TAU = -*ALPHA / beta;
508  } else {
509  *ALPHA = xnorm * (xnorm / *ALPHA);
510  *TAU = *ALPHA / beta;
511  *ALPHA = -*ALPHA;
512  }
513 
514  if (fabsq (*TAU) <= smlnum) {
515  // In the case where the computed TAU ends up being a
516  // denormalized number, it loses relative accuracy. This is a
517  // BIG problem. Solution: flush TAU to ZERO. This explains the
518  // next IF statement.
519  //
520  // (Bug report provided by Pat Quillen from MathWorks on Jul 29,
521  // 2009.) (Thanks Pat. Thanks MathWorks.)
522 
523  if (savealpha >= zero) {
524  *TAU = zero;
525  } else {
526  *TAU = two;
527  for (int j = 0; j < N; ++j) {
528  X[j*INCX] = 0.0;
529  }
530  beta = -savealpha;
531  }
532  }
533  else { // this is the general case
534  blas.SCAL (N-1, one / *ALPHA, X, INCX);
535  }
536  // If BETA is subnormal, it may lose relative accuracy
537  for (int j = 1; j <= knt; ++j) {
538  beta = beta*smlnum;
539  }
540  *ALPHA = beta;
541  }
542 }
543 
544 void
545 Lapack128::
546 GEQR2 (const int /* M */,
547  const int /* N */,
548  __float128 /* A */ [],
549  const int /* LDA */,
550  __float128 /* TAU */ [],
551  __float128 /* WORK */ [],
552  int* const /* INFO */ ) const
553 {
555  (true, std::logic_error, "Teuchos::LAPACK<int, __float128>::GEQR2: Not implemented yet.");
556 }
557 
558 void
559 Lapack128::
560 GEQRF (const int M,
561  const int N,
562  __float128 A[],
563  const int LDA,
564  __float128 TAU[],
565  __float128 WORK[],
566  const int LWORK,
567  int* const INFO) const
568 {
569  // mfh 17 Sep 2015: We don't implement a BLAS 3 QR factorization for
570  // __float128. Instead, we call the BLAS 2 QR factorization GEQR2,
571  // which has a fixed minimum WORK array length of N. Thus, we have
572  // to roll our own LWORK query here.
573 
574  if (LWORK == -1) {
575  WORK[0] = static_cast<__float128> (N);
576  }
577  else {
578  GEQR2 (M, N, A, LDA, TAU, WORK, INFO);
579  }
580 }
581 
582 void
583 Lapack128::
584 ORGQR (const int /* M */,
585  const int /* N */,
586  const int /* K */,
587  __float128 /* A */ [],
588  const int /* LDA */,
589  const __float128 /* TAU */ [],
590  __float128 /* WORK */ [],
591  const int /* LWORK */,
592  int* const /* INFO */) const
593 {
595  (true, std::logic_error, "Teuchos::LAPACK<int, __float128>::GEQR2: Not implemented yet.");
596 }
597 
598 void
599 Lapack128::
600 UNGQR (const int /* M */,
601  const int /* N */,
602  const int /* K */,
603  __float128 /* A */ [],
604  const int /* LDA */,
605  const __float128 /* TAU */ [],
606  __float128 /* WORK */ [],
607  const int /* LWORK */,
608  int* const /* INFO */) const
609 {
611  (true, std::logic_error, "Teuchos::LAPACK<int, __float128>::GEQR2: Not implemented yet.");
612 }
613 
614 void
615 Lapack128::
616 LASCL (const char TYPE,
617  const int kl,
618  const int ku,
619  const __float128 cfrom,
620  const __float128 cto,
621  const int m,
622  const int n,
623  __float128* A,
624  const int lda,
625  int* info) const
626 {
628  (true, std::logic_error, "Teuchos::LAPACK<int, __float128>::LASCL: Not implemented yet.");
629 }
630 
631 void
632 Lapack128::
633 GBTRF (const int m,
634  const int n,
635  const int kl,
636  const int ku,
637  __float128* A,
638  const int lda,
639  int* IPIV,
640  int* info) const
641 {
643  (true, std::logic_error, "Teuchos::LAPACK<int, __float128>::GBTRF: Not implemented yet.");
644 }
645 
646 void
647 Lapack128::
648 GBTRS (const char TRANS,
649  const int n,
650  const int kl,
651  const int ku,
652  const int nrhs,
653  const __float128* A,
654  const int lda,
655  const int* IPIV,
656  __float128* B,
657  const int ldb,
658  int* info) const
659 {
661  (true, std::logic_error, "Teuchos::LAPACK<int, __float128>::GBTRS: Not implemented yet.");
662 }
663 
664 } // namespace Details
665 } // namespace Teuchos
666 #endif // HAVE_TEUCHOSCORE_QUADMATH
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A &lt;- alpha*x*y&#39;+A.
Declaration and definition of Teuchos::Details::Lapack128, a partial implementation of Teuchos::LAPAC...
Templated interface class to BLAS routines.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y &lt;- alpha*A*x+beta*y or y &lt;- alpha*A&#39;*x+beta*y where A is a gene...
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Templated BLAS wrapper.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.