Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_BLAS.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Teuchos: Common Tools Package
4 //
5 // Copyright 2004 NTESS and the Teuchos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 // Kris
11 // 06.16.03 -- Start over from scratch
12 // 06.16.03 -- Initial templatization (Tpetra_BLAS.cpp is no longer needed)
13 // 06.18.03 -- Changed xxxxx_() function calls to XXXXX_F77()
14 // -- Added warning messages for generic calls
15 // 07.08.03 -- Move into Teuchos package/namespace
16 // 07.24.03 -- The first iteration of BLAS generics is nearing completion. Caveats:
17 // * TRSM isn't finished yet; it works for one case at the moment (left side, upper tri., no transpose, no unit diag.)
18 // * Many of the generic implementations are quite inefficient, ugly, or both. I wrote these to be easy to debug, not for efficiency or legibility. The next iteration will improve both of these aspects as much as possible.
19 // * Very little verification of input parameters is done, save for the character-type arguments (TRANS, etc.) which is quite robust.
20 // * All of the routines that make use of both an incx and incy parameter (which includes much of the L1 BLAS) are set up to work iff incx == incy && incx > 0. Allowing for differing/negative values of incx/incy should be relatively trivial.
21 // * All of the L2/L3 routines assume that the entire matrix is being used (that is, if A is mxn, lda = m); they don't work on submatrices yet. This *should* be a reasonably trivial thing to fix, as well.
22 // -- Removed warning messages for generic calls
23 // 08.08.03 -- TRSM now works for all cases where SIDE == L and DIAG == N. DIAG == U is implemented but does not work correctly; SIDE == R is not yet implemented.
24 // 08.14.03 -- TRSM now works for all cases and accepts (and uses) leading-dimension information.
25 // 09.26.03 -- character input replaced with enumerated input to cause compiling errors and not run-time errors ( suggested by RAB ).
26 
27 #ifndef _TEUCHOS_BLAS_HPP_
28 #define _TEUCHOS_BLAS_HPP_
29 
37 #include "Teuchos_ConfigDefs.hpp"
38 #include "Teuchos_ScalarTraits.hpp"
40 #include "Teuchos_BLAS_types.hpp"
41 #include "Teuchos_Assert.hpp"
42 
75 namespace Teuchos
76 {
77  extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ESideChar[];
78  extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETranspChar[];
79  extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EUploChar[];
80  extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EDiagChar[];
81  extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETypeChar[];
82 
83  // Forward declaration
84  namespace details {
85  template<typename ScalarType,
87  class GivensRotator;
88  }
89 
91 
96  template<typename OrdinalType, typename ScalarType>
98  {
99 
100  typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
101 
102  public:
104 
105 
107  inline DefaultBLASImpl(void) {}
108 
111 
113  inline virtual ~DefaultBLASImpl(void) {}
115 
117 
118 
120 
122  typedef typename details::GivensRotator<ScalarType>::c_type rotg_c_type;
123 
125  void ROTG(ScalarType* da, ScalarType* db, rotg_c_type* c, ScalarType* s) const;
126 
128  void ROT(const OrdinalType& n, ScalarType* dx, const OrdinalType& incx, ScalarType* dy, const OrdinalType& incy, MagnitudeType* c, ScalarType* s) const;
129 
131  void SCAL(const OrdinalType& n, const ScalarType& alpha, ScalarType* x, const OrdinalType& incx) const;
132 
134  void COPY(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx, ScalarType* y, const OrdinalType& incy) const;
135 
137  template <typename alpha_type, typename x_type>
138  void AXPY(const OrdinalType& n, const alpha_type alpha, const x_type* x, const OrdinalType& incx, ScalarType* y, const OrdinalType& incy) const;
139 
141  typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx) const;
142 
144  template <typename x_type, typename y_type>
145  ScalarType DOT(const OrdinalType& n, const x_type* x, const OrdinalType& incx, const y_type* y, const OrdinalType& incy) const;
146 
148  typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx) const;
149 
151  OrdinalType IAMAX(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx) const;
153 
155 
156 
158  template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
159  void GEMV(ETransp trans, const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const A_type* A,
160  const OrdinalType& lda, const x_type* x, const OrdinalType& incx, const beta_type beta, ScalarType* y, const OrdinalType& incy) const;
161 
163  template <typename A_type>
164  void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType& n, const A_type* A,
165  const OrdinalType& lda, ScalarType* x, const OrdinalType& incx) const;
166 
169  template <typename alpha_type, typename x_type, typename y_type>
170  void GER(const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const x_type* x, const OrdinalType& incx,
171  const y_type* y, const OrdinalType& incy, ScalarType* A, const OrdinalType& lda) const;
173 
175 
176 
183  template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
184  void GEMM(ETransp transa, ETransp transb, const OrdinalType& m, const OrdinalType& n, const OrdinalType& k, const alpha_type alpha, const A_type* A, const OrdinalType& lda, const B_type* B, const OrdinalType& ldb, const beta_type beta, ScalarType* C, const OrdinalType& ldc) const;
185 
187  void
188  SWAP (const OrdinalType& n, ScalarType* const x, const OrdinalType& incx,
189  ScalarType* const y, const OrdinalType& incy) const;
190 
192  template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
193  void SYMM(ESide side, EUplo uplo, const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const A_type* A, const OrdinalType& lda, const B_type* B, const OrdinalType& ldb, const beta_type beta, ScalarType* C, const OrdinalType& ldc) const;
194 
196  template <typename alpha_type, typename A_type, typename beta_type>
197  void SYRK(EUplo uplo, ETransp trans, const OrdinalType& n, const OrdinalType& k, const alpha_type alpha, const A_type* A, const OrdinalType& lda, const beta_type beta, ScalarType* C, const OrdinalType& ldc) const;
198 
200  template <typename alpha_type, typename A_type>
201  void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType& m, const OrdinalType& n,
202  const alpha_type alpha, const A_type* A, const OrdinalType& lda, ScalarType* B, const OrdinalType& ldb) const;
203 
205  template <typename alpha_type, typename A_type>
206  void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType& m, const OrdinalType& n,
207  const alpha_type alpha, const A_type* A, const OrdinalType& lda, ScalarType* B, const OrdinalType& ldb) const;
209  };
210 
211  template<typename OrdinalType, typename ScalarType>
212  class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS : public DefaultBLASImpl<OrdinalType,ScalarType>
213  {
214 
215  typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
216 
217  public:
219 
220 
222  inline BLAS(void) {}
223 
225  inline BLAS(const BLAS<OrdinalType, ScalarType>& /*BLAS_source*/) {}
226 
228  inline virtual ~BLAS(void) {}
230  };
231 
232 //------------------------------------------------------------------------------------------
233 // LEVEL 1 BLAS ROUTINES
234 //------------------------------------------------------------------------------------------
235 
243  namespace details {
244 
245  // Compute magnitude.
246  template<typename ScalarType, bool isComplex>
247  class MagValue {
248  public:
249  void
250  blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const;
251  };
252 
253  // Complex-arithmetic specialization.
254  template<typename ScalarType>
255  class MagValue<ScalarType, true> {
256  public:
257  void
258  blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const;
259  };
260 
261  // Real-arithmetic specialization.
262  template<typename ScalarType>
263  class MagValue<ScalarType, false> {
264  public:
265  void
266  blas_dabs1(const ScalarType* a, ScalarType* ret) const;
267  };
268 
269  template<typename ScalarType, bool isComplex>
270  class GivensRotator {};
271 
272  // Complex-arithmetic specialization.
273  template<typename ScalarType>
274  class GivensRotator<ScalarType, true> {
275  public:
276  typedef typename ScalarTraits<ScalarType>::magnitudeType c_type;
277  void
278  ROTG (ScalarType* ca,
279  ScalarType* cb,
281  ScalarType* s) const;
282  };
283 
284  // Real-arithmetic specialization.
285  template<typename ScalarType>
286  class GivensRotator<ScalarType, false> {
287  public:
288  typedef ScalarType c_type;
289  void
290  ROTG (ScalarType* da,
291  ScalarType* db,
292  ScalarType* c,
293  ScalarType* s) const;
294 
307  ScalarType SIGN (const ScalarType& x, const ScalarType& y) const {
308  typedef ScalarTraits<ScalarType> STS;
309 
310  if (y > STS::zero()) {
311  return STS::magnitude (x);
312  } else if (y < STS::zero()) {
313  return -STS::magnitude (x);
314  } else { // y == STS::zero()
315  // Suppose that ScalarType& implements signed zero, as IEEE
316  // 754 - compliant floating-point numbers should. You can't
317  // use == to test for signed zero, since +0 == -0. However,
318  // 1/0 = Inf > 0 and 1/-0 = -Inf < 0. Let's hope ScalarType
319  // supports Inf... we don't need to test for Inf, just see
320  // if it's greater than or less than zero.
321  //
322  // NOTE: This ONLY works if ScalarType is real. Complex
323  // infinity doesn't have a sign, so we can't compare it with
324  // zero. That's OK, because finite complex numbers don't
325  // have a sign either; they have an angle.
326  ScalarType signedInfinity = STS::one() / y;
327  if (signedInfinity > STS::zero()) {
328  return STS::magnitude (x);
329  } else {
330  // Even if ScalarType doesn't implement signed zero,
331  // Fortran's SIGN intrinsic returns -ABS(X) if the second
332  // argument Y is zero. We imitate this behavior here.
333  return -STS::magnitude (x);
334  }
335  }
336  }
337  };
338 
339  // Implementation of complex-arithmetic specialization.
340  template<typename ScalarType>
341  void
342  GivensRotator<ScalarType, true>::
343  ROTG (ScalarType* ca,
344  ScalarType* cb,
346  ScalarType* s) const
347  {
348  typedef ScalarTraits<ScalarType> STS;
349  typedef typename STS::magnitudeType MagnitudeType;
350  typedef ScalarTraits<MagnitudeType> STM;
351 
352  // This is a straightforward translation into C++ of the
353  // reference BLAS' implementation of ZROTG. You can get
354  // the Fortran 77 source code of ZROTG here:
355  //
356  // http://www.netlib.org/blas/zrotg.f
357  //
358  // I used the following rules to translate Fortran types and
359  // intrinsic functions into C++:
360  //
361  // DOUBLE PRECISION -> MagnitudeType
362  // DOUBLE COMPLEX -> ScalarType
363  // CDABS -> STS::magnitude
364  // DCMPLX -> ScalarType constructor (assuming that ScalarType
365  // is std::complex<MagnitudeType>)
366  // DCONJG -> STS::conjugate
367  // DSQRT -> STM::squareroot
368  ScalarType alpha;
369  MagnitudeType norm, scale;
370 
371  if (STS::magnitude (*ca) == STM::zero()) {
372  *c = STM::zero();
373  *s = STS::one();
374  *ca = *cb;
375  } else {
376  scale = STS::magnitude (*ca) + STS::magnitude (*cb);
377  { // I introduced temporaries into the translated BLAS code in
378  // order to make the expression easier to read and also save a
379  // few floating-point operations.
380  const MagnitudeType ca_scaled =
381  STS::magnitude (*ca / ScalarType(scale, STM::zero()));
382  const MagnitudeType cb_scaled =
383  STS::magnitude (*cb / ScalarType(scale, STM::zero()));
384  norm = scale *
385  STM::squareroot (ca_scaled*ca_scaled + cb_scaled*cb_scaled);
386  }
387  alpha = *ca / STS::magnitude (*ca);
388  *c = STS::magnitude (*ca) / norm;
389  *s = alpha * STS::conjugate (*cb) / norm;
390  *ca = alpha * norm;
391  }
392  }
393 
394  // Implementation of real-arithmetic specialization.
395  template<typename ScalarType>
396  void
397  GivensRotator<ScalarType, false>::
398  ROTG (ScalarType* da,
399  ScalarType* db,
400  ScalarType* c,
401  ScalarType* s) const
402  {
403  typedef ScalarTraits<ScalarType> STS;
404 
405  // This is a straightforward translation into C++ of the
406  // reference BLAS' implementation of DROTG. You can get
407  // the Fortran 77 source code of DROTG here:
408  //
409  // http://www.netlib.org/blas/drotg.f
410  //
411  // I used the following rules to translate Fortran types and
412  // intrinsic functions into C++:
413  //
414  // DOUBLE PRECISION -> ScalarType
415  // DABS -> STS::magnitude
416  // DSQRT -> STM::squareroot
417  // DSIGN -> SIGN (see below)
418  //
419  // DSIGN(x,y) (the old DOUBLE PRECISION type-specific form of
420  // the Fortran type-generic SIGN intrinsic) required special
421  // translation, which we did in a separate utility function in
422  // the specializaton of GivensRotator for real arithmetic.
423  // (ROTG for complex arithmetic doesn't require this function.)
424  // C99 provides a copysign() math library function, but we are
425  // not able to rely on the existence of C99 functions here.
426  ScalarType r, roe, scale, z;
427 
428  roe = *db;
429  if (STS::magnitude (*da) > STS::magnitude (*db)) {
430  roe = *da;
431  }
432  scale = STS::magnitude (*da) + STS::magnitude (*db);
433  if (scale == STS::zero()) {
434  *c = STS::one();
435  *s = STS::zero();
436  r = STS::zero();
437  z = STS::zero();
438  } else {
439  // I introduced temporaries into the translated BLAS code in
440  // order to make the expression easier to read and also save
441  // a few floating-point& operations.
442  const ScalarType da_scaled = *da / scale;
443  const ScalarType db_scaled = *db / scale;
444  r = scale * STS::squareroot (da_scaled*da_scaled + db_scaled*db_scaled);
445  r = SIGN (STS::one(), roe) * r;
446  *c = *da / r;
447  *s = *db / r;
448  z = STS::one();
449  if (STS::magnitude (*da) > STS::magnitude (*db)) {
450  z = *s;
451  }
452  if (STS::magnitude (*db) >= STS::magnitude (*da) && *c != STS::zero()) {
453  z = STS::one() / *c;
454  }
455  }
456 
457  *da = r;
458  *db = z;
459  }
460 
461  // Real-valued implementation of MagValue
462  template<typename ScalarType>
463  void
464  MagValue<ScalarType, false>::
465  blas_dabs1(const ScalarType* a, ScalarType* ret) const
466  {
468  }
469 
470  // Complex-valued implementation of MagValue
471  template<typename ScalarType>
472  void
473  MagValue<ScalarType, true>::
474  blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const
475  {
476  *ret = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->real());
477  *ret += ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->imag());
478  }
479 
480  } // namespace details
481 
482  template<typename OrdinalType, typename ScalarType>
483  void
485  ROTG (ScalarType* da,
486  ScalarType* db,
487  rotg_c_type* c,
488  ScalarType* s) const
489  {
490  details::GivensRotator<ScalarType> rotator;
491  rotator.ROTG (da, db, c, s);
492  }
493 
494  template<typename OrdinalType, typename ScalarType>
495  void DefaultBLASImpl<OrdinalType,ScalarType>::ROT(const OrdinalType& n, ScalarType* dx, const OrdinalType& incx, ScalarType* dy, const OrdinalType& incy, MagnitudeType* c, ScalarType* s) const
496  {
497  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
498  ScalarType sconj = Teuchos::ScalarTraits<ScalarType>::conjugate(*s);
499  if (n <= 0) return;
500  if (incx==1 && incy==1) {
501  for(OrdinalType i=0; i<n; ++i) {
502  ScalarType temp = *c*dx[i] + *s*dy[i];
503  dy[i] = *c*dy[i] - sconj*dx[i];
504  dx[i] = temp;
505  }
506  }
507  else {
508  OrdinalType ix = 0, iy = 0;
509  if (incx < izero) ix = (-n+1)*incx;
510  if (incy < izero) iy = (-n+1)*incy;
511  for(OrdinalType i=0; i<n; ++i) {
512  ScalarType temp = *c*dx[ix] + *s*dy[iy];
513  dy[iy] = *c*dy[iy] - sconj*dx[ix];
514  dx[ix] = temp;
515  ix += incx;
516  iy += incy;
517  }
518  }
519  }
520 
521  template<typename OrdinalType, typename ScalarType>
522  void DefaultBLASImpl<OrdinalType, ScalarType>::SCAL(const OrdinalType& n, const ScalarType& alpha, ScalarType* x, const OrdinalType& incx) const
523  {
524  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
525  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
526  OrdinalType i, ix = izero;
527 
528  if ( n < ione || incx < ione )
529  return;
530 
531  // Scale the vector.
532  for(i = izero; i < n; i++)
533  {
534  x[ix] *= alpha;
535  ix += incx;
536  }
537  } /* end SCAL */
538 
539  template<typename OrdinalType, typename ScalarType>
540  void DefaultBLASImpl<OrdinalType, ScalarType>::COPY(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx, ScalarType* y, const OrdinalType& incy) const
541  {
542  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
543  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
544  OrdinalType i, ix = izero, iy = izero;
545  if ( n > izero ) {
546  // Set the initial indices (ix, iy).
547  if (incx < izero) { ix = (-n+ione)*incx; }
548  if (incy < izero) { iy = (-n+ione)*incy; }
549 
550  for(i = izero; i < n; i++)
551  {
552  y[iy] = x[ix];
553  ix += incx;
554  iy += incy;
555  }
556  }
557  } /* end COPY */
558 
559  template<typename OrdinalType, typename ScalarType>
560  template <typename alpha_type, typename x_type>
561  void DefaultBLASImpl<OrdinalType, ScalarType>::AXPY(const OrdinalType& n, const alpha_type alpha, const x_type* x, const OrdinalType& incx, ScalarType* y, const OrdinalType& incy) const
562  {
563  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
564  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
565  OrdinalType i, ix = izero, iy = izero;
566  if( n > izero && alpha != ScalarTraits<alpha_type>::zero())
567  {
568  // Set the initial indices (ix, iy).
569  if (incx < izero) { ix = (-n+ione)*incx; }
570  if (incy < izero) { iy = (-n+ione)*incy; }
571 
572  for(i = izero; i < n; i++)
573  {
574  y[iy] += alpha * x[ix];
575  ix += incx;
576  iy += incy;
577  }
578  }
579  } /* end AXPY */
580 
581  template<typename OrdinalType, typename ScalarType>
582  typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::ASUM(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx) const
583  {
584  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
585  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
586  typename ScalarTraits<ScalarType>::magnitudeType temp, result =
588  OrdinalType i, ix = izero;
589 
590  if ( n < ione || incx < ione )
591  return result;
592 
593  details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
594  for (i = izero; i < n; i++)
595  {
596  mval.blas_dabs1( &x[ix], &temp );
597  result += temp;
598  ix += incx;
599  }
600 
601  return result;
602  } /* end ASUM */
603 
604  template<typename OrdinalType, typename ScalarType>
605  template <typename x_type, typename y_type>
606  ScalarType DefaultBLASImpl<OrdinalType, ScalarType>::DOT(const OrdinalType& n, const x_type* x, const OrdinalType& incx, const y_type* y, const OrdinalType& incy) const
607  {
608  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
609  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
610  ScalarType result = ScalarTraits<ScalarType>::zero();
611  OrdinalType i, ix = izero, iy = izero;
612  if( n > izero )
613  {
614  // Set the initial indices (ix,iy).
615  if (incx < izero) { ix = (-n+ione)*incx; }
616  if (incy < izero) { iy = (-n+ione)*incy; }
617 
618  for(i = izero; i < n; i++)
619  {
620  result += ScalarTraits<x_type>::conjugate(x[ix]) * y[iy];
621  ix += incx;
622  iy += incy;
623  }
624  }
625  return result;
626  } /* end DOT */
627 
628  template<typename OrdinalType, typename ScalarType>
629  typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::NRM2(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx) const
630  {
631  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
632  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
635  OrdinalType i, ix = izero;
636 
637  if ( n < ione || incx < ione )
638  return result;
639 
640  for(i = izero; i < n; i++)
641  {
643  ix += incx;
644  }
646  return result;
647  } /* end NRM2 */
648 
649  template<typename OrdinalType, typename ScalarType>
650  OrdinalType DefaultBLASImpl<OrdinalType, ScalarType>::IAMAX(const OrdinalType& n, const ScalarType* x, const OrdinalType& incx) const
651  {
652  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
653  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
654  OrdinalType result = izero, ix = izero, i;
659 
660  if ( n < ione || incx < ione )
661  return result;
662 
663  details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
664 
665  mval.blas_dabs1( &x[ix], &maxval );
666  ix += incx;
667  for(i = ione; i < n; i++)
668  {
669  mval.blas_dabs1( &x[ix], &curval );
670  if(curval > maxval)
671  {
672  result = i;
673  maxval = curval;
674  }
675  ix += incx;
676  }
677 
678  return result + 1; // the BLAS I?AMAX functions return 1-indexed (Fortran-style) values
679  } /* end IAMAX */
680 
681 //------------------------------------------------------------------------------------------
682 // LEVEL 2 BLAS ROUTINES
683 //------------------------------------------------------------------------------------------
684  template<typename OrdinalType, typename ScalarType>
685  template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
686  void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const A_type* A, const OrdinalType& lda, const x_type* x, const OrdinalType& incx, const beta_type beta, ScalarType* y, const OrdinalType& incy) const
687  {
688  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
689  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
690  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
691  beta_type beta_zero = ScalarTraits<beta_type>::zero();
692  x_type x_zero = ScalarTraits<x_type>::zero();
693  ScalarType y_zero = ScalarTraits<ScalarType>::zero();
694  beta_type beta_one = ScalarTraits<beta_type>::one();
695  bool noConj = true;
696  bool BadArgument = false;
697 
698  // Quick return if there is nothing to do!
699  if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; }
700 
701  // Otherwise, we need to check the argument list.
702  if( m < izero ) {
703  std::cout << "BLAS::GEMV Error: M == " << m << std::endl;
704  BadArgument = true;
705  }
706  if( n < izero ) {
707  std::cout << "BLAS::GEMV Error: N == " << n << std::endl;
708  BadArgument = true;
709  }
710  if( lda < m ) {
711  std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;
712  BadArgument = true;
713  }
714  if( incx == izero ) {
715  std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl;
716  BadArgument = true;
717  }
718  if( incy == izero ) {
719  std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl;
720  BadArgument = true;
721  }
722 
723  if(!BadArgument) {
724  OrdinalType i, j, lenx, leny, ix, iy, jx, jy;
725  OrdinalType kx = izero, ky = izero;
726  ScalarType temp;
727 
728  // Determine the lengths of the vectors x and y.
729  if(ETranspChar[trans] == 'N') {
730  lenx = n;
731  leny = m;
732  } else {
733  lenx = m;
734  leny = n;
735  }
736 
737  // Determine if this is a conjugate tranpose
738  noConj = (ETranspChar[trans] == 'T');
739 
740  // Set the starting pointers for the vectors x and y if incx/y < 0.
741  if (incx < izero ) { kx = (ione - lenx)*incx; }
742  if (incy < izero ) { ky = (ione - leny)*incy; }
743 
744  // Form y = beta*y
745  ix = kx; iy = ky;
746  if(beta != beta_one) {
747  if (incy == ione) {
748  if (beta == beta_zero) {
749  for(i = izero; i < leny; i++) { y[i] = y_zero; }
750  } else {
751  for(i = izero; i < leny; i++) { y[i] *= beta; }
752  }
753  } else {
754  if (beta == beta_zero) {
755  for(i = izero; i < leny; i++) {
756  y[iy] = y_zero;
757  iy += incy;
758  }
759  } else {
760  for(i = izero; i < leny; i++) {
761  y[iy] *= beta;
762  iy += incy;
763  }
764  }
765  }
766  }
767 
768  // Return if we don't have to do anything more.
769  if(alpha == alpha_zero) { return; }
770 
771  if( ETranspChar[trans] == 'N' ) {
772  // Form y = alpha*A*y
773  jx = kx;
774  if (incy == ione) {
775  for(j = izero; j < n; j++) {
776  if (x[jx] != x_zero) {
777  temp = alpha*x[jx];
778  for(i = izero; i < m; i++) {
779  y[i] += temp*A[j*lda + i];
780  }
781  }
782  jx += incx;
783  }
784  } else {
785  for(j = izero; j < n; j++) {
786  if (x[jx] != x_zero) {
787  temp = alpha*x[jx];
788  iy = ky;
789  for(i = izero; i < m; i++) {
790  y[iy] += temp*A[j*lda + i];
791  iy += incy;
792  }
793  }
794  jx += incx;
795  }
796  }
797  } else {
798  jy = ky;
799  if (incx == ione) {
800  for(j = izero; j < n; j++) {
801  temp = y_zero;
802  if ( noConj ) {
803  for(i = izero; i < m; i++) {
804  temp += A[j*lda + i]*x[i];
805  }
806  } else {
807  for(i = izero; i < m; i++) {
808  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
809  }
810  }
811  y[jy] += alpha*temp;
812  jy += incy;
813  }
814  } else {
815  for(j = izero; j < n; j++) {
816  temp = y_zero;
817  ix = kx;
818  if ( noConj ) {
819  for (i = izero; i < m; i++) {
820  temp += A[j*lda + i]*x[ix];
821  ix += incx;
822  }
823  } else {
824  for (i = izero; i < m; i++) {
825  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
826  ix += incx;
827  }
828  }
829  y[jy] += alpha*temp;
830  jy += incy;
831  }
832  }
833  }
834  } /* if (!BadArgument) */
835  } /* end GEMV */
836 
837  template<typename OrdinalType, typename ScalarType>
838  template <typename A_type>
839  void DefaultBLASImpl<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType& n, const A_type* A, const OrdinalType& lda, ScalarType* x, const OrdinalType& incx) const
840  {
841  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
842  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
843  ScalarType zero = ScalarTraits<ScalarType>::zero();
844  bool BadArgument = false;
845  bool noConj = true;
846 
847  // Quick return if there is nothing to do!
848  if( n == izero ){ return; }
849 
850  // Otherwise, we need to check the argument list.
851  if( n < izero ) {
852  std::cout << "BLAS::TRMV Error: N == " << n << std::endl;
853  BadArgument = true;
854  }
855  if( lda < n ) {
856  std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;
857  BadArgument = true;
858  }
859  if( incx == izero ) {
860  std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl;
861  BadArgument = true;
862  }
863 
864  if(!BadArgument) {
865  OrdinalType i, j, ix, jx, kx = izero;
866  ScalarType temp;
867  bool noUnit = (EDiagChar[diag] == 'N');
868 
869  // Determine if this is a conjugate tranpose
870  noConj = (ETranspChar[trans] == 'T');
871 
872  // Set the starting pointer for the vector x if incx < 0.
873  if (incx < izero) { kx = (-n+ione)*incx; }
874 
875  // Start the operations for a nontransposed triangular matrix
876  if (ETranspChar[trans] == 'N') {
877  /* Compute x = A*x */
878  if (EUploChar[uplo] == 'U') {
879  /* A is an upper triangular matrix */
880  if (incx == ione) {
881  for (j=izero; j<n; j++) {
882  if (x[j] != zero) {
883  temp = x[j];
884  for (i=izero; i<j; i++) {
885  x[i] += temp*A[j*lda + i];
886  }
887  if ( noUnit )
888  x[j] *= A[j*lda + j];
889  }
890  }
891  } else {
892  jx = kx;
893  for (j=izero; j<n; j++) {
894  if (x[jx] != zero) {
895  temp = x[jx];
896  ix = kx;
897  for (i=izero; i<j; i++) {
898  x[ix] += temp*A[j*lda + i];
899  ix += incx;
900  }
901  if ( noUnit )
902  x[jx] *= A[j*lda + j];
903  }
904  jx += incx;
905  }
906  } /* if (incx == ione) */
907  } else { /* A is a lower triangular matrix */
908  if (incx == ione) {
909  for (j=n-ione; j>-ione; j--) {
910  if (x[j] != zero) {
911  temp = x[j];
912  for (i=n-ione; i>j; i--) {
913  x[i] += temp*A[j*lda + i];
914  }
915  if ( noUnit )
916  x[j] *= A[j*lda + j];
917  }
918  }
919  } else {
920  kx += (n-ione)*incx;
921  jx = kx;
922  for (j=n-ione; j>-ione; j--) {
923  if (x[jx] != zero) {
924  temp = x[jx];
925  ix = kx;
926  for (i=n-ione; i>j; i--) {
927  x[ix] += temp*A[j*lda + i];
928  ix -= incx;
929  }
930  if ( noUnit )
931  x[jx] *= A[j*lda + j];
932  }
933  jx -= incx;
934  }
935  }
936  } /* if (EUploChar[uplo]=='U') */
937  } else { /* A is transposed/conjugated */
938  /* Compute x = A'*x */
939  if (EUploChar[uplo]=='U') {
940  /* A is an upper triangular matrix */
941  if (incx == ione) {
942  for (j=n-ione; j>-ione; j--) {
943  temp = x[j];
944  if ( noConj ) {
945  if ( noUnit )
946  temp *= A[j*lda + j];
947  for (i=j-ione; i>-ione; i--) {
948  temp += A[j*lda + i]*x[i];
949  }
950  } else {
951  if ( noUnit )
952  temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
953  for (i=j-ione; i>-ione; i--) {
954  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
955  }
956  }
957  x[j] = temp;
958  }
959  } else {
960  jx = kx + (n-ione)*incx;
961  for (j=n-ione; j>-ione; j--) {
962  temp = x[jx];
963  ix = jx;
964  if ( noConj ) {
965  if ( noUnit )
966  temp *= A[j*lda + j];
967  for (i=j-ione; i>-ione; i--) {
968  ix -= incx;
969  temp += A[j*lda + i]*x[ix];
970  }
971  } else {
972  if ( noUnit )
973  temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
974  for (i=j-ione; i>-ione; i--) {
975  ix -= incx;
976  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
977  }
978  }
979  x[jx] = temp;
980  jx -= incx;
981  }
982  }
983  } else {
984  /* A is a lower triangular matrix */
985  if (incx == ione) {
986  for (j=izero; j<n; j++) {
987  temp = x[j];
988  if ( noConj ) {
989  if ( noUnit )
990  temp *= A[j*lda + j];
991  for (i=j+ione; i<n; i++) {
992  temp += A[j*lda + i]*x[i];
993  }
994  } else {
995  if ( noUnit )
996  temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
997  for (i=j+ione; i<n; i++) {
998  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
999  }
1000  }
1001  x[j] = temp;
1002  }
1003  } else {
1004  jx = kx;
1005  for (j=izero; j<n; j++) {
1006  temp = x[jx];
1007  ix = jx;
1008  if ( noConj ) {
1009  if ( noUnit )
1010  temp *= A[j*lda + j];
1011  for (i=j+ione; i<n; i++) {
1012  ix += incx;
1013  temp += A[j*lda + i]*x[ix];
1014  }
1015  } else {
1016  if ( noUnit )
1017  temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
1018  for (i=j+ione; i<n; i++) {
1019  ix += incx;
1020  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
1021  }
1022  }
1023  x[jx] = temp;
1024  jx += incx;
1025  }
1026  }
1027  } /* if (EUploChar[uplo]=='U') */
1028  } /* if (ETranspChar[trans]=='N') */
1029  } /* if (!BadArgument) */
1030  } /* end TRMV */
1031 
1032  template<typename OrdinalType, typename ScalarType>
1033  template <typename alpha_type, typename x_type, typename y_type>
1034  void DefaultBLASImpl<OrdinalType, ScalarType>::GER(const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const x_type* x, const OrdinalType& incx, const y_type* y, const OrdinalType& incy, ScalarType* A, const OrdinalType& lda) const
1035  {
1036  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1037  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
1038  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1039  y_type y_zero = ScalarTraits<y_type>::zero();
1040  bool BadArgument = false;
1041 
1042  // Quick return if there is nothing to do!
1043  if( m == izero || n == izero || alpha == alpha_zero ){ return; }
1044 
1045  // Otherwise, we need to check the argument list.
1046  if( m < izero ) {
1047  std::cout << "BLAS::GER Error: M == " << m << std::endl;
1048  BadArgument = true;
1049  }
1050  if( n < izero ) {
1051  std::cout << "BLAS::GER Error: N == " << n << std::endl;
1052  BadArgument = true;
1053  }
1054  if( lda < m ) {
1055  std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;
1056  BadArgument = true;
1057  }
1058  if( incx == 0 ) {
1059  std::cout << "BLAS::GER Error: INCX == 0"<< std::endl;
1060  BadArgument = true;
1061  }
1062  if( incy == 0 ) {
1063  std::cout << "BLAS::GER Error: INCY == 0"<< std::endl;
1064  BadArgument = true;
1065  }
1066 
1067  if(!BadArgument) {
1068  OrdinalType i, j, ix, jy = izero, kx = izero;
1069  ScalarType temp;
1070 
1071  // Set the starting pointers for the vectors x and y if incx/y < 0.
1072  if (incx < izero) { kx = (-m+ione)*incx; }
1073  if (incy < izero) { jy = (-n+ione)*incy; }
1074 
1075  // Start the operations for incx == 1
1076  if( incx == ione ) {
1077  for( j=izero; j<n; j++ ) {
1078  if ( y[jy] != y_zero ) {
1079  temp = alpha*y[jy];
1080  for ( i=izero; i<m; i++ ) {
1081  A[j*lda + i] += x[i]*temp;
1082  }
1083  }
1084  jy += incy;
1085  }
1086  }
1087  else { // Start the operations for incx != 1
1088  for( j=izero; j<n; j++ ) {
1089  if ( y[jy] != y_zero ) {
1090  temp = alpha*y[jy];
1091  ix = kx;
1092  for( i=izero; i<m; i++ ) {
1093  A[j*lda + i] += x[ix]*temp;
1094  ix += incx;
1095  }
1096  }
1097  jy += incy;
1098  }
1099  }
1100  } /* if(!BadArgument) */
1101  } /* end GER */
1102 
1103 //------------------------------------------------------------------------------------------
1104 // LEVEL 3 BLAS ROUTINES
1105 //------------------------------------------------------------------------------------------
1106 
1107  template<typename OrdinalType, typename ScalarType>
1108  template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
1109  void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType& m, const OrdinalType& n, const OrdinalType& k, const alpha_type alpha, const A_type* A, const OrdinalType& lda, const B_type* B, const OrdinalType& ldb, const beta_type beta, ScalarType* C, const OrdinalType& ldc) const
1110  {
1111 
1112  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1113  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1114  beta_type beta_zero = ScalarTraits<beta_type>::zero();
1115  B_type B_zero = ScalarTraits<B_type>::zero();
1116  ScalarType C_zero = ScalarTraits<ScalarType>::zero();
1117  beta_type beta_one = ScalarTraits<beta_type>::one();
1118  OrdinalType i, j, p;
1119  OrdinalType NRowA = m, NRowB = k;
1120  ScalarType temp;
1121  bool BadArgument = false;
1122  bool conjA = false, conjB = false;
1123 
1124  // Change dimensions of matrix if either matrix is transposed
1125  if( !(ETranspChar[transa]=='N') ) {
1126  NRowA = k;
1127  }
1128  if( !(ETranspChar[transb]=='N') ) {
1129  NRowB = n;
1130  }
1131 
1132  // Quick return if there is nothing to do!
1133  if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){ return; }
1134  if( m < izero ) {
1135  std::cout << "BLAS::GEMM Error: M == " << m << std::endl;
1136  BadArgument = true;
1137  }
1138  if( n < izero ) {
1139  std::cout << "BLAS::GEMM Error: N == " << n << std::endl;
1140  BadArgument = true;
1141  }
1142  if( k < izero ) {
1143  std::cout << "BLAS::GEMM Error: K == " << k << std::endl;
1144  BadArgument = true;
1145  }
1146  if( lda < NRowA ) {
1147  std::cout << "BLAS::GEMM Error: LDA < "<<NRowA<<std::endl;
1148  BadArgument = true;
1149  }
1150  if( ldb < NRowB ) {
1151  std::cout << "BLAS::GEMM Error: LDB < "<<NRowB<<std::endl;
1152  BadArgument = true;
1153  }
1154  if( ldc < m ) {
1155  std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;
1156  BadArgument = true;
1157  }
1158 
1159  if(!BadArgument) {
1160 
1161  // Determine if this is a conjugate tranpose
1162  conjA = (ETranspChar[transa] == 'C');
1163  conjB = (ETranspChar[transb] == 'C');
1164 
1165  // Only need to scale the resulting matrix C.
1166  if( alpha == alpha_zero ) {
1167  if( beta == beta_zero ) {
1168  for (j=izero; j<n; j++) {
1169  for (i=izero; i<m; i++) {
1170  C[j*ldc + i] = C_zero;
1171  }
1172  }
1173  } else {
1174  for (j=izero; j<n; j++) {
1175  for (i=izero; i<m; i++) {
1176  C[j*ldc + i] *= beta;
1177  }
1178  }
1179  }
1180  return;
1181  }
1182  //
1183  // Now start the operations.
1184  //
1185  if ( ETranspChar[transb]=='N' ) {
1186  if ( ETranspChar[transa]=='N' ) {
1187  // Compute C = alpha*A*B + beta*C
1188  for (j=izero; j<n; j++) {
1189  if( beta == beta_zero ) {
1190  for (i=izero; i<m; i++){
1191  C[j*ldc + i] = C_zero;
1192  }
1193  } else if( beta != beta_one ) {
1194  for (i=izero; i<m; i++){
1195  C[j*ldc + i] *= beta;
1196  }
1197  }
1198  for (p=izero; p<k; p++){
1199  if (B[j*ldb + p] != B_zero ){
1200  temp = alpha*B[j*ldb + p];
1201  for (i=izero; i<m; i++) {
1202  C[j*ldc + i] += temp*A[p*lda + i];
1203  }
1204  }
1205  }
1206  }
1207  } else if ( conjA ) {
1208  // Compute C = alpha*conj(A')*B + beta*C
1209  for (j=izero; j<n; j++) {
1210  for (i=izero; i<m; i++) {
1211  temp = C_zero;
1212  for (p=izero; p<k; p++) {
1213  temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])*B[j*ldb + p];
1214  }
1215  if (beta == beta_zero) {
1216  C[j*ldc + i] = alpha*temp;
1217  } else {
1218  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1219  }
1220  }
1221  }
1222  } else {
1223  // Compute C = alpha*A'*B + beta*C
1224  for (j=izero; j<n; j++) {
1225  for (i=izero; i<m; i++) {
1226  temp = C_zero;
1227  for (p=izero; p<k; p++) {
1228  temp += A[i*lda + p]*B[j*ldb + p];
1229  }
1230  if (beta == beta_zero) {
1231  C[j*ldc + i] = alpha*temp;
1232  } else {
1233  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1234  }
1235  }
1236  }
1237  }
1238  } else if ( ETranspChar[transa]=='N' ) {
1239  if ( conjB ) {
1240  // Compute C = alpha*A*conj(B') + beta*C
1241  for (j=izero; j<n; j++) {
1242  if (beta == beta_zero) {
1243  for (i=izero; i<m; i++) {
1244  C[j*ldc + i] = C_zero;
1245  }
1246  } else if ( beta != beta_one ) {
1247  for (i=izero; i<m; i++) {
1248  C[j*ldc + i] *= beta;
1249  }
1250  }
1251  for (p=izero; p<k; p++) {
1252  if (B[p*ldb + j] != B_zero) {
1253  temp = alpha*ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
1254  for (i=izero; i<m; i++) {
1255  C[j*ldc + i] += temp*A[p*lda + i];
1256  }
1257  }
1258  }
1259  }
1260  } else {
1261  // Compute C = alpha*A*B' + beta*C
1262  for (j=izero; j<n; j++) {
1263  if (beta == beta_zero) {
1264  for (i=izero; i<m; i++) {
1265  C[j*ldc + i] = C_zero;
1266  }
1267  } else if ( beta != beta_one ) {
1268  for (i=izero; i<m; i++) {
1269  C[j*ldc + i] *= beta;
1270  }
1271  }
1272  for (p=izero; p<k; p++) {
1273  if (B[p*ldb + j] != B_zero) {
1274  temp = alpha*B[p*ldb + j];
1275  for (i=izero; i<m; i++) {
1276  C[j*ldc + i] += temp*A[p*lda + i];
1277  }
1278  }
1279  }
1280  }
1281  }
1282  } else if ( conjA ) {
1283  if ( conjB ) {
1284  // Compute C = alpha*conj(A')*conj(B') + beta*C
1285  for (j=izero; j<n; j++) {
1286  for (i=izero; i<m; i++) {
1287  temp = C_zero;
1288  for (p=izero; p<k; p++) {
1289  temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])
1290  * ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
1291  }
1292  if (beta == beta_zero) {
1293  C[j*ldc + i] = alpha*temp;
1294  } else {
1295  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1296  }
1297  }
1298  }
1299  } else {
1300  // Compute C = alpha*conj(A')*B' + beta*C
1301  for (j=izero; j<n; j++) {
1302  for (i=izero; i<m; i++) {
1303  temp = C_zero;
1304  for (p=izero; p<k; p++) {
1305  temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])
1306  * B[p*ldb + j];
1307  }
1308  if (beta == beta_zero) {
1309  C[j*ldc + i] = alpha*temp;
1310  } else {
1311  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1312  }
1313  }
1314  }
1315  }
1316  } else {
1317  if ( conjB ) {
1318  // Compute C = alpha*A'*conj(B') + beta*C
1319  for (j=izero; j<n; j++) {
1320  for (i=izero; i<m; i++) {
1321  temp = C_zero;
1322  for (p=izero; p<k; p++) {
1323  temp += A[i*lda + p]
1324  * ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
1325  }
1326  if (beta == beta_zero) {
1327  C[j*ldc + i] = alpha*temp;
1328  } else {
1329  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1330  }
1331  }
1332  }
1333  } else {
1334  // Compute C = alpha*A'*B' + beta*C
1335  for (j=izero; j<n; j++) {
1336  for (i=izero; i<m; i++) {
1337  temp = C_zero;
1338  for (p=izero; p<k; p++) {
1339  temp += A[i*lda + p]*B[p*ldb + j];
1340  }
1341  if (beta == beta_zero) {
1342  C[j*ldc + i] = alpha*temp;
1343  } else {
1344  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1345  }
1346  }
1347  }
1348  } // end if (ETranspChar[transa]=='N') ...
1349  } // end if (ETranspChar[transb]=='N') ...
1350  } // end if (!BadArgument) ...
1351  } // end of GEMM
1352 
1353 
1354  template<typename OrdinalType, typename ScalarType>
1356  SWAP (const OrdinalType& n, ScalarType* const x, const OrdinalType& incx,
1357  ScalarType* const y, const OrdinalType& incy) const
1358  {
1359  if (n <= 0) {
1360  return;
1361  }
1362 
1363  if (incx == 1 && incy == 1) {
1364  for (int i = 0; i < n; ++i) {
1365  ScalarType tmp = x[i];
1366  x[i] = y[i];
1367  y[i] = tmp;
1368  }
1369  return;
1370  }
1371 
1372  int ix = 1;
1373  int iy = 1;
1374  if (incx < 0) {
1375  ix = (1-n) * incx + 1;
1376  }
1377  if (incy < 0) {
1378  iy = (1-n) * incy + 1;
1379  }
1380 
1381  for (int i = 1; i <= n; ++i) {
1382  ScalarType tmp = x[ix - 1];
1383  x[ix - 1] = y[iy - 1];
1384  y[iy - 1] = tmp;
1385  ix += incx;
1386  iy += incy;
1387  }
1388  }
1389 
1390 
1391  template<typename OrdinalType, typename ScalarType>
1392  template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
1393  void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const A_type* A, const OrdinalType& lda, const B_type* B, const OrdinalType& ldb, const beta_type beta, ScalarType* C, const OrdinalType& ldc) const
1394  {
1395  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1396  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
1397  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1398  beta_type beta_zero = ScalarTraits<beta_type>::zero();
1399  ScalarType C_zero = ScalarTraits<ScalarType>::zero();
1400  beta_type beta_one = ScalarTraits<beta_type>::one();
1401  OrdinalType i, j, k, NRowA = m;
1402  ScalarType temp1, temp2;
1403  bool BadArgument = false;
1404  bool Upper = (EUploChar[uplo] == 'U');
1405  if (ESideChar[side] == 'R') { NRowA = n; }
1406 
1407  // Quick return.
1408  if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; }
1409  if( m < izero ) {
1410  std::cout << "BLAS::SYMM Error: M == "<< m << std::endl;
1411  BadArgument = true; }
1412  if( n < izero ) {
1413  std::cout << "BLAS::SYMM Error: N == "<< n << std::endl;
1414  BadArgument = true; }
1415  if( lda < NRowA ) {
1416  std::cout << "BLAS::SYMM Error: LDA < "<<NRowA<<std::endl;
1417  BadArgument = true; }
1418  if( ldb < m ) {
1419  std::cout << "BLAS::SYMM Error: LDB < MAX(1,M)"<<std::endl;
1420  BadArgument = true; }
1421  if( ldc < m ) {
1422  std::cout << "BLAS::SYMM Error: LDC < MAX(1,M)"<<std::endl;
1423  BadArgument = true; }
1424 
1425  if(!BadArgument) {
1426 
1427  // Only need to scale C and return.
1428  if (alpha == alpha_zero) {
1429  if (beta == beta_zero ) {
1430  for (j=izero; j<n; j++) {
1431  for (i=izero; i<m; i++) {
1432  C[j*ldc + i] = C_zero;
1433  }
1434  }
1435  } else {
1436  for (j=izero; j<n; j++) {
1437  for (i=izero; i<m; i++) {
1438  C[j*ldc + i] *= beta;
1439  }
1440  }
1441  }
1442  return;
1443  }
1444 
1445  if ( ESideChar[side] == 'L') {
1446  // Compute C = alpha*A*B + beta*C
1447 
1448  if (Upper) {
1449  // The symmetric part of A is stored in the upper triangular part of the matrix.
1450  for (j=izero; j<n; j++) {
1451  for (i=izero; i<m; i++) {
1452  temp1 = alpha*B[j*ldb + i];
1453  temp2 = C_zero;
1454  for (k=izero; k<i; k++) {
1455  C[j*ldc + k] += temp1*A[i*lda + k];
1456  temp2 += B[j*ldb + k]*A[i*lda + k];
1457  }
1458  if (beta == beta_zero) {
1459  C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1460  } else {
1461  C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1462  }
1463  }
1464  }
1465  } else {
1466  // The symmetric part of A is stored in the lower triangular part of the matrix.
1467  for (j=izero; j<n; j++) {
1468  for (i=m-ione; i>-ione; i--) {
1469  temp1 = alpha*B[j*ldb + i];
1470  temp2 = C_zero;
1471  for (k=i+ione; k<m; k++) {
1472  C[j*ldc + k] += temp1*A[i*lda + k];
1473  temp2 += B[j*ldb + k]*A[i*lda + k];
1474  }
1475  if (beta == beta_zero) {
1476  C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1477  } else {
1478  C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1479  }
1480  }
1481  }
1482  }
1483  } else {
1484  // Compute C = alpha*B*A + beta*C.
1485  for (j=izero; j<n; j++) {
1486  temp1 = alpha*A[j*lda + j];
1487  if (beta == beta_zero) {
1488  for (i=izero; i<m; i++) {
1489  C[j*ldc + i] = temp1*B[j*ldb + i];
1490  }
1491  } else {
1492  for (i=izero; i<m; i++) {
1493  C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
1494  }
1495  }
1496  for (k=izero; k<j; k++) {
1497  if (Upper) {
1498  temp1 = alpha*A[j*lda + k];
1499  } else {
1500  temp1 = alpha*A[k*lda + j];
1501  }
1502  for (i=izero; i<m; i++) {
1503  C[j*ldc + i] += temp1*B[k*ldb + i];
1504  }
1505  }
1506  for (k=j+ione; k<n; k++) {
1507  if (Upper) {
1508  temp1 = alpha*A[k*lda + j];
1509  } else {
1510  temp1 = alpha*A[j*lda + k];
1511  }
1512  for (i=izero; i<m; i++) {
1513  C[j*ldc + i] += temp1*B[k*ldb + i];
1514  }
1515  }
1516  }
1517  } // end if (ESideChar[side]=='L') ...
1518  } // end if(!BadArgument) ...
1519 } // end SYMM
1520 
1521  template<typename OrdinalType, typename ScalarType>
1522  template <typename alpha_type, typename A_type, typename beta_type>
1523  void DefaultBLASImpl<OrdinalType, ScalarType>::SYRK(EUplo uplo, ETransp trans, const OrdinalType& n, const OrdinalType& k, const alpha_type alpha, const A_type* A, const OrdinalType& lda, const beta_type beta, ScalarType* C, const OrdinalType& ldc) const
1524  {
1525  typedef TypeNameTraits<OrdinalType> OTNT;
1526  typedef TypeNameTraits<ScalarType> STNT;
1527 
1528  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1529  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1530  beta_type beta_zero = ScalarTraits<beta_type>::zero();
1531  beta_type beta_one = ScalarTraits<beta_type>::one();
1532  A_type temp, A_zero = ScalarTraits<A_type>::zero();
1533  ScalarType C_zero = ScalarTraits<ScalarType>::zero();
1534  OrdinalType i, j, l, NRowA = n;
1535  bool BadArgument = false;
1536  bool Upper = (EUploChar[uplo] == 'U');
1537 
1540  && (trans == CONJ_TRANS),
1541  std::logic_error,
1542  "Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::SYRK()"
1543  " does not support CONJ_TRANS for complex data types."
1544  );
1545 
1546  // Change dimensions of A matrix is transposed
1547  if( !(ETranspChar[trans]=='N') ) {
1548  NRowA = k;
1549  }
1550 
1551  // Quick return.
1552  if ( n==izero ) { return; }
1553  if ( ( (alpha==alpha_zero) || (k==izero) ) && (beta==beta_one) ) { return; }
1554  if( n < izero ) {
1555  std::cout << "BLAS::SYRK Error: N == "<< n <<std::endl;
1556  BadArgument = true; }
1557  if( k < izero ) {
1558  std::cout << "BLAS::SYRK Error: K == "<< k <<std::endl;
1559  BadArgument = true; }
1560  if( lda < NRowA ) {
1561  std::cout << "BLAS::SYRK Error: LDA < "<<NRowA<<std::endl;
1562  BadArgument = true; }
1563  if( ldc < n ) {
1564  std::cout << "BLAS::SYRK Error: LDC < MAX(1,N)"<<std::endl;
1565  BadArgument = true; }
1566 
1567  if(!BadArgument) {
1568 
1569  // Scale C when alpha is zero
1570  if (alpha == alpha_zero) {
1571  if (Upper) {
1572  if (beta==beta_zero) {
1573  for (j=izero; j<n; j++) {
1574  for (i=izero; i<=j; i++) {
1575  C[j*ldc + i] = C_zero;
1576  }
1577  }
1578  }
1579  else {
1580  for (j=izero; j<n; j++) {
1581  for (i=izero; i<=j; i++) {
1582  C[j*ldc + i] *= beta;
1583  }
1584  }
1585  }
1586  }
1587  else {
1588  if (beta==beta_zero) {
1589  for (j=izero; j<n; j++) {
1590  for (i=j; i<n; i++) {
1591  C[j*ldc + i] = C_zero;
1592  }
1593  }
1594  }
1595  else {
1596  for (j=izero; j<n; j++) {
1597  for (i=j; i<n; i++) {
1598  C[j*ldc + i] *= beta;
1599  }
1600  }
1601  }
1602  }
1603  return;
1604  }
1605 
1606  // Now we can start the computation
1607 
1608  if ( ETranspChar[trans]=='N' ) {
1609 
1610  // Form C <- alpha*A*A' + beta*C
1611  if (Upper) {
1612  for (j=izero; j<n; j++) {
1613  if (beta==beta_zero) {
1614  for (i=izero; i<=j; i++) {
1615  C[j*ldc + i] = C_zero;
1616  }
1617  }
1618  else if (beta!=beta_one) {
1619  for (i=izero; i<=j; i++) {
1620  C[j*ldc + i] *= beta;
1621  }
1622  }
1623  for (l=izero; l<k; l++) {
1624  if (A[l*lda + j] != A_zero) {
1625  temp = alpha*A[l*lda + j];
1626  for (i = izero; i <=j; i++) {
1627  C[j*ldc + i] += temp*A[l*lda + i];
1628  }
1629  }
1630  }
1631  }
1632  }
1633  else {
1634  for (j=izero; j<n; j++) {
1635  if (beta==beta_zero) {
1636  for (i=j; i<n; i++) {
1637  C[j*ldc + i] = C_zero;
1638  }
1639  }
1640  else if (beta!=beta_one) {
1641  for (i=j; i<n; i++) {
1642  C[j*ldc + i] *= beta;
1643  }
1644  }
1645  for (l=izero; l<k; l++) {
1646  if (A[l*lda + j] != A_zero) {
1647  temp = alpha*A[l*lda + j];
1648  for (i=j; i<n; i++) {
1649  C[j*ldc + i] += temp*A[l*lda + i];
1650  }
1651  }
1652  }
1653  }
1654  }
1655  }
1656  else {
1657 
1658  // Form C <- alpha*A'*A + beta*C
1659  if (Upper) {
1660  for (j=izero; j<n; j++) {
1661  for (i=izero; i<=j; i++) {
1662  temp = A_zero;
1663  for (l=izero; l<k; l++) {
1664  temp += A[i*lda + l]*A[j*lda + l];
1665  }
1666  if (beta==beta_zero) {
1667  C[j*ldc + i] = alpha*temp;
1668  }
1669  else {
1670  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1671  }
1672  }
1673  }
1674  }
1675  else {
1676  for (j=izero; j<n; j++) {
1677  for (i=j; i<n; i++) {
1678  temp = A_zero;
1679  for (l=izero; l<k; ++l) {
1680  temp += A[i*lda + l]*A[j*lda + l];
1681  }
1682  if (beta==beta_zero) {
1683  C[j*ldc + i] = alpha*temp;
1684  }
1685  else {
1686  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1687  }
1688  }
1689  }
1690  }
1691  }
1692  } /* if (!BadArgument) */
1693  } /* END SYRK */
1694 
1695  template<typename OrdinalType, typename ScalarType>
1696  template <typename alpha_type, typename A_type>
1697  void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const A_type* A, const OrdinalType& lda, ScalarType* B, const OrdinalType& ldb) const
1698  {
1699  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1700  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
1701  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1702  A_type A_zero = ScalarTraits<A_type>::zero();
1703  ScalarType B_zero = ScalarTraits<ScalarType>::zero();
1704  ScalarType one = ScalarTraits<ScalarType>::one();
1705  OrdinalType i, j, k, NRowA = m;
1706  ScalarType temp;
1707  bool BadArgument = false;
1708  bool LSide = (ESideChar[side] == 'L');
1709  bool noUnit = (EDiagChar[diag] == 'N');
1710  bool Upper = (EUploChar[uplo] == 'U');
1711  bool noConj = (ETranspChar[transa] == 'T');
1712 
1713  if(!LSide) { NRowA = n; }
1714 
1715  // Quick return.
1716  if (n==izero || m==izero) { return; }
1717  if( m < izero ) {
1718  std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl;
1719  BadArgument = true; }
1720  if( n < izero ) {
1721  std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl;
1722  BadArgument = true; }
1723  if( lda < NRowA ) {
1724  std::cout << "BLAS::TRMM Error: LDA < "<<NRowA<<std::endl;
1725  BadArgument = true; }
1726  if( ldb < m ) {
1727  std::cout << "BLAS::TRMM Error: LDB < MAX(1,M)"<<std::endl;
1728  BadArgument = true; }
1729 
1730  if(!BadArgument) {
1731 
1732  // B only needs to be zeroed out.
1733  if( alpha == alpha_zero ) {
1734  for( j=izero; j<n; j++ ) {
1735  for( i=izero; i<m; i++ ) {
1736  B[j*ldb + i] = B_zero;
1737  }
1738  }
1739  return;
1740  }
1741 
1742  // Start the computations.
1743  if ( LSide ) {
1744  // A is on the left side of B.
1745 
1746  if ( ETranspChar[transa]=='N' ) {
1747  // Compute B = alpha*A*B
1748 
1749  if ( Upper ) {
1750  // A is upper triangular
1751  for( j=izero; j<n; j++ ) {
1752  for( k=izero; k<m; k++) {
1753  if ( B[j*ldb + k] != B_zero ) {
1754  temp = alpha*B[j*ldb + k];
1755  for( i=izero; i<k; i++ ) {
1756  B[j*ldb + i] += temp*A[k*lda + i];
1757  }
1758  if ( noUnit )
1759  temp *=A[k*lda + k];
1760  B[j*ldb + k] = temp;
1761  }
1762  }
1763  }
1764  } else {
1765  // A is lower triangular
1766  for( j=izero; j<n; j++ ) {
1767  for( k=m-ione; k>-ione; k-- ) {
1768  if( B[j*ldb + k] != B_zero ) {
1769  temp = alpha*B[j*ldb + k];
1770  B[j*ldb + k] = temp;
1771  if ( noUnit )
1772  B[j*ldb + k] *= A[k*lda + k];
1773  for( i=k+ione; i<m; i++ ) {
1774  B[j*ldb + i] += temp*A[k*lda + i];
1775  }
1776  }
1777  }
1778  }
1779  }
1780  } else {
1781  // Compute B = alpha*A'*B or B = alpha*conj(A')*B
1782  if( Upper ) {
1783  for( j=izero; j<n; j++ ) {
1784  for( i=m-ione; i>-ione; i-- ) {
1785  temp = B[j*ldb + i];
1786  if ( noConj ) {
1787  if( noUnit )
1788  temp *= A[i*lda + i];
1789  for( k=izero; k<i; k++ ) {
1790  temp += A[i*lda + k]*B[j*ldb + k];
1791  }
1792  } else {
1793  if( noUnit )
1794  temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
1795  for( k=izero; k<i; k++ ) {
1796  temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k];
1797  }
1798  }
1799  B[j*ldb + i] = alpha*temp;
1800  }
1801  }
1802  } else {
1803  for( j=izero; j<n; j++ ) {
1804  for( i=izero; i<m; i++ ) {
1805  temp = B[j*ldb + i];
1806  if ( noConj ) {
1807  if( noUnit )
1808  temp *= A[i*lda + i];
1809  for( k=i+ione; k<m; k++ ) {
1810  temp += A[i*lda + k]*B[j*ldb + k];
1811  }
1812  } else {
1813  if( noUnit )
1814  temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
1815  for( k=i+ione; k<m; k++ ) {
1816  temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k];
1817  }
1818  }
1819  B[j*ldb + i] = alpha*temp;
1820  }
1821  }
1822  }
1823  }
1824  } else {
1825  // A is on the right hand side of B.
1826 
1827  if( ETranspChar[transa] == 'N' ) {
1828  // Compute B = alpha*B*A
1829 
1830  if( Upper ) {
1831  // A is upper triangular.
1832  for( j=n-ione; j>-ione; j-- ) {
1833  temp = alpha;
1834  if( noUnit )
1835  temp *= A[j*lda + j];
1836  for( i=izero; i<m; i++ ) {
1837  B[j*ldb + i] *= temp;
1838  }
1839  for( k=izero; k<j; k++ ) {
1840  if( A[j*lda + k] != A_zero ) {
1841  temp = alpha*A[j*lda + k];
1842  for( i=izero; i<m; i++ ) {
1843  B[j*ldb + i] += temp*B[k*ldb + i];
1844  }
1845  }
1846  }
1847  }
1848  } else {
1849  // A is lower triangular.
1850  for( j=izero; j<n; j++ ) {
1851  temp = alpha;
1852  if( noUnit )
1853  temp *= A[j*lda + j];
1854  for( i=izero; i<m; i++ ) {
1855  B[j*ldb + i] *= temp;
1856  }
1857  for( k=j+ione; k<n; k++ ) {
1858  if( A[j*lda + k] != A_zero ) {
1859  temp = alpha*A[j*lda + k];
1860  for( i=izero; i<m; i++ ) {
1861  B[j*ldb + i] += temp*B[k*ldb + i];
1862  }
1863  }
1864  }
1865  }
1866  }
1867  } else {
1868  // Compute B = alpha*B*A' or B = alpha*B*conj(A')
1869 
1870  if( Upper ) {
1871  for( k=izero; k<n; k++ ) {
1872  for( j=izero; j<k; j++ ) {
1873  if( A[k*lda + j] != A_zero ) {
1874  if ( noConj )
1875  temp = alpha*A[k*lda + j];
1876  else
1877  temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]);
1878  for( i=izero; i<m; i++ ) {
1879  B[j*ldb + i] += temp*B[k*ldb + i];
1880  }
1881  }
1882  }
1883  temp = alpha;
1884  if( noUnit ) {
1885  if ( noConj )
1886  temp *= A[k*lda + k];
1887  else
1888  temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]);
1889  }
1890  if( temp != one ) {
1891  for( i=izero; i<m; i++ ) {
1892  B[k*ldb + i] *= temp;
1893  }
1894  }
1895  }
1896  } else {
1897  for( k=n-ione; k>-ione; k-- ) {
1898  for( j=k+ione; j<n; j++ ) {
1899  if( A[k*lda + j] != A_zero ) {
1900  if ( noConj )
1901  temp = alpha*A[k*lda + j];
1902  else
1903  temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]);
1904  for( i=izero; i<m; i++ ) {
1905  B[j*ldb + i] += temp*B[k*ldb + i];
1906  }
1907  }
1908  }
1909  temp = alpha;
1910  if( noUnit ) {
1911  if ( noConj )
1912  temp *= A[k*lda + k];
1913  else
1914  temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]);
1915  }
1916  if( temp != one ) {
1917  for( i=izero; i<m; i++ ) {
1918  B[k*ldb + i] *= temp;
1919  }
1920  }
1921  }
1922  }
1923  } // end if( ETranspChar[transa] == 'N' ) ...
1924  } // end if ( LSide ) ...
1925  } // end if (!BadArgument)
1926  } // end TRMM
1927 
1928  template<typename OrdinalType, typename ScalarType>
1929  template <typename alpha_type, typename A_type>
1930  void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType& m, const OrdinalType& n, const alpha_type alpha, const A_type* A, const OrdinalType& lda, ScalarType* B, const OrdinalType& ldb) const
1931  {
1932  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1933  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
1934  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1935  A_type A_zero = ScalarTraits<A_type>::zero();
1936  ScalarType B_zero = ScalarTraits<ScalarType>::zero();
1937  alpha_type alpha_one = ScalarTraits<alpha_type>::one();
1938  ScalarType B_one = ScalarTraits<ScalarType>::one();
1939  ScalarType temp;
1940  OrdinalType NRowA = m;
1941  bool BadArgument = false;
1942  bool noUnit = (EDiagChar[diag]=='N');
1943  bool noConj = (ETranspChar[transa] == 'T');
1944 
1945  if (!(ESideChar[side] == 'L')) { NRowA = n; }
1946 
1947  // Quick return.
1948  if (n == izero || m == izero) { return; }
1949  if( m < izero ) {
1950  std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl;
1951  BadArgument = true; }
1952  if( n < izero ) {
1953  std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl;
1954  BadArgument = true; }
1955  if( lda < NRowA ) {
1956  std::cout << "BLAS::TRSM Error: LDA < "<<NRowA<<std::endl;
1957  BadArgument = true; }
1958  if( ldb < m ) {
1959  std::cout << "BLAS::TRSM Error: LDB < MAX(1,M)"<<std::endl;
1960  BadArgument = true; }
1961 
1962  if(!BadArgument)
1963  {
1964  int i, j, k;
1965  // Set the solution to the zero vector.
1966  if(alpha == alpha_zero) {
1967  for(j = izero; j < n; j++) {
1968  for( i = izero; i < m; i++) {
1969  B[j*ldb+i] = B_zero;
1970  }
1971  }
1972  }
1973  else
1974  { // Start the operations.
1975  if(ESideChar[side] == 'L') {
1976  //
1977  // Perform computations for OP(A)*X = alpha*B
1978  //
1979  if(ETranspChar[transa] == 'N') {
1980  //
1981  // Compute B = alpha*inv( A )*B
1982  //
1983  if(EUploChar[uplo] == 'U') {
1984  // A is upper triangular.
1985  for(j = izero; j < n; j++) {
1986  // Perform alpha*B if alpha is not 1.
1987  if(alpha != alpha_one) {
1988  for( i = izero; i < m; i++) {
1989  B[j*ldb+i] *= alpha;
1990  }
1991  }
1992  // Perform a backsolve for column j of B.
1993  for(k = (m - ione); k > -ione; k--) {
1994  // If this entry is zero, we don't have to do anything.
1995  if (B[j*ldb + k] != B_zero) {
1996  if ( noUnit ) {
1997  B[j*ldb + k] /= A[k*lda + k];
1998  }
1999  for(i = izero; i < k; i++) {
2000  B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2001  }
2002  }
2003  }
2004  }
2005  }
2006  else
2007  { // A is lower triangular.
2008  for(j = izero; j < n; j++) {
2009  // Perform alpha*B if alpha is not 1.
2010  if(alpha != alpha_one) {
2011  for( i = izero; i < m; i++) {
2012  B[j*ldb+i] *= alpha;
2013  }
2014  }
2015  // Perform a forward solve for column j of B.
2016  for(k = izero; k < m; k++) {
2017  // If this entry is zero, we don't have to do anything.
2018  if (B[j*ldb + k] != B_zero) {
2019  if ( noUnit ) {
2020  B[j*ldb + k] /= A[k*lda + k];
2021  }
2022  for(i = k+ione; i < m; i++) {
2023  B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2024  }
2025  }
2026  }
2027  }
2028  } // end if (uplo == 'U')
2029  } // if (transa =='N')
2030  else {
2031  //
2032  // Compute B = alpha*inv( A' )*B
2033  // or B = alpha*inv( conj(A') )*B
2034  //
2035  if(EUploChar[uplo] == 'U') {
2036  // A is upper triangular.
2037  for(j = izero; j < n; j++) {
2038  for( i = izero; i < m; i++) {
2039  temp = alpha*B[j*ldb+i];
2040  if ( noConj ) {
2041  for(k = izero; k < i; k++) {
2042  temp -= A[i*lda + k] * B[j*ldb + k];
2043  }
2044  if ( noUnit ) {
2045  temp /= A[i*lda + i];
2046  }
2047  } else {
2048  for(k = izero; k < i; k++) {
2049  temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k])
2050  * B[j*ldb + k];
2051  }
2052  if ( noUnit ) {
2053  temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
2054  }
2055  }
2056  B[j*ldb + i] = temp;
2057  }
2058  }
2059  }
2060  else
2061  { // A is lower triangular.
2062  for(j = izero; j < n; j++) {
2063  for(i = (m - ione) ; i > -ione; i--) {
2064  temp = alpha*B[j*ldb+i];
2065  if ( noConj ) {
2066  for(k = i+ione; k < m; k++) {
2067  temp -= A[i*lda + k] * B[j*ldb + k];
2068  }
2069  if ( noUnit ) {
2070  temp /= A[i*lda + i];
2071  }
2072  } else {
2073  for(k = i+ione; k < m; k++) {
2074  temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k])
2075  * B[j*ldb + k];
2076  }
2077  if ( noUnit ) {
2078  temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
2079  }
2080  }
2081  B[j*ldb + i] = temp;
2082  }
2083  }
2084  }
2085  }
2086  } // if (side == 'L')
2087  else {
2088  // side == 'R'
2089  //
2090  // Perform computations for X*OP(A) = alpha*B
2091  //
2092  if (ETranspChar[transa] == 'N') {
2093  //
2094  // Compute B = alpha*B*inv( A )
2095  //
2096  if(EUploChar[uplo] == 'U') {
2097  // A is upper triangular.
2098  // Perform a backsolve for column j of B.
2099  for(j = izero; j < n; j++) {
2100  // Perform alpha*B if alpha is not 1.
2101  if(alpha != alpha_one) {
2102  for( i = izero; i < m; i++) {
2103  B[j*ldb+i] *= alpha;
2104  }
2105  }
2106  for(k = izero; k < j; k++) {
2107  // If this entry is zero, we don't have to do anything.
2108  if (A[j*lda + k] != A_zero) {
2109  for(i = izero; i < m; i++) {
2110  B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2111  }
2112  }
2113  }
2114  if ( noUnit ) {
2115  temp = B_one/A[j*lda + j];
2116  for(i = izero; i < m; i++) {
2117  B[j*ldb + i] *= temp;
2118  }
2119  }
2120  }
2121  }
2122  else
2123  { // A is lower triangular.
2124  for(j = (n - ione); j > -ione; j--) {
2125  // Perform alpha*B if alpha is not 1.
2126  if(alpha != alpha_one) {
2127  for( i = izero; i < m; i++) {
2128  B[j*ldb+i] *= alpha;
2129  }
2130  }
2131  // Perform a forward solve for column j of B.
2132  for(k = j+ione; k < n; k++) {
2133  // If this entry is zero, we don't have to do anything.
2134  if (A[j*lda + k] != A_zero) {
2135  for(i = izero; i < m; i++) {
2136  B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2137  }
2138  }
2139  }
2140  if ( noUnit ) {
2141  temp = B_one/A[j*lda + j];
2142  for(i = izero; i < m; i++) {
2143  B[j*ldb + i] *= temp;
2144  }
2145  }
2146  }
2147  } // end if (uplo == 'U')
2148  } // if (transa =='N')
2149  else {
2150  //
2151  // Compute B = alpha*B*inv( A' )
2152  // or B = alpha*B*inv( conj(A') )
2153  //
2154  if(EUploChar[uplo] == 'U') {
2155  // A is upper triangular.
2156  for(k = (n - ione); k > -ione; k--) {
2157  if ( noUnit ) {
2158  if ( noConj )
2159  temp = B_one/A[k*lda + k];
2160  else
2161  temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]);
2162  for(i = izero; i < m; i++) {
2163  B[k*ldb + i] *= temp;
2164  }
2165  }
2166  for(j = izero; j < k; j++) {
2167  if (A[k*lda + j] != A_zero) {
2168  if ( noConj )
2169  temp = A[k*lda + j];
2170  else
2171  temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]);
2172  for(i = izero; i < m; i++) {
2173  B[j*ldb + i] -= temp*B[k*ldb + i];
2174  }
2175  }
2176  }
2177  if (alpha != alpha_one) {
2178  for (i = izero; i < m; i++) {
2179  B[k*ldb + i] *= alpha;
2180  }
2181  }
2182  }
2183  }
2184  else
2185  { // A is lower triangular.
2186  for(k = izero; k < n; k++) {
2187  if ( noUnit ) {
2188  if ( noConj )
2189  temp = B_one/A[k*lda + k];
2190  else
2191  temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]);
2192  for (i = izero; i < m; i++) {
2193  B[k*ldb + i] *= temp;
2194  }
2195  }
2196  for(j = k+ione; j < n; j++) {
2197  if(A[k*lda + j] != A_zero) {
2198  if ( noConj )
2199  temp = A[k*lda + j];
2200  else
2201  temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]);
2202  for(i = izero; i < m; i++) {
2203  B[j*ldb + i] -= temp*B[k*ldb + i];
2204  }
2205  }
2206  }
2207  if (alpha != alpha_one) {
2208  for (i = izero; i < m; i++) {
2209  B[k*ldb + i] *= alpha;
2210  }
2211  }
2212  }
2213  }
2214  }
2215  }
2216  }
2217  }
2218  }
2219 
2220  // Explicit instantiation for template<int,float>
2221 
2222  template <>
2223  class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, float>
2224  {
2225  public:
2226  inline BLAS(void) {}
2227  inline BLAS(const BLAS<int, float>& /*BLAS_source*/) {}
2228  inline virtual ~BLAS(void) {}
2229  void ROTG(float* da, float* db, float* c, float* s) const;
2230  void ROT(const int& n, float* dx, const int& incx, float* dy, const int& incy, float* c, float* s) const;
2231  float ASUM(const int& n, const float* x, const int& incx) const;
2232  void AXPY(const int& n, const float& alpha, const float* x, const int& incx, float* y, const int& incy) const;
2233  void COPY(const int& n, const float* x, const int& incx, float* y, const int& incy) const;
2234  float DOT(const int& n, const float* x, const int& incx, const float* y, const int& incy) const;
2235  float NRM2(const int& n, const float* x, const int& incx) const;
2236  void SCAL(const int& n, const float& alpha, float* x, const int& incx) const;
2237  int IAMAX(const int& n, const float* x, const int& incx) const;
2238  void GEMV(ETransp trans, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* x, const int& incx, const float& beta, float* y, const int& incy) const;
2239  void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const float* A, const int& lda, float* x, const int& incx) const;
2240  void GER(const int& m, const int& n, const float& alpha, const float* x, const int& incx, const float* y, const int& incy, float* A, const int& lda) const;
2241  void GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const;
2242  void SWAP(const int& n, float* const x, const int& incx, float* const y, const int& incy) const;
2243  void SYMM(ESide side, EUplo uplo, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const;
2244  void SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const;
2245  void HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const;
2246  void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const;
2247  void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const;
2248  };
2249 
2250  // Explicit instantiation for template<int,double>
2251 
2252  template<>
2253  class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, double>
2254  {
2255  public:
2256  inline BLAS(void) {}
2257  inline BLAS(const BLAS<int, double>& /*BLAS_source*/) {}
2258  inline virtual ~BLAS(void) {}
2259  void ROTG(double* da, double* db, double* c, double* s) const;
2260  void ROT(const int& n, double* dx, const int& incx, double* dy, const int& incy, double* c, double* s) const;
2261  double ASUM(const int& n, const double* x, const int& incx) const;
2262  void AXPY(const int& n, const double& alpha, const double* x, const int& incx, double* y, const int& incy) const;
2263  void COPY(const int& n, const double* x, const int& incx, double* y, const int& incy) const;
2264  double DOT(const int& n, const double* x, const int& incx, const double* y, const int& incy) const;
2265  double NRM2(const int& n, const double* x, const int& incx) const;
2266  void SCAL(const int& n, const double& alpha, double* x, const int& incx) const;
2267  int IAMAX(const int& n, const double* x, const int& incx) const;
2268  void GEMV(ETransp trans, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* x, const int& incx, const double& beta, double* y, const int& incy) const;
2269  void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const double* A, const int& lda, double* x, const int& incx) const;
2270  void GER(const int& m, const int& n, const double& alpha, const double* x, const int& incx, const double* y, const int& incy, double* A, const int& lda) const;
2271  void GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const;
2272  void SWAP(const int& n, double* const x, const int& incx, double* const y, const int& incy) const;
2273  void SYMM(ESide side, EUplo uplo, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const;
2274  void SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const;
2275  void HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const;
2276  void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const;
2277  void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const;
2278  };
2279 
2280  // Explicit instantiation for template<int,complex<float> >
2281 
2282  template<>
2283  class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<float> >
2284  {
2285  public:
2286  inline BLAS(void) {}
2287  inline BLAS(const BLAS<int, std::complex<float> >& /*BLAS_source*/) {}
2288  inline virtual ~BLAS(void) {}
2289  void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const;
2290  void ROT(const int& n, std::complex<float>* dx, const int& incx, std::complex<float>* dy, const int& incy, float* c, std::complex<float>* s) const;
2291  float ASUM(const int& n, const std::complex<float>* x, const int& incx) const;
2292  void AXPY(const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const;
2293  void COPY(const int& n, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const;
2294  std::complex<float> DOT(const int& n, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy) const;
2295  float NRM2(const int& n, const std::complex<float>* x, const int& incx) const;
2296  void SCAL(const int& n, const std::complex<float> alpha, std::complex<float>* x, const int& incx) const;
2297  int IAMAX(const int& n, const std::complex<float>* x, const int& incx) const;
2298  void GEMV(ETransp trans, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* x, const int& incx, const std::complex<float> beta, std::complex<float>* y, const int& incy) const;
2299  void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<float>* A, const int& lda, std::complex<float>* x, const int& incx) const;
2300  void GER(const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy, std::complex<float>* A, const int& lda) const;
2301  void GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const;
2302  void SWAP(const int& n, std::complex<float>* const x, const int& incx, std::complex<float>* const y, const int& incy) const;
2303  void SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> *B, const int& ldb, const std::complex<float> beta, std::complex<float> *C, const int& ldc) const;
2304  void SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const;
2305  void HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const;
2306  void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const;
2307  void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const;
2308  };
2309 
2310  // Explicit instantiation for template<int,complex<double> >
2311 
2312  template<>
2313  class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<double> >
2314  {
2315  public:
2316  inline BLAS(void) {}
2317  inline BLAS(const BLAS<int, std::complex<double> >& /*BLAS_source*/) {}
2318  inline virtual ~BLAS(void) {}
2319  void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const;
2320  void ROT(const int& n, std::complex<double>* dx, const int& incx, std::complex<double>* dy, const int& incy, double* c, std::complex<double>* s) const;
2321  double ASUM(const int& n, const std::complex<double>* x, const int& incx) const;
2322  void AXPY(const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const;
2323  void COPY(const int& n, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const;
2324  std::complex<double> DOT(const int& n, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy) const;
2325  double NRM2(const int& n, const std::complex<double>* x, const int& incx) const;
2326  void SCAL(const int& n, const std::complex<double> alpha, std::complex<double>* x, const int& incx) const;
2327  int IAMAX(const int& n, const std::complex<double>* x, const int& incx) const;
2328  void GEMV(ETransp trans, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* x, const int& incx, const std::complex<double> beta, std::complex<double>* y, const int& incy) const;
2329  void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<double>* A, const int& lda, std::complex<double>* x, const int& incx) const;
2330  void GER(const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy, std::complex<double>* A, const int& lda) const;
2331  void GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* B, const int& ldb, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const;
2332  void SWAP(const int& n, std::complex<double>* const x, const int& incx, std::complex<double>* const y, const int& incy) const;
2333  void SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> *B, const int& ldb, const std::complex<double> beta, std::complex<double> *C, const int& ldc) const;
2334  void SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const;
2335  void HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const;
2336  void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const;
2337  void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const;
2338  };
2339 
2340 } // namespace Teuchos
2341 
2342 #endif // _TEUCHOS_BLAS_HPP_
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A &lt;- alpha*x*y&#39;+A.
static T one()
Returns representation of one for this ordinal type.
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Perform the operation: y &lt;- y+alpha*x.
BLAS(const BLAS< OrdinalType, ScalarType > &)
Copy constructor.
Enumerated types for BLAS input characters.
virtual ~DefaultBLASImpl(void)
Destructor.
T magnitudeType
Mandatory typedef for result of magnitude.
virtual ~BLAS(void)
Destructor.
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
Performs the matrix-vector operation: x &lt;- A*x or x &lt;- A&#39;*x where A is a unit/non-unit n by n upper/low...
static T zero()
Returns representation of zero for this ordinal type.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y &lt;- alpha*A*x+beta*y or y &lt;- alpha*A&#39;*x+beta*y where A is a gene...
void ROT(const OrdinalType &n, ScalarType *dx, const OrdinalType &incx, ScalarType *dy, const OrdinalType &incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
BLAS(void)
Default constructor.
DefaultBLASImpl(void)
Default constructor.
Teuchos header file which uses auto-configuration information to include necessary C++ headers...
Templated BLAS wrapper.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
details::GivensRotator< ScalarType >::c_type rotg_c_type
The type used for c in ROTG.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
General matrix-matrix multiply.
This structure defines some basic traits for a scalar field type.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sum the absolute values of the entries of x.
DefaultBLASImpl(const DefaultBLASImpl< OrdinalType, ScalarType > &)
Copy constructor.
static T conjugate(T a)
Returns the conjugate of the scalar type a.
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Copy the vector x to the vector y.
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
Form the dot product of the vectors x and y.
static magnitudeType magnitude(T a)
Returns the magnitudeType of the scalar type a.
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the matrix-matrix operation: C &lt;- alpha*A*B+beta*C or C &lt;- alpha*B*A+beta*C where A is an m ...
Defines basic traits for the ordinal field type.
Default traits class that just returns typeid(T).name().
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Performs the matrix-matrix operation: B &lt;- alpha*op(A)*B or B &lt;- alpha*B*op(A) where op(A) is an unit...
Default implementation for BLAS routines.
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
Defines basic traits for the scalar field type.
static T zero()
Returns representation of zero for this scalar type.
void SYRK(EUplo uplo, ETransp trans, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the symmetric rank k operation: C &lt;- alpha*A*A&#39;+beta*C or C &lt;- alpha*A&#39;*A+beta*C, where alpha and beta are scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case or k by n matrix in the second case.
static T one()
Returns representation of one for this scalar type.
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.