Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_BLAS.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Teuchos: Common Tools Package
4 //
5 // Copyright 2004 NTESS and the Teuchos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Teuchos_BLAS.hpp"
12 
13 /* for INTEL_CXML, the second arg may need to be changed to 'one'. If so
14 the appropriate declaration of one will need to be added back into
15 functions that include the macro:
16 */
17 
18 namespace {
19 #if defined (INTEL_CXML)
20  unsigned int one=1;
21 #endif
22 } // namespace
23 
24 #ifdef CHAR_MACRO
25 #undef CHAR_MACRO
26 #endif
27 #if defined (INTEL_CXML)
28 #define CHAR_MACRO(char_var) &char_var, one
29 #else
30 #define CHAR_MACRO(char_var) &char_var
31 #endif
32 
33 
34 const char Teuchos::ESideChar[] = {'L' , 'R' };
35 const char Teuchos::ETranspChar[] = {'N' , 'T' , 'C' };
36 const char Teuchos::EUploChar[] = {'U' , 'L' };
37 const char Teuchos::EDiagChar[] = {'U' , 'N' };
38 const char Teuchos::ETypeChar[] = {'G' , 'L', 'U', 'H', 'B', 'Q', 'Z' };
39 //const char Teuchos::EFactChar[] = {'F', 'N' };
40 //const char Teuchos::ENormChar[] = {'O', 'I' };
41 //const char Teuchos::ECompQChar[] = {'N', 'I', 'V' };
42 //const char Teuchos::EJobChar[] = {'E', 'V', 'B' };
43 //const char Teuchos::EJobSChar[] = {'E', 'S' };
44 //const char Teuchos::EJobVSChar[] = {'V', 'N' };
45 //const char Teuchos::EHowmnyChar[] = {'A', 'S' };
46 //const char Teuchos::ECMachChar[] = {'E', 'S', 'B', 'P', 'N', 'R', 'M', 'U', 'L', 'O' };
47 //const char Teuchos::ESortChar[] = {'N', 'S'};
48 
49 
50 namespace {
51 
52 
53 template<typename Scalar>
54 Scalar generic_dot(const int& n, const Scalar* x, const int& incx,
55  const Scalar* y, const int& incy)
56 {
58  Scalar dot = 0.0;
59  if (incx==1 && incy==1) {
60  for (int i = 0; i < n; ++i)
61  dot += (*x++)*ST::conjugate(*y++);
62  }
63  else {
64  if (incx < 0)
65  x = x - incx*(n-1);
66  if (incy < 0)
67  y = y - incy*(n-1);
68  for (int i = 0; i < n; ++i, x+=incx, y+=incy)
69  dot += (*x)*ST::conjugate(*y);
70  }
71  return dot;
72 }
73 
74 
75 } // namespace
76 
77 
78 namespace Teuchos {
79 
80 //Explicitly instantiating these templates for windows due to an issue with
81 //resolving them when linking dlls.
82 #ifdef _MSC_VER
83 # ifdef HAVE_TEUCHOS_COMPLEX
84  template class BLAS<long int, std::complex<float> >;
85  template class BLAS<long int, std::complex<double> >;
86 # endif
87  template class BLAS<long int, float>;
88  template class BLAS<long int, double>;
89 #endif
90 
91  // *************************** BLAS<int,float> DEFINITIONS ******************************
92 
93  void BLAS<int, float>::ROTG(float* da, float* db, float* c, float* s) const
94  { SROTG_F77(da, db, c, s ); }
95 
96  void BLAS<int, float>::ROT(const int& n, float* dx, const int& incx, float* dy, const int& incy, float* c, float* s) const
97  { SROT_F77(&n, dx, &incx, dy, &incy, c, s); }
98 
99 
100  float BLAS<int, float>::ASUM(const int& n, const float* x, const int& incx) const
101  {
102 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
103  return cblas_sasum(n, x, incx);
104 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
105  float tmp = SASUM_F77(&n, x, &incx);
106  return tmp;
107 #else
108  typedef ScalarTraits<float> ST;
109  float sum = 0.0;
110  if (incx == 1) {
111  for (int i = 0; i < n; ++i)
112  sum += ST::magnitude(*x++);
113  }
114  else {
115  for (int i = 0; i < n; ++i, x+=incx)
116  sum += ST::magnitude(*x);
117  }
118  return sum;
119 #endif
120  }
121 
122  void BLAS<int, float>::AXPY(const int& n, const float& alpha, const float* x, const int& incx, float* y, const int& incy) const
123  { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
124 
125  void BLAS<int, float>::COPY(const int& n, const float* x, const int& incx, float* y, const int& incy) const
126  { SCOPY_F77(&n, x, &incx, y, &incy); }
127 
128  float BLAS<int, float>::DOT(const int& n, const float* x, const int& incx, const float* y, const int& incy) const
129  {
130 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
131  return cblas_sdot(n, x, incx, y, incy);
132 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
133  return SDOT_F77(&n, x, &incx, y, &incy);
134 #else
135  return generic_dot(n, x, incx, y, incy);
136 #endif
137  }
138 
139  int BLAS<int, float>::IAMAX(const int& n, const float* x, const int& incx) const
140  { return ISAMAX_F77(&n, x, &incx); }
141 
142  float BLAS<int, float>::NRM2(const int& n, const float* x, const int& incx) const
143  {
144 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
145  return cblas_snrm2(n, x, incx);
146 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
147  return SNRM2_F77(&n, x, &incx);
148 #else
149  return ScalarTraits<float>::squareroot(generic_dot(n, x, incx, x, incx));
150 #endif
151  }
152 
153  void BLAS<int, float>::SCAL(const int& n, const float& alpha, float* x, const int& incx) const
154  { SSCAL_F77(&n, &alpha, x, &incx); }
155 
156  void BLAS<int, float>::GEMV(ETransp trans, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* x, const int& incx, const float& beta, float* y, const int& incy) const
157  { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
158 
159  void BLAS<int, float>::GER(const int& m, const int& n, const float& alpha, const float* x, const int& incx, const float* y, const int& incy, float* A, const int& lda) const
160  { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
161 
162  void BLAS<int, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const float* A, const int& lda, float* x, const int& incx) const
163  { STRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
164 
165  void BLAS<int, float>::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const
166  { SGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
167 
168  void BLAS<int, float>::SWAP(const int& n, float* const x, const int& incx, float* const y, const int& incy) const
169  {
170  SSWAP_F77 (&n, x, &incx, y, &incy);
171  }
172 
173  void BLAS<int, float>::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const
174  { SSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
175 
176  void BLAS<int, float>::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const
177  { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
178 
179  void BLAS<int, float>::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const
180  { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
181 
182  void BLAS<int, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const
183  { STRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
184 
185  void BLAS<int, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const
186  { STRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
187 
188  // *************************** BLAS<int,double> DEFINITIONS ******************************
189 
190  void BLAS<int, double>::ROTG(double* da, double* db, double* c, double* s) const
191  { DROTG_F77(da, db, c, s); }
192 
193  void BLAS<int, double>::ROT(const int& n, double* dx, const int& incx, double* dy, const int& incy, double* c, double* s) const
194  { DROT_F77(&n, dx, &incx, dy, &incy, c, s); }
195 
196  double BLAS<int, double>::ASUM(const int& n, const double* x, const int& incx) const
197  { return DASUM_F77(&n, x, &incx); }
198 
199  void BLAS<int, double>::AXPY(const int& n, const double& alpha, const double* x, const int& incx, double* y, const int& incy) const
200  { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
201 
202  void BLAS<int, double>::COPY(const int& n, const double* x, const int& incx, double* y, const int& incy) const
203  { DCOPY_F77(&n, x, &incx, y, &incy); }
204 
205  double BLAS<int, double>::DOT(const int& n, const double* x, const int& incx, const double* y, const int& incy) const
206  {
207  return DDOT_F77(&n, x, &incx, y, &incy);
208  }
209 
210  int BLAS<int, double>::IAMAX(const int& n, const double* x, const int& incx) const
211  { return IDAMAX_F77(&n, x, &incx); }
212 
213  double BLAS<int, double>::NRM2(const int& n, const double* x, const int& incx) const
214  { return DNRM2_F77(&n, x, &incx); }
215 
216  void BLAS<int, double>::SCAL(const int& n, const double& alpha, double* x, const int& incx) const
217  { DSCAL_F77(&n, &alpha, x, &incx); }
218 
219  void BLAS<int, double>::GEMV(ETransp trans, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* x, const int& incx, const double& beta, double* y, const int& incy) const
220  { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
221 
222  void BLAS<int, double>::GER(const int& m, const int& n, const double& alpha, const double* x, const int& incx, const double* y, const int& incy, double* A, const int& lda) const
223  { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
224 
225  void BLAS<int, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const double* A, const int& lda, double* x, const int& incx) const
226  { DTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
227 
228  void BLAS<int, double>::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const
229  { DGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
230 
231  void BLAS<int, double>::SWAP(const int& n, double* const x, const int& incx, double* const y, const int& incy) const
232  {
233  DSWAP_F77 (&n, x, &incx, y, &incy);
234  }
235 
236  void BLAS<int, double>::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const
237  { DSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
238 
239  void BLAS<int, double>::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const
240  { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
241 
242  void BLAS<int, double>::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const
243  { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
244 
245  void BLAS<int, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const
246  { DTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
247 
248  void BLAS<int, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const
249  { DTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
250 
251 #ifdef HAVE_TEUCHOS_COMPLEX
252 
253  // *************************** BLAS<int,std::complex<float> > DEFINITIONS ******************************
254 
255  void BLAS<int, std::complex<float> >::ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const
256  { CROTG_F77(da, db, c, s ); }
257 
258  void BLAS<int, std::complex<float> >::ROT(const int& n, std::complex<float>* dx, const int& incx, std::complex<float>* dy, const int& incy, float* c, std::complex<float>* s) const
259  { CROT_F77(&n, dx, &incx, dy, &incy, c, s); }
260 
261  float BLAS<int, std::complex<float> >::ASUM(const int& n, const std::complex<float>* x, const int& incx) const
262  {
263 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
264  return cblas_scasum(n, x, incx);
265 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
266  return (float) SCASUM_F77(&n, x, &incx);
267 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
268  return SCASUM_F77(&n, x, &incx);
269 #else // Wow, you just plain don't have this routine.
270  // mfh 01 Feb 2013: See www.netlib.org/blas/scasum.f.
271  // I've enhanced this by accumulating in double precision.
272  double result = 0;
273  if (incx == 1) {
274  for (int i = 0; i < n; ++i) {
275  result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
276  }
277  } else {
278  const int nincx = n * incx;
279  for (int i = 0; i < nincx; i += incx) {
280  result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
281  }
282  }
283  return static_cast<float> (result);
284 #endif
285  }
286 
287  void BLAS<int, std::complex<float> >::AXPY(const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const
288  { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
289 
290  void BLAS<int, std::complex<float> >::COPY(const int& n, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const
291  { CCOPY_F77(&n, x, &incx, y, &incy); }
292 
293  std::complex<float> BLAS<int, std::complex<float> >::DOT(const int& n, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy) const
294  {
295 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
296  std::complex<float> z;
297  cblas_cdotc_sub(n,x,incx,y,incy,&z);
298  return z;
299 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM) && defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
300  std::complex<float> z;
301  CDOT_F77(&z, &n, x, &incx, y, &incy);
302  return z;
303 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
304  Teuchos_Complex_float_type_name z = CDOT_F77(&n, x, &incx, y, &incy);
305  return TEUCHOS_BLAS_CONVERT_COMPLEX_FORTRAN_TO_CXX(float, z);
306 #else // Wow, you just plain don't have this routine.
307  // mfh 01 Feb 2013: See www.netlib.org/blas/cdotc.f.
308  // I've enhanced this by accumulating in double precision.
309  std::complex<double> result (0, 0);
310  if (n >= 0) {
311  if (incx == 1 && incy == 1) {
312  for (int i = 0; i < n; ++i) {
313  result += std::conj (x[i]) * y[i];
314  }
315  } else {
316  int ix = 0;
317  int iy = 0;
318  if (incx < 0) {
319  ix = (1-n) * incx;
320  }
321  if (incy < 0) {
322  iy = (1-n) * incy;
323  }
324  for (int i = 0; i < n; ++i) {
325  result += std::conj (x[ix]) * y[iy];
326  ix += incx;
327  iy += incy;
328  }
329  }
330  }
331  return static_cast<std::complex<float> > (result);
332 #endif
333  }
334 
335  int BLAS<int, std::complex<float> >::IAMAX(const int& n, const std::complex<float>* x, const int& incx) const
336  { return ICAMAX_F77(&n, x, &incx); }
337 
338  float BLAS<int, std::complex<float> >::NRM2(const int& n, const std::complex<float>* x, const int& incx) const
339  {
340 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
341  return cblas_scnrm2(n, x, incx);
342 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
343  return (float) SCNRM2_F77(&n, x, &incx);
344 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
345  return SCNRM2_F77(&n, x, &incx);
346 #else // Wow, you just plain don't have this routine.
347  // mfh 01 Feb 2013: See www.netlib.org/blas/scnrm2.f.
348  // I've enhanced this by accumulating in double precision.
349  if (n < 1 || incx < 1) {
350  return 0;
351  } else {
352  double scale = 0;
353  double ssq = 1;
354 
355  const int upper = 1 + (n-1)*incx;
356  for (int ix = 0; ix < upper; ix += incx) {
357  // The reference BLAS implementation cleverly scales the
358  // intermediate result. so that even if the square of the norm
359  // would overflow, computing the norm itself does not. Hence,
360  // "ssq" for "scaled square root."
361  if (std::real (x[ix]) != 0) {
362  const double temp = std::abs (std::real (x[ix]));
363  if (scale < temp) {
364  const double scale_over_temp = scale / temp;
365  ssq = 1 + ssq * scale_over_temp*scale_over_temp;
366  // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
367  scale = temp;
368  } else {
369  const double temp_over_scale = temp / scale;
370  ssq = ssq + temp_over_scale*temp_over_scale;
371  }
372  }
373  if (std::imag (x[ix]) != 0) {
374  const double temp = std::abs (std::imag (x[ix]));
375  if (scale < temp) {
376  const double scale_over_temp = scale / temp;
377  ssq = 1 + ssq * scale_over_temp*scale_over_temp;
378  // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
379  scale = temp;
380  } else {
381  const double temp_over_scale = temp / scale;
382  ssq = ssq + temp_over_scale*temp_over_scale;
383  }
384  }
385  }
386  return static_cast<float> (scale * std::sqrt (ssq));
387  }
388 #endif
389  }
390 
391  void BLAS<int, std::complex<float> >::SCAL(const int& n, const std::complex<float> alpha, std::complex<float>* x, const int& incx) const
392  { CSCAL_F77(&n, &alpha, x, &incx); }
393 
394  void BLAS<int, std::complex<float> >::GEMV(ETransp trans, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* x, const int& incx, const std::complex<float> beta, std::complex<float>* y, const int& incy) const
395  { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
396 
397  void BLAS<int, std::complex<float> >::GER(const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy, std::complex<float>* A, const int& lda) const
398  { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
399 
400  void BLAS<int, std::complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<float>* A, const int& lda, std::complex<float>* x, const int& incx) const
401  { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
402 
403  void BLAS<int, std::complex<float> >::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
404  { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
405 
406  void BLAS<int, std::complex<float> >::SWAP(const int& n, std::complex<float>* const x, const int& incx, std::complex<float>* const y, const int& incy) const
407  {
408  CSWAP_F77 (&n, x, &incx, y, &incy);
409  }
410 
411  void BLAS<int, std::complex<float> >::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
412  { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
413 
414  void BLAS<int, std::complex<float> >::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
415  { CSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
416 
417  void BLAS<int, std::complex<float> >::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
418  { CHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
419 
420  void BLAS<int, std::complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const
421  { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
422 
423  void BLAS<int, std::complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const
424  { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
425 
426  // *************************** BLAS<int,std::complex<double> > DEFINITIONS ******************************
427 
428  void BLAS<int, std::complex<double> >::ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const
429  { ZROTG_F77(da, db, c, s); }
430 
431  void BLAS<int, std::complex<double> >::ROT(const int& n, std::complex<double>* dx, const int& incx, std::complex<double>* dy, const int& incy, double* c, std::complex<double>* s) const
432  { ZROT_F77(&n, dx, &incx, dy, &incy, c, s); }
433 
434  double BLAS<int, std::complex<double> >::ASUM(const int& n, const std::complex<double>* x, const int& incx) const
435  { return ZASUM_F77(&n, x, &incx); }
436 
437  void BLAS<int, std::complex<double> >::AXPY(const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const
438  { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
439 
440  void BLAS<int, std::complex<double> >::COPY(const int& n, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const
441  { ZCOPY_F77(&n, x, &incx, y, &incy); }
442 
443  std::complex<double> BLAS<int, std::complex<double> >::DOT(const int& n, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy) const
444  {
445 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
446  std::complex<double> z;
447  cblas_zdotc_sub(n,x,incx,y,incy,&z);
448  return z;
449 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM)
450 # if defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
451  std::complex<double> z;
452  ZDOT_F77(&z, &n, x, &incx, y, &incy);
453  return z;
454 # else
455  // mfh 01 Feb 2013: Your complex BLAS is broken, but the problem
456  // doesn't have the easy workaround. I'll just reimplement the
457  // missing routine here. See www.netlib.org/blas/zdotc.f.
458  std::complex<double> ztemp (0, 0);
459  if (n > 0) {
460  if (incx == 1 && incy == 1) {
461  for (int i = 0; i < n; ++i) {
462  ztemp += std::conj (x[i]) * y[i];
463  }
464  } else {
465  int ix = 0;
466  int iy = 0;
467  if (incx < 0) {
468  ix = (1-n)*incx;
469  }
470  if (incy < 0) {
471  iy = (1-n)*incy;
472  }
473  for (int i = 0; i < n; ++i) {
474  ztemp += std::conj (x[ix]) * y[iy];
475  ix += incx;
476  iy += incy;
477  }
478  }
479  }
480  return ztemp;
481 
482 # endif // defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
483 #else
484  Teuchos_Complex_double_type_name z = ZDOT_F77(&n, x, &incx, y, &incy);
485  return TEUCHOS_BLAS_CONVERT_COMPLEX_FORTRAN_TO_CXX(double, z);
486 #endif
487  }
488 
489  int BLAS<int, std::complex<double> >::IAMAX(const int& n, const std::complex<double>* x, const int& incx) const
490  { return IZAMAX_F77(&n, x, &incx); }
491 
492  double BLAS<int, std::complex<double> >::NRM2(const int& n, const std::complex<double>* x, const int& incx) const
493  { return ZNRM2_F77(&n, x, &incx); }
494 
495  void BLAS<int, std::complex<double> >::SCAL(const int& n, const std::complex<double> alpha, std::complex<double>* x, const int& incx) const
496  { ZSCAL_F77(&n, &alpha, x, &incx); }
497 
498  void BLAS<int, std::complex<double> >::GEMV(ETransp trans, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* x, const int& incx, const std::complex<double> beta, std::complex<double>* y, const int& incy) const
499  { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
500 
501  void BLAS<int, std::complex<double> >::GER(const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy, std::complex<double>* A, const int& lda) const
502  { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
503 
504  void BLAS<int, std::complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<double>* A, const int& lda, std::complex<double>* x, const int& incx) const
505  { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
506 
507  void BLAS<int, std::complex<double> >::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* B, const int& ldb, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
508  { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
509 
510  void BLAS<int, std::complex<double> >::SWAP(const int& n, std::complex<double>* const x, const int& incx, std::complex<double>* const y, const int& incy) const
511  {
512  ZSWAP_F77 (&n, x, &incx, y, &incy);
513  }
514 
515  void BLAS<int, std::complex<double> >::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> *B, const int& ldb, const std::complex<double> beta, std::complex<double> *C, const int& ldc) const
516  { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
517 
518  void BLAS<int, std::complex<double> >::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
519  { ZSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
520 
521  void BLAS<int, std::complex<double> >::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
522  { ZHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
523 
524  void BLAS<int, std::complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const
525  { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
526 
527  void BLAS<int, std::complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const
528  { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
529 
530 #endif // HAVE_TEUCHOS_COMPLEX
531 
532 }
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A &lt;- alpha*x*y&#39;+A.
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Perform the operation: y &lt;- y+alpha*x.
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
Performs the matrix-vector operation: x &lt;- A*x or x &lt;- A&#39;*x where A is a unit/non-unit n by n upper/low...
Templated interface class to BLAS routines.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y &lt;- alpha*A*x+beta*y or y &lt;- alpha*A&#39;*x+beta*y where A is a gene...
void ROT(const OrdinalType &n, ScalarType *dx, const OrdinalType &incx, ScalarType *dy, const OrdinalType &incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
static T squareroot(T x)
Returns a number of magnitudeType that is the square root of this scalar type x.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
General matrix-matrix multiply.
The Templated BLAS wrappers.
This structure defines some basic traits for a scalar field type.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sum the absolute values of the entries of x.
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Copy the vector x to the vector y.
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
Form the dot product of the vectors x and y.
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the matrix-matrix operation: C &lt;- alpha*A*B+beta*C or C &lt;- alpha*B*A+beta*C where A is an m ...
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Performs the matrix-matrix operation: B &lt;- alpha*op(A)*B or B &lt;- alpha*B*op(A) where op(A) is an unit...
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
void SYRK(EUplo uplo, ETransp trans, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the symmetric rank k operation: C &lt;- alpha*A*A&#39;+beta*C or C &lt;- alpha*A&#39;*A+beta*C, where alpha and beta are scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case or k by n matrix in the second case.
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.