Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_BLAS.cpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include "Teuchos_BLAS.hpp"
44 
45 /* for INTEL_CXML, the second arg may need to be changed to 'one'. If so
46 the appropriate declaration of one will need to be added back into
47 functions that include the macro:
48 */
49 
50 namespace {
51 #if defined (INTEL_CXML)
52  unsigned int one=1;
53 #endif
54 } // namespace
55 
56 #ifdef CHAR_MACRO
57 #undef CHAR_MACRO
58 #endif
59 #if defined (INTEL_CXML)
60 #define CHAR_MACRO(char_var) &char_var, one
61 #else
62 #define CHAR_MACRO(char_var) &char_var
63 #endif
64 
65 
66 const char Teuchos::ESideChar[] = {'L' , 'R' };
67 const char Teuchos::ETranspChar[] = {'N' , 'T' , 'C' };
68 const char Teuchos::EUploChar[] = {'U' , 'L' };
69 const char Teuchos::EDiagChar[] = {'U' , 'N' };
70 const char Teuchos::ETypeChar[] = {'G' , 'L', 'U', 'H', 'B', 'Q', 'Z' };
71 //const char Teuchos::EFactChar[] = {'F', 'N' };
72 //const char Teuchos::ENormChar[] = {'O', 'I' };
73 //const char Teuchos::ECompQChar[] = {'N', 'I', 'V' };
74 //const char Teuchos::EJobChar[] = {'E', 'V', 'B' };
75 //const char Teuchos::EJobSChar[] = {'E', 'S' };
76 //const char Teuchos::EJobVSChar[] = {'V', 'N' };
77 //const char Teuchos::EHowmnyChar[] = {'A', 'S' };
78 //const char Teuchos::ECMachChar[] = {'E', 'S', 'B', 'P', 'N', 'R', 'M', 'U', 'L', 'O' };
79 //const char Teuchos::ESortChar[] = {'N', 'S'};
80 
81 
82 namespace {
83 
84 
85 template<typename Scalar>
86 Scalar generic_dot(const int& n, const Scalar* x, const int& incx,
87  const Scalar* y, const int& incy)
88 {
90  Scalar dot = 0.0;
91  if (incx==1 && incy==1) {
92  for (int i = 0; i < n; ++i)
93  dot += (*x++)*ST::conjugate(*y++);
94  }
95  else {
96  if (incx < 0)
97  x = x - incx*(n-1);
98  if (incy < 0)
99  y = y - incy*(n-1);
100  for (int i = 0; i < n; ++i, x+=incx, y+=incy)
101  dot += (*x)*ST::conjugate(*y);
102  }
103  return dot;
104 }
105 
106 
107 } // namespace
108 
109 
110 namespace Teuchos {
111 
112 //Explicitly instantiating these templates for windows due to an issue with
113 //resolving them when linking dlls.
114 #ifdef _MSC_VER
115 # ifdef HAVE_TEUCHOS_COMPLEX
116  template BLAS<long int, std::complex<float> >;
117  template BLAS<long int, std::complex<double> >;
118 # endif
119  template BLAS<long int, float>;
120  template BLAS<long int, double>;
121 #endif
122 
123  // *************************** BLAS<int,float> DEFINITIONS ******************************
124 
125  void BLAS<int, float>::ROTG(float* da, float* db, float* c, float* s) const
126  { SROTG_F77(da, db, c, s ); }
127 
128  void BLAS<int, float>::ROT(const int& n, float* dx, const int& incx, float* dy, const int& incy, float* c, float* s) const
129  { SROT_F77(&n, dx, &incx, dy, &incy, c, s); }
130 
131 
132  float BLAS<int, float>::ASUM(const int& n, const float* x, const int& incx) const
133  {
134 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
135  return cblas_sasum(n, x, incx);
136 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
137  float tmp = SASUM_F77(&n, x, &incx);
138  return tmp;
139 #else
140  typedef ScalarTraits<float> ST;
141  float sum = 0.0;
142  if (incx == 1) {
143  for (int i = 0; i < n; ++i)
144  sum += ST::magnitude(*x++);
145  }
146  else {
147  for (int i = 0; i < n; ++i, x+=incx)
148  sum += ST::magnitude(*x);
149  }
150  return sum;
151 #endif
152  }
153 
154  void BLAS<int, float>::AXPY(const int& n, const float& alpha, const float* x, const int& incx, float* y, const int& incy) const
155  { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
156 
157  void BLAS<int, float>::COPY(const int& n, const float* x, const int& incx, float* y, const int& incy) const
158  { SCOPY_F77(&n, x, &incx, y, &incy); }
159 
160  float BLAS<int, float>::DOT(const int& n, const float* x, const int& incx, const float* y, const int& incy) const
161  {
162 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
163  return cblas_sdot(n, x, incx, y, incy);
164 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
165  return SDOT_F77(&n, x, &incx, y, &incy);
166 #else
167  return generic_dot(n, x, incx, y, incy);
168 #endif
169  }
170 
171  int BLAS<int, float>::IAMAX(const int& n, const float* x, const int& incx) const
172  { return ISAMAX_F77(&n, x, &incx); }
173 
174  float BLAS<int, float>::NRM2(const int& n, const float* x, const int& incx) const
175  {
176 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
177  return cblas_snrm2(n, x, incx);
178 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
179  return SNRM2_F77(&n, x, &incx);
180 #else
181  return ScalarTraits<float>::squareroot(generic_dot(n, x, incx, x, incx));
182 #endif
183  }
184 
185  void BLAS<int, float>::SCAL(const int& n, const float& alpha, float* x, const int& incx) const
186  { SSCAL_F77(&n, &alpha, x, &incx); }
187 
188  void BLAS<int, float>::GEMV(ETransp trans, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* x, const int& incx, const float& beta, float* y, const int& incy) const
189  { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
190 
191  void BLAS<int, float>::GER(const int& m, const int& n, const float& alpha, const float* x, const int& incx, const float* y, const int& incy, float* A, const int& lda) const
192  { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
193 
194  void BLAS<int, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const float* A, const int& lda, float* x, const int& incx) const
195  { STRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
196 
197  void BLAS<int, float>::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const
198  { SGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
199 
200  void BLAS<int, float>::SWAP(const int& n, float* const x, const int& incx, float* const y, const int& incy) const
201  {
202  SSWAP_F77 (&n, x, &incx, y, &incy);
203  }
204 
205  void BLAS<int, float>::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const
206  { SSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
207 
208  void BLAS<int, float>::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const
209  { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
210 
211  void BLAS<int, float>::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const
212  { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
213 
214  void BLAS<int, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const
215  { STRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
216 
217  void BLAS<int, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const
218  { STRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
219 
220  // *************************** BLAS<int,double> DEFINITIONS ******************************
221 
222  void BLAS<int, double>::ROTG(double* da, double* db, double* c, double* s) const
223  { DROTG_F77(da, db, c, s); }
224 
225  void BLAS<int, double>::ROT(const int& n, double* dx, const int& incx, double* dy, const int& incy, double* c, double* s) const
226  { DROT_F77(&n, dx, &incx, dy, &incy, c, s); }
227 
228  double BLAS<int, double>::ASUM(const int& n, const double* x, const int& incx) const
229  { return DASUM_F77(&n, x, &incx); }
230 
231  void BLAS<int, double>::AXPY(const int& n, const double& alpha, const double* x, const int& incx, double* y, const int& incy) const
232  { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
233 
234  void BLAS<int, double>::COPY(const int& n, const double* x, const int& incx, double* y, const int& incy) const
235  { DCOPY_F77(&n, x, &incx, y, &incy); }
236 
237  double BLAS<int, double>::DOT(const int& n, const double* x, const int& incx, const double* y, const int& incy) const
238  {
239  return DDOT_F77(&n, x, &incx, y, &incy);
240  }
241 
242  int BLAS<int, double>::IAMAX(const int& n, const double* x, const int& incx) const
243  { return IDAMAX_F77(&n, x, &incx); }
244 
245  double BLAS<int, double>::NRM2(const int& n, const double* x, const int& incx) const
246  { return DNRM2_F77(&n, x, &incx); }
247 
248  void BLAS<int, double>::SCAL(const int& n, const double& alpha, double* x, const int& incx) const
249  { DSCAL_F77(&n, &alpha, x, &incx); }
250 
251  void BLAS<int, double>::GEMV(ETransp trans, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* x, const int& incx, const double& beta, double* y, const int& incy) const
252  { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
253 
254  void BLAS<int, double>::GER(const int& m, const int& n, const double& alpha, const double* x, const int& incx, const double* y, const int& incy, double* A, const int& lda) const
255  { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
256 
257  void BLAS<int, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const double* A, const int& lda, double* x, const int& incx) const
258  { DTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
259 
260  void BLAS<int, double>::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const
261  { DGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
262 
263  void BLAS<int, double>::SWAP(const int& n, double* const x, const int& incx, double* const y, const int& incy) const
264  {
265  DSWAP_F77 (&n, x, &incx, y, &incy);
266  }
267 
268  void BLAS<int, double>::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const
269  { DSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
270 
271  void BLAS<int, double>::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const
272  { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
273 
274  void BLAS<int, double>::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const
275  { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
276 
277  void BLAS<int, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const
278  { DTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
279 
280  void BLAS<int, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const
281  { DTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
282 
283 #ifdef HAVE_TEUCHOS_COMPLEX
284 
285  // *************************** BLAS<int,std::complex<float> > DEFINITIONS ******************************
286 
287  void BLAS<int, std::complex<float> >::ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const
288  { CROTG_F77(da, db, c, s ); }
289 
290  void BLAS<int, std::complex<float> >::ROT(const int& n, std::complex<float>* dx, const int& incx, std::complex<float>* dy, const int& incy, float* c, std::complex<float>* s) const
291  { CROT_F77(&n, dx, &incx, dy, &incy, c, s); }
292 
293  float BLAS<int, std::complex<float> >::ASUM(const int& n, const std::complex<float>* x, const int& incx) const
294  {
295 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
296  return cblas_scasum(n, x, incx);
297 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
298  return (float) SCASUM_F77(&n, x, &incx);
299 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
300  return SCASUM_F77(&n, x, &incx);
301 #else // Wow, you just plain don't have this routine.
302  // mfh 01 Feb 2013: See www.netlib.org/blas/scasum.f.
303  // I've enhanced this by accumulating in double precision.
304  double result = 0;
305  if (incx == 1) {
306  for (int i = 0; i < n; ++i) {
307  result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
308  }
309  } else {
310  const int nincx = n * incx;
311  for (int i = 0; i < nincx; i += incx) {
312  result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
313  }
314  }
315  return static_cast<float> (result);
316 #endif
317  }
318 
319  void BLAS<int, std::complex<float> >::AXPY(const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const
320  { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
321 
322  void BLAS<int, std::complex<float> >::COPY(const int& n, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const
323  { CCOPY_F77(&n, x, &incx, y, &incy); }
324 
325  std::complex<float> BLAS<int, std::complex<float> >::DOT(const int& n, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy) const
326  {
327 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
328  std::complex<float> z;
329  cblas_cdotc_sub(n,x,incx,y,incy,&z);
330  return z;
331 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM) && defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
332  std::complex<float> z;
333  CDOT_F77(&z, &n, x, &incx, y, &incy);
334  return z;
335 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
336  return CDOT_F77(&n, x, &incx, y, &incy);
337 #else // Wow, you just plain don't have this routine.
338  // mfh 01 Feb 2013: See www.netlib.org/blas/cdotc.f.
339  // I've enhanced this by accumulating in double precision.
340  std::complex<double> result (0, 0);
341  if (n >= 0) {
342  if (incx == 1 && incy == 1) {
343  for (int i = 0; i < n; ++i) {
344  result += std::conj (x[i]) * y[i];
345  }
346  } else {
347  int ix = 0;
348  int iy = 0;
349  if (incx < 0) {
350  ix = (1-n) * incx;
351  }
352  if (incy < 0) {
353  iy = (1-n) * incy;
354  }
355  for (int i = 0; i < n; ++i) {
356  result += std::conj (x[ix]) * y[iy];
357  ix += incx;
358  iy += incy;
359  }
360  }
361  }
362  return static_cast<std::complex<float> > (result);
363 #endif
364  }
365 
366  int BLAS<int, std::complex<float> >::IAMAX(const int& n, const std::complex<float>* x, const int& incx) const
367  { return ICAMAX_F77(&n, x, &incx); }
368 
369  float BLAS<int, std::complex<float> >::NRM2(const int& n, const std::complex<float>* x, const int& incx) const
370  {
371 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
372  return cblas_scnrm2(n, x, incx);
373 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
374  return (float) SCNRM2_F77(&n, x, &incx);
375 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
376  return SCNRM2_F77(&n, x, &incx);
377 #else // Wow, you just plain don't have this routine.
378  // mfh 01 Feb 2013: See www.netlib.org/blas/scnrm2.f.
379  // I've enhanced this by accumulating in double precision.
380  if (n < 1 || incx < 1) {
381  return 0;
382  } else {
383  double scale = 0;
384  double ssq = 1;
385 
386  const int upper = 1 + (n-1)*incx;
387  for (int ix = 0; ix < upper; ix += incx) {
388  // The reference BLAS implementation cleverly scales the
389  // intermediate result. so that even if the square of the norm
390  // would overflow, computing the norm itself does not. Hence,
391  // "ssq" for "scaled square root."
392  if (std::real (x[ix]) != 0) {
393  const double temp = std::abs (std::real (x[ix]));
394  if (scale < temp) {
395  const double scale_over_temp = scale / temp;
396  ssq = 1 + ssq * scale_over_temp*scale_over_temp;
397  // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
398  scale = temp;
399  } else {
400  const double temp_over_scale = temp / scale;
401  ssq = ssq + temp_over_scale*temp_over_scale;
402  }
403  }
404  if (std::imag (x[ix]) != 0) {
405  const double temp = std::abs (std::imag (x[ix]));
406  if (scale < temp) {
407  const double scale_over_temp = scale / temp;
408  ssq = 1 + ssq * scale_over_temp*scale_over_temp;
409  // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
410  scale = temp;
411  } else {
412  const double temp_over_scale = temp / scale;
413  ssq = ssq + temp_over_scale*temp_over_scale;
414  }
415  }
416  }
417  return static_cast<float> (scale * std::sqrt (ssq));
418  }
419 #endif
420  }
421 
422  void BLAS<int, std::complex<float> >::SCAL(const int& n, const std::complex<float> alpha, std::complex<float>* x, const int& incx) const
423  { CSCAL_F77(&n, &alpha, x, &incx); }
424 
425  void BLAS<int, std::complex<float> >::GEMV(ETransp trans, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* x, const int& incx, const std::complex<float> beta, std::complex<float>* y, const int& incy) const
426  { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
427 
428  void BLAS<int, std::complex<float> >::GER(const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy, std::complex<float>* A, const int& lda) const
429  { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
430 
431  void BLAS<int, std::complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<float>* A, const int& lda, std::complex<float>* x, const int& incx) const
432  { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
433 
434  void BLAS<int, std::complex<float> >::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
435  { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
436 
437  void BLAS<int, std::complex<float> >::SWAP(const int& n, std::complex<float>* const x, const int& incx, std::complex<float>* const y, const int& incy) const
438  {
439  CSWAP_F77 (&n, x, &incx, y, &incy);
440  }
441 
442  void BLAS<int, std::complex<float> >::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
443  { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
444 
445  void BLAS<int, std::complex<float> >::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
446  { CSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
447 
448  void BLAS<int, std::complex<float> >::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
449  { CHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
450 
451  void BLAS<int, std::complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const
452  { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
453 
454  void BLAS<int, std::complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const
455  { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
456 
457  // *************************** BLAS<int,std::complex<double> > DEFINITIONS ******************************
458 
459  void BLAS<int, std::complex<double> >::ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const
460  { ZROTG_F77(da, db, c, s); }
461 
462  void BLAS<int, std::complex<double> >::ROT(const int& n, std::complex<double>* dx, const int& incx, std::complex<double>* dy, const int& incy, double* c, std::complex<double>* s) const
463  { ZROT_F77(&n, dx, &incx, dy, &incy, c, s); }
464 
465  double BLAS<int, std::complex<double> >::ASUM(const int& n, const std::complex<double>* x, const int& incx) const
466  { return ZASUM_F77(&n, x, &incx); }
467 
468  void BLAS<int, std::complex<double> >::AXPY(const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const
469  { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
470 
471  void BLAS<int, std::complex<double> >::COPY(const int& n, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const
472  { ZCOPY_F77(&n, x, &incx, y, &incy); }
473 
474  std::complex<double> BLAS<int, std::complex<double> >::DOT(const int& n, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy) const
475  {
476 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
477  std::complex<double> z;
478  cblas_zdotc_sub(n,x,incx,y,incy,&z);
479  return z;
480 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM)
481 # if defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
482  std::complex<double> z;
483  ZDOT_F77(&z, &n, x, &incx, y, &incy);
484  return z;
485 # else
486  // mfh 01 Feb 2013: Your complex BLAS is broken, but the problem
487  // doesn't have the easy workaround. I'll just reimplement the
488  // missing routine here. See www.netlib.org/blas/zdotc.f.
489  std::complex<double> ztemp (0, 0);
490  if (n > 0) {
491  if (incx == 1 && incy == 1) {
492  for (int i = 0; i < n; ++i) {
493  ztemp += std::conj (x[i]) * y[i];
494  }
495  } else {
496  int ix = 0;
497  int iy = 0;
498  if (incx < 0) {
499  ix = (1-n)*incx;
500  }
501  if (incy < 0) {
502  iy = (1-n)*incy;
503  }
504  for (int i = 0; i < n; ++i) {
505  ztemp += std::conj (x[ix]) * y[iy];
506  ix += incx;
507  iy += incy;
508  }
509  }
510  }
511  return ztemp;
512 
513 # endif // defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
514 #else
515  return ZDOT_F77(&n, x, &incx, y, &incy);
516 #endif
517  }
518 
519  int BLAS<int, std::complex<double> >::IAMAX(const int& n, const std::complex<double>* x, const int& incx) const
520  { return IZAMAX_F77(&n, x, &incx); }
521 
522  double BLAS<int, std::complex<double> >::NRM2(const int& n, const std::complex<double>* x, const int& incx) const
523  { return ZNRM2_F77(&n, x, &incx); }
524 
525  void BLAS<int, std::complex<double> >::SCAL(const int& n, const std::complex<double> alpha, std::complex<double>* x, const int& incx) const
526  { ZSCAL_F77(&n, &alpha, x, &incx); }
527 
528  void BLAS<int, std::complex<double> >::GEMV(ETransp trans, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* x, const int& incx, const std::complex<double> beta, std::complex<double>* y, const int& incy) const
529  { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
530 
531  void BLAS<int, std::complex<double> >::GER(const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy, std::complex<double>* A, const int& lda) const
532  { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
533 
534  void BLAS<int, std::complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<double>* A, const int& lda, std::complex<double>* x, const int& incx) const
535  { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
536 
537  void BLAS<int, std::complex<double> >::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* B, const int& ldb, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
538  { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
539 
540  void BLAS<int, std::complex<double> >::SWAP(const int& n, std::complex<double>* const x, const int& incx, std::complex<double>* const y, const int& incy) const
541  {
542  ZSWAP_F77 (&n, x, &incx, y, &incy);
543  }
544 
545  void BLAS<int, std::complex<double> >::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> *B, const int& ldb, const std::complex<double> beta, std::complex<double> *C, const int& ldc) const
546  { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
547 
548  void BLAS<int, std::complex<double> >::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
549  { ZSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
550 
551  void BLAS<int, std::complex<double> >::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
552  { ZHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
553 
554  void BLAS<int, std::complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const
555  { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
556 
557  void BLAS<int, std::complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const
558  { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
559 
560 #endif // HAVE_TEUCHOS_COMPLEX
561 
562 }
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A &lt;- alpha*x*y&#39;+A.
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Perform the operation: y &lt;- y+alpha*x.
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
Performs the matrix-vector operation: x &lt;- A*x or x &lt;- A&#39;*x where A is a unit/non-unit n by n upper/low...
Templated interface class to BLAS routines.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y &lt;- alpha*A*x+beta*y or y &lt;- alpha*A&#39;*x+beta*y where A is a gene...
void ROT(const OrdinalType &n, ScalarType *dx, const OrdinalType &incx, ScalarType *dy, const OrdinalType &incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
static T squareroot(T x)
Returns a number of magnitudeType that is the square root of this scalar type x.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
General matrix-matrix multiply.
The Templated BLAS wrappers.
This structure defines some basic traits for a scalar field type.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sum the absolute values of the entries of x.
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Copy the vector x to the vector y.
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
Form the dot product of the vectors x and y.
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the matrix-matrix operation: C &lt;- alpha*A*B+beta*C or C &lt;- alpha*B*A+beta*C where A is an m ...
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Performs the matrix-matrix operation: B &lt;- alpha*op(A)*B or B &lt;- alpha*B*op(A) where op(A) is an unit...
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
void SYRK(EUplo uplo, ETransp trans, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the symmetric rank k operation: C &lt;- alpha*A*A&#39;+beta*C or C &lt;- alpha*A&#39;*A+beta*C, where alpha and beta are scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case or k by n matrix in the second case.
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.