Teuchos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_BLAS.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include "Teuchos_BLAS.hpp"
44 
45 /* for INTEL_CXML, the second arg may need to be changed to 'one'. If so
46 the appropriate declaration of one will need to be added back into
47 functions that include the macro:
48 */
49 
50 namespace {
51 #if defined (INTEL_CXML)
52  unsigned int one=1;
53 #endif
54 } // namespace
55 
56 #ifdef CHAR_MACRO
57 #undef CHAR_MACRO
58 #endif
59 #if defined (INTEL_CXML)
60 #define CHAR_MACRO(char_var) &char_var, one
61 #else
62 #define CHAR_MACRO(char_var) &char_var
63 #endif
64 
65 
66 const char Teuchos::ESideChar[] = {'L' , 'R' };
67 const char Teuchos::ETranspChar[] = {'N' , 'T' , 'C' };
68 const char Teuchos::EUploChar[] = {'U' , 'L' };
69 const char Teuchos::EDiagChar[] = {'U' , 'N' };
70 const char Teuchos::ETypeChar[] = {'G' , 'L', 'U', 'H', 'B', 'Q', 'Z' };
71 //const char Teuchos::EFactChar[] = {'F', 'N' };
72 //const char Teuchos::ENormChar[] = {'O', 'I' };
73 //const char Teuchos::ECompQChar[] = {'N', 'I', 'V' };
74 //const char Teuchos::EJobChar[] = {'E', 'V', 'B' };
75 //const char Teuchos::EJobSChar[] = {'E', 'S' };
76 //const char Teuchos::EJobVSChar[] = {'V', 'N' };
77 //const char Teuchos::EHowmnyChar[] = {'A', 'S' };
78 //const char Teuchos::ECMachChar[] = {'E', 'S', 'B', 'P', 'N', 'R', 'M', 'U', 'L', 'O' };
79 //const char Teuchos::ESortChar[] = {'N', 'S'};
80 
81 
82 namespace {
83 
84 
85 template<typename Scalar>
86 Scalar generic_dot(const int& n, const Scalar* x, const int& incx,
87  const Scalar* y, const int& incy)
88 {
90  Scalar dot = 0.0;
91  if (incx==1 && incy==1) {
92  for (int i = 0; i < n; ++i)
93  dot += (*x++)*ST::conjugate(*y++);
94  }
95  else {
96  if (incx < 0)
97  x = x - incx*(n-1);
98  if (incy < 0)
99  y = y - incy*(n-1);
100  for (int i = 0; i < n; ++i, x+=incx, y+=incy)
101  dot += (*x)*ST::conjugate(*y);
102  }
103  return dot;
104 }
105 
106 
107 } // namespace
108 
109 
110 namespace Teuchos {
111 
112 //Explicitly instantiating these templates for windows due to an issue with
113 //resolving them when linking dlls.
114 #ifdef _MSC_VER
115 # ifdef HAVE_TEUCHOS_COMPLEX
116  template class BLAS<long int, std::complex<float> >;
117  template class BLAS<long int, std::complex<double> >;
118 # endif
119  template class BLAS<long int, float>;
120  template class BLAS<long int, double>;
121 #endif
122 
123  // *************************** BLAS<int,float> DEFINITIONS ******************************
124 
125  void BLAS<int, float>::ROTG(float* da, float* db, float* c, float* s) const
126  { SROTG_F77(da, db, c, s ); }
127 
128  void BLAS<int, float>::ROT(const int& n, float* dx, const int& incx, float* dy, const int& incy, float* c, float* s) const
129  { SROT_F77(&n, dx, &incx, dy, &incy, c, s); }
130 
131 
132  float BLAS<int, float>::ASUM(const int& n, const float* x, const int& incx) const
133  {
134 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
135  return cblas_sasum(n, x, incx);
136 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
137  float tmp = SASUM_F77(&n, x, &incx);
138  return tmp;
139 #else
140  typedef ScalarTraits<float> ST;
141  float sum = 0.0;
142  if (incx == 1) {
143  for (int i = 0; i < n; ++i)
144  sum += ST::magnitude(*x++);
145  }
146  else {
147  for (int i = 0; i < n; ++i, x+=incx)
148  sum += ST::magnitude(*x);
149  }
150  return sum;
151 #endif
152  }
153 
154  void BLAS<int, float>::AXPY(const int& n, const float& alpha, const float* x, const int& incx, float* y, const int& incy) const
155  { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
156 
157  void BLAS<int, float>::COPY(const int& n, const float* x, const int& incx, float* y, const int& incy) const
158  { SCOPY_F77(&n, x, &incx, y, &incy); }
159 
160  float BLAS<int, float>::DOT(const int& n, const float* x, const int& incx, const float* y, const int& incy) const
161  {
162 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
163  return cblas_sdot(n, x, incx, y, incy);
164 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
165  return SDOT_F77(&n, x, &incx, y, &incy);
166 #else
167  return generic_dot(n, x, incx, y, incy);
168 #endif
169  }
170 
171  int BLAS<int, float>::IAMAX(const int& n, const float* x, const int& incx) const
172  { return ISAMAX_F77(&n, x, &incx); }
173 
174  float BLAS<int, float>::NRM2(const int& n, const float* x, const int& incx) const
175  {
176 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
177  return cblas_snrm2(n, x, incx);
178 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
179  return SNRM2_F77(&n, x, &incx);
180 #else
181  return ScalarTraits<float>::squareroot(generic_dot(n, x, incx, x, incx));
182 #endif
183  }
184 
185  void BLAS<int, float>::SCAL(const int& n, const float& alpha, float* x, const int& incx) const
186  { SSCAL_F77(&n, &alpha, x, &incx); }
187 
188  void BLAS<int, float>::GEMV(ETransp trans, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* x, const int& incx, const float& beta, float* y, const int& incy) const
189  { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
190 
191  void BLAS<int, float>::GER(const int& m, const int& n, const float& alpha, const float* x, const int& incx, const float* y, const int& incy, float* A, const int& lda) const
192  { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
193 
194  void BLAS<int, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const float* A, const int& lda, float* x, const int& incx) const
195  { STRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
196 
197  void BLAS<int, float>::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const
198  { SGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
199 
200  void BLAS<int, float>::SWAP(const int& n, float* const x, const int& incx, float* const y, const int& incy) const
201  {
202  SSWAP_F77 (&n, x, &incx, y, &incy);
203  }
204 
205  void BLAS<int, float>::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const
206  { SSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
207 
208  void BLAS<int, float>::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const
209  { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
210 
211  void BLAS<int, float>::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const
212  { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
213 
214  void BLAS<int, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const
215  { STRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
216 
217  void BLAS<int, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const
218  { STRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
219 
220  // *************************** BLAS<int,double> DEFINITIONS ******************************
221 
222  void BLAS<int, double>::ROTG(double* da, double* db, double* c, double* s) const
223  { DROTG_F77(da, db, c, s); }
224 
225  void BLAS<int, double>::ROT(const int& n, double* dx, const int& incx, double* dy, const int& incy, double* c, double* s) const
226  { DROT_F77(&n, dx, &incx, dy, &incy, c, s); }
227 
228  double BLAS<int, double>::ASUM(const int& n, const double* x, const int& incx) const
229  { return DASUM_F77(&n, x, &incx); }
230 
231  void BLAS<int, double>::AXPY(const int& n, const double& alpha, const double* x, const int& incx, double* y, const int& incy) const
232  { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
233 
234  void BLAS<int, double>::COPY(const int& n, const double* x, const int& incx, double* y, const int& incy) const
235  { DCOPY_F77(&n, x, &incx, y, &incy); }
236 
237  double BLAS<int, double>::DOT(const int& n, const double* x, const int& incx, const double* y, const int& incy) const
238  {
239  return DDOT_F77(&n, x, &incx, y, &incy);
240  }
241 
242  int BLAS<int, double>::IAMAX(const int& n, const double* x, const int& incx) const
243  { return IDAMAX_F77(&n, x, &incx); }
244 
245  double BLAS<int, double>::NRM2(const int& n, const double* x, const int& incx) const
246  { return DNRM2_F77(&n, x, &incx); }
247 
248  void BLAS<int, double>::SCAL(const int& n, const double& alpha, double* x, const int& incx) const
249  { DSCAL_F77(&n, &alpha, x, &incx); }
250 
251  void BLAS<int, double>::GEMV(ETransp trans, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* x, const int& incx, const double& beta, double* y, const int& incy) const
252  { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
253 
254  void BLAS<int, double>::GER(const int& m, const int& n, const double& alpha, const double* x, const int& incx, const double* y, const int& incy, double* A, const int& lda) const
255  { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
256 
257  void BLAS<int, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const double* A, const int& lda, double* x, const int& incx) const
258  { DTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
259 
260  void BLAS<int, double>::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const
261  { DGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
262 
263  void BLAS<int, double>::SWAP(const int& n, double* const x, const int& incx, double* const y, const int& incy) const
264  {
265  DSWAP_F77 (&n, x, &incx, y, &incy);
266  }
267 
268  void BLAS<int, double>::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const
269  { DSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
270 
271  void BLAS<int, double>::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const
272  { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
273 
274  void BLAS<int, double>::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const
275  { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
276 
277  void BLAS<int, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const
278  { DTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
279 
280  void BLAS<int, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const
281  { DTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
282 
283 #ifdef HAVE_TEUCHOS_COMPLEX
284 
285  // *************************** BLAS<int,std::complex<float> > DEFINITIONS ******************************
286 
287  void BLAS<int, std::complex<float> >::ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const
288  { CROTG_F77(da, db, c, s ); }
289 
290  void BLAS<int, std::complex<float> >::ROT(const int& n, std::complex<float>* dx, const int& incx, std::complex<float>* dy, const int& incy, float* c, std::complex<float>* s) const
291  { CROT_F77(&n, dx, &incx, dy, &incy, c, s); }
292 
293  float BLAS<int, std::complex<float> >::ASUM(const int& n, const std::complex<float>* x, const int& incx) const
294  {
295 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
296  return cblas_scasum(n, x, incx);
297 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
298  return (float) SCASUM_F77(&n, x, &incx);
299 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
300  return SCASUM_F77(&n, x, &incx);
301 #else // Wow, you just plain don't have this routine.
302  // mfh 01 Feb 2013: See www.netlib.org/blas/scasum.f.
303  // I've enhanced this by accumulating in double precision.
304  double result = 0;
305  if (incx == 1) {
306  for (int i = 0; i < n; ++i) {
307  result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
308  }
309  } else {
310  const int nincx = n * incx;
311  for (int i = 0; i < nincx; i += incx) {
312  result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
313  }
314  }
315  return static_cast<float> (result);
316 #endif
317  }
318 
319  void BLAS<int, std::complex<float> >::AXPY(const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const
320  { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
321 
322  void BLAS<int, std::complex<float> >::COPY(const int& n, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const
323  { CCOPY_F77(&n, x, &incx, y, &incy); }
324 
325  std::complex<float> BLAS<int, std::complex<float> >::DOT(const int& n, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy) const
326  {
327 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
328  std::complex<float> z;
329  cblas_cdotc_sub(n,x,incx,y,incy,&z);
330  return z;
331 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM) && defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
332  std::complex<float> z;
333  CDOT_F77(&z, &n, x, &incx, y, &incy);
334  return z;
335 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
336  Teuchos_Complex_float_type_name z = CDOT_F77(&n, x, &incx, y, &incy);
337  return TEUCHOS_BLAS_CONVERT_COMPLEX_FORTRAN_TO_CXX(float, z);
338 #else // Wow, you just plain don't have this routine.
339  // mfh 01 Feb 2013: See www.netlib.org/blas/cdotc.f.
340  // I've enhanced this by accumulating in double precision.
341  std::complex<double> result (0, 0);
342  if (n >= 0) {
343  if (incx == 1 && incy == 1) {
344  for (int i = 0; i < n; ++i) {
345  result += std::conj (x[i]) * y[i];
346  }
347  } else {
348  int ix = 0;
349  int iy = 0;
350  if (incx < 0) {
351  ix = (1-n) * incx;
352  }
353  if (incy < 0) {
354  iy = (1-n) * incy;
355  }
356  for (int i = 0; i < n; ++i) {
357  result += std::conj (x[ix]) * y[iy];
358  ix += incx;
359  iy += incy;
360  }
361  }
362  }
363  return static_cast<std::complex<float> > (result);
364 #endif
365  }
366 
367  int BLAS<int, std::complex<float> >::IAMAX(const int& n, const std::complex<float>* x, const int& incx) const
368  { return ICAMAX_F77(&n, x, &incx); }
369 
370  float BLAS<int, std::complex<float> >::NRM2(const int& n, const std::complex<float>* x, const int& incx) const
371  {
372 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
373  return cblas_scnrm2(n, x, incx);
374 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
375  return (float) SCNRM2_F77(&n, x, &incx);
376 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
377  return SCNRM2_F77(&n, x, &incx);
378 #else // Wow, you just plain don't have this routine.
379  // mfh 01 Feb 2013: See www.netlib.org/blas/scnrm2.f.
380  // I've enhanced this by accumulating in double precision.
381  if (n < 1 || incx < 1) {
382  return 0;
383  } else {
384  double scale = 0;
385  double ssq = 1;
386 
387  const int upper = 1 + (n-1)*incx;
388  for (int ix = 0; ix < upper; ix += incx) {
389  // The reference BLAS implementation cleverly scales the
390  // intermediate result. so that even if the square of the norm
391  // would overflow, computing the norm itself does not. Hence,
392  // "ssq" for "scaled square root."
393  if (std::real (x[ix]) != 0) {
394  const double temp = std::abs (std::real (x[ix]));
395  if (scale < temp) {
396  const double scale_over_temp = scale / temp;
397  ssq = 1 + ssq * scale_over_temp*scale_over_temp;
398  // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
399  scale = temp;
400  } else {
401  const double temp_over_scale = temp / scale;
402  ssq = ssq + temp_over_scale*temp_over_scale;
403  }
404  }
405  if (std::imag (x[ix]) != 0) {
406  const double temp = std::abs (std::imag (x[ix]));
407  if (scale < temp) {
408  const double scale_over_temp = scale / temp;
409  ssq = 1 + ssq * scale_over_temp*scale_over_temp;
410  // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
411  scale = temp;
412  } else {
413  const double temp_over_scale = temp / scale;
414  ssq = ssq + temp_over_scale*temp_over_scale;
415  }
416  }
417  }
418  return static_cast<float> (scale * std::sqrt (ssq));
419  }
420 #endif
421  }
422 
423  void BLAS<int, std::complex<float> >::SCAL(const int& n, const std::complex<float> alpha, std::complex<float>* x, const int& incx) const
424  { CSCAL_F77(&n, &alpha, x, &incx); }
425 
426  void BLAS<int, std::complex<float> >::GEMV(ETransp trans, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* x, const int& incx, const std::complex<float> beta, std::complex<float>* y, const int& incy) const
427  { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
428 
429  void BLAS<int, std::complex<float> >::GER(const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy, std::complex<float>* A, const int& lda) const
430  { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
431 
432  void BLAS<int, std::complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<float>* A, const int& lda, std::complex<float>* x, const int& incx) const
433  { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
434 
435  void BLAS<int, std::complex<float> >::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
436  { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
437 
438  void BLAS<int, std::complex<float> >::SWAP(const int& n, std::complex<float>* const x, const int& incx, std::complex<float>* const y, const int& incy) const
439  {
440  CSWAP_F77 (&n, x, &incx, y, &incy);
441  }
442 
443  void BLAS<int, std::complex<float> >::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
444  { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
445 
446  void BLAS<int, std::complex<float> >::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
447  { CSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
448 
449  void BLAS<int, std::complex<float> >::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
450  { CHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
451 
452  void BLAS<int, std::complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const
453  { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
454 
455  void BLAS<int, std::complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const
456  { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
457 
458  // *************************** BLAS<int,std::complex<double> > DEFINITIONS ******************************
459 
460  void BLAS<int, std::complex<double> >::ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const
461  { ZROTG_F77(da, db, c, s); }
462 
463  void BLAS<int, std::complex<double> >::ROT(const int& n, std::complex<double>* dx, const int& incx, std::complex<double>* dy, const int& incy, double* c, std::complex<double>* s) const
464  { ZROT_F77(&n, dx, &incx, dy, &incy, c, s); }
465 
466  double BLAS<int, std::complex<double> >::ASUM(const int& n, const std::complex<double>* x, const int& incx) const
467  { return ZASUM_F77(&n, x, &incx); }
468 
469  void BLAS<int, std::complex<double> >::AXPY(const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const
470  { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
471 
472  void BLAS<int, std::complex<double> >::COPY(const int& n, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const
473  { ZCOPY_F77(&n, x, &incx, y, &incy); }
474 
475  std::complex<double> BLAS<int, std::complex<double> >::DOT(const int& n, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy) const
476  {
477 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
478  std::complex<double> z;
479  cblas_zdotc_sub(n,x,incx,y,incy,&z);
480  return z;
481 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM)
482 # if defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
483  std::complex<double> z;
484  ZDOT_F77(&z, &n, x, &incx, y, &incy);
485  return z;
486 # else
487  // mfh 01 Feb 2013: Your complex BLAS is broken, but the problem
488  // doesn't have the easy workaround. I'll just reimplement the
489  // missing routine here. See www.netlib.org/blas/zdotc.f.
490  std::complex<double> ztemp (0, 0);
491  if (n > 0) {
492  if (incx == 1 && incy == 1) {
493  for (int i = 0; i < n; ++i) {
494  ztemp += std::conj (x[i]) * y[i];
495  }
496  } else {
497  int ix = 0;
498  int iy = 0;
499  if (incx < 0) {
500  ix = (1-n)*incx;
501  }
502  if (incy < 0) {
503  iy = (1-n)*incy;
504  }
505  for (int i = 0; i < n; ++i) {
506  ztemp += std::conj (x[ix]) * y[iy];
507  ix += incx;
508  iy += incy;
509  }
510  }
511  }
512  return ztemp;
513 
514 # endif // defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
515 #else
516  Teuchos_Complex_double_type_name z = ZDOT_F77(&n, x, &incx, y, &incy);
517  return TEUCHOS_BLAS_CONVERT_COMPLEX_FORTRAN_TO_CXX(double, z);
518 #endif
519  }
520 
521  int BLAS<int, std::complex<double> >::IAMAX(const int& n, const std::complex<double>* x, const int& incx) const
522  { return IZAMAX_F77(&n, x, &incx); }
523 
524  double BLAS<int, std::complex<double> >::NRM2(const int& n, const std::complex<double>* x, const int& incx) const
525  { return ZNRM2_F77(&n, x, &incx); }
526 
527  void BLAS<int, std::complex<double> >::SCAL(const int& n, const std::complex<double> alpha, std::complex<double>* x, const int& incx) const
528  { ZSCAL_F77(&n, &alpha, x, &incx); }
529 
530  void BLAS<int, std::complex<double> >::GEMV(ETransp trans, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* x, const int& incx, const std::complex<double> beta, std::complex<double>* y, const int& incy) const
531  { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
532 
533  void BLAS<int, std::complex<double> >::GER(const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy, std::complex<double>* A, const int& lda) const
534  { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
535 
536  void BLAS<int, std::complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<double>* A, const int& lda, std::complex<double>* x, const int& incx) const
537  { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
538 
539  void BLAS<int, std::complex<double> >::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* B, const int& ldb, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
540  { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
541 
542  void BLAS<int, std::complex<double> >::SWAP(const int& n, std::complex<double>* const x, const int& incx, std::complex<double>* const y, const int& incy) const
543  {
544  ZSWAP_F77 (&n, x, &incx, y, &incy);
545  }
546 
547  void BLAS<int, std::complex<double> >::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> *B, const int& ldb, const std::complex<double> beta, std::complex<double> *C, const int& ldc) const
548  { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
549 
550  void BLAS<int, std::complex<double> >::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
551  { ZSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
552 
553  void BLAS<int, std::complex<double> >::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
554  { ZHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
555 
556  void BLAS<int, std::complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const
557  { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
558 
559  void BLAS<int, std::complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const
560  { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
561 
562 #endif // HAVE_TEUCHOS_COMPLEX
563 
564 }
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A &lt;- alpha*x*y&#39;+A.
#define Teuchos_Complex_double_type_name
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Perform the operation: y &lt;- y+alpha*x.
void PREFIX DTRMV_F77(Teuchos_fcd, Teuchos_fcd, Teuchos_fcd, const int *n, const double *a, const int *lda, double *x, const int *incx)
void PREFIX DROTG_F77(double *da, double *db, double *c, double *s)
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
Performs the matrix-vector operation: x &lt;- A*x or x &lt;- A&#39;*x where A is a unit/non-unit n by n upper/low...
void PREFIX DGEMM_F77(Teuchos_fcd, Teuchos_fcd, const int *m, const int *n, const int *k, const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, const double *beta, double *c, const int *ldc)
void PREFIX DGEMV_F77(Teuchos_fcd, const int *m, const int *n, const double *alpha, const double A[], const int *lda, const double x[], const int *incx, const double *beta, double y[], const int *incy)
int PREFIX ISAMAX_F77(const int *n, const float *x, const int *incx)
Templated interface class to BLAS routines.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y &lt;- alpha*A*x+beta*y or y &lt;- alpha*A&#39;*x+beta*y where A is a gene...
void ROT(const OrdinalType &n, ScalarType *dx, const OrdinalType &incx, ScalarType *dy, const OrdinalType &incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
void PREFIX DROT_F77(const int *n, double *dx, const int *incx, double *dy, const int *incy, double *c, double *s)
void PREFIX SGEMV_F77(Teuchos_fcd, const int *m, const int *n, const float *alpha, const float A[], const int *lda, const float x[], const int *incx, const float *beta, float y[], const int *incy)
void PREFIX SCOPY_F77(const int *n, const float *x, const int *incx, float *y, const int *incy)
static T squareroot(T x)
Returns a number of magnitudeType that is the square root of this scalar type x.
void PREFIX SROTG_F77(float *da, float *db, float *c, float *s)
#define Teuchos_Complex_float_type_name
void PREFIX DSCAL_F77(const int *n, const double *alpha, double *x, const int *incx)
void PREFIX DSWAP_F77(const int *const n, double *const x, const int *const incx, double *const y, const int *const incy)
void PREFIX DSYRK_F77(Teuchos_fcd, Teuchos_fcd, const int *n, const int *k, const double *alpha, const double *a, const int *lda, const double *beta, double *c, const int *ldc)
void PREFIX SSYMM_F77(Teuchos_fcd, Teuchos_fcd, const int *m, const int *n, const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, const float *beta, float *c, const int *ldc)
Templated BLAS wrapper.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
General matrix-matrix multiply.
double PREFIX DNRM2_F77(const int *n, const double x[], const int *incx)
The Templated BLAS wrappers.
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EDiagChar[]
void PREFIX DAXPY_F77(const int *n, const double *alpha, const double x[], const int *incx, double y[], const int *incy)
This structure defines some basic traits for a scalar field type.
#define SDOT_F77
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sum the absolute values of the entries of x.
void PREFIX SSWAP_F77(const int *const n, float *const x, const int *const incx, float *const y, const int *const incy)
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Copy the vector x to the vector y.
void PREFIX SGER_F77(const int *m, const int *n, const float *alpha, const float *x, const int *incx, const float *y, const int *incy, float *a, const int *lda)
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EUploChar[]
void PREFIX DTRSM_F77(Teuchos_fcd, Teuchos_fcd, Teuchos_fcd, Teuchos_fcd, const int *m, const int *n, const double *alpha, const double *a, const int *lda, double *b, const int *ldb)
double PREFIX DASUM_F77(const int *n, const double x[], const int *incx)
void PREFIX SSYRK_F77(Teuchos_fcd, Teuchos_fcd, const int *n, const int *k, const float *alpha, const float *a, const int *lda, const float *beta, float *c, const int *ldc)
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETypeChar[]
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
Form the dot product of the vectors x and y.
void PREFIX STRMV_F77(Teuchos_fcd, Teuchos_fcd, Teuchos_fcd, const int *n, const float *a, const int *lda, float *x, const int *incx)
void PREFIX SSCAL_F77(const int *n, const float *alpha, float *x, const int *incx)
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ESideChar[]
int PREFIX IDAMAX_F77(const int *n, const double *x, const int *incx)
void PREFIX SGEMM_F77(Teuchos_fcd, Teuchos_fcd, const int *m, const int *n, const int *k, const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, const float *beta, float *c, const int *ldc)
#define CHAR_MACRO(char_var)
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the matrix-matrix operation: C &lt;- alpha*A*B+beta*C or C &lt;- alpha*B*A+beta*C where A is an m ...
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Performs the matrix-matrix operation: B &lt;- alpha*op(A)*B or B &lt;- alpha*B*op(A) where op(A) is an unit...
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
#define SASUM_F77
void PREFIX SAXPY_F77(const int *n, const float *alpha, const float x[], const int *incx, float y[], const int *incy)
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETranspChar[]
void SYRK(EUplo uplo, ETransp trans, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the symmetric rank k operation: C &lt;- alpha*A*A&#39;+beta*C or C &lt;- alpha*A&#39;*A+beta*C, where alpha and beta are scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case or k by n matrix in the second case.
void PREFIX STRMM_F77(Teuchos_fcd, Teuchos_fcd, Teuchos_fcd, Teuchos_fcd, const int *m, const int *n, const float *alpha, const float *a, const int *lda, float *b, const int *ldb)
void PREFIX DTRMM_F77(Teuchos_fcd, Teuchos_fcd, Teuchos_fcd, Teuchos_fcd, const int *m, const int *n, const double *alpha, const double *a, const int *lda, double *b, const int *ldb)
void PREFIX SROT_F77(const int *n, float *dx, const int *incx, float *dy, const int *incy, float *c, float *s)
void PREFIX STRSM_F77(Teuchos_fcd, Teuchos_fcd, Teuchos_fcd, Teuchos_fcd, const int *m, const int *n, const float *alpha, const float *a, const int *lda, float *b, const int *ldb)
void PREFIX DGER_F77(const int *m, const int *n, const double *alpha, const double *x, const int *incx, const double *y, const int *incy, double *a, const int *lda)
void PREFIX DCOPY_F77(const int *n, const double *x, const int *incx, double *y, const int *incy)
double PREFIX DDOT_F77(const int *n, const double x[], const int *incx, const double y[], const int *incy)
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.
#define SNRM2_F77
void PREFIX DSYMM_F77(Teuchos_fcd, Teuchos_fcd, const int *m, const int *n, const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, const double *beta, double *c, const int *ldc)