Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_BLAS_MP_Vector.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef _TEUCHOS_BLAS_MP_VECTOR_HPP_
43 #define _TEUCHOS_BLAS_MP_VECTOR_HPP_
44 
45 #include "Teuchos_BLAS.hpp"
46 #include "Sacado_MP_Vector.hpp"
47 
48 namespace Teuchos
49 {
50 
51 namespace details
52 {
53 
54 template <typename Storage>
55 class GivensRotator<Sacado::MP::Vector<Storage>, false>
56 {
57 public:
59  typedef ScalarType c_type;
60 
61  void
63  ScalarType *db,
64  ScalarType *c,
65  ScalarType *s) const
66  {
67  typedef ScalarTraits<ScalarType> STS;
68 
69  ScalarType r, roe, scale, z, da_scaled, db_scaled;
70  auto m_da = (STS::magnitude(*da) > STS::magnitude(*db));
71  mask_assign(m_da, roe) = {*da, *db};
72 
73  scale = STS::magnitude(*da) + STS::magnitude(*db);
74 
75  auto m_scale = scale != STS::zero();
76 
77  da_scaled = *da;
78  db_scaled = *db;
79 
80  *c = *da;
81  *s = *db;
82 
83  ScalarType tmp = STS::one();
84  mask_assign(m_scale, tmp) /= scale;
85 
86  mask_assign(m_scale, da_scaled) *= tmp;
87  mask_assign(m_scale, db_scaled) *= tmp;
88 
89  r = scale * STS::squareroot(da_scaled * da_scaled + db_scaled * db_scaled);
90  auto m_roe = roe < 0;
91  mask_assign(m_roe, r) = -r;
92 
93  tmp = STS::one();
94  mask_assign(m_scale, tmp) /= r;
95 
96  mask_assign(m_scale, *c) *= tmp;
97  mask_assign(m_scale, *s) *= tmp;
98 
99  mask_assign(!m_scale, *c) = STS::one();
100  mask_assign(!m_scale, *s) = STS::zero();
101 
102  mask_assign(*c != STS::zero(), z) /= {STS::one(), *c, STS::zero()};
103  mask_assign(!m_scale, z) = STS::zero();
104  mask_assign(m_da, z) = *s;
105 
106  *da = r;
107  *db = z;
108  }
109 };
110 } // namespace details
111 } // namespace Teuchos
112 
113 //namespace Sacado {
114 // namespace MP {
115 namespace Teuchos
116 {
118 template <typename OrdinalType, typename Storage>
119 class BLAS<OrdinalType, Sacado::MP::Vector<Storage>> : public Teuchos::DefaultBLASImpl<OrdinalType, Sacado::MP::Vector<Storage>>
120 {
121 
123  typedef typename Sacado::ValueType<Sacado::MP::Vector<Storage>>::type ValueType;
124  typedef typename Sacado::ScalarType<Sacado::MP::Vector<Storage>>::type scalar_type;
127 
128 public:
129  template <typename alpha_type, typename A_type>
132  const OrdinalType m, const OrdinalType n,
133  const alpha_type &alpha,
134  const A_type *A, const OrdinalType lda,
135  ScalarType *B, const OrdinalType ldb) const
136  {
138 
139  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
140  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
141  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
142  A_type A_zero = ScalarTraits<A_type>::zero();
143  ScalarType B_zero = ScalarTraits<ScalarType>::zero();
144  alpha_type alpha_one = ScalarTraits<alpha_type>::one();
145  ScalarType B_one = ScalarTraits<ScalarType>::one();
146  ScalarType temp;
147  OrdinalType NRowA = m;
148  bool BadArgument = false;
149  bool noUnit = (EDiagChar[diag] == 'N');
150  bool noConj = (ETranspChar[transa] == 'T');
151 
152  if (!(ESideChar[side] == 'L'))
153  {
154  NRowA = n;
155  }
156 
157  // Quick return.
158  if (n == izero || m == izero)
159  {
160  return;
161  }
162  if (m < izero)
163  {
164  std::cout << "BLAS::TRSM Error: M == " << m << std::endl;
165  BadArgument = true;
166  }
167  if (n < izero)
168  {
169  std::cout << "BLAS::TRSM Error: N == " << n << std::endl;
170  BadArgument = true;
171  }
172  if (lda < NRowA)
173  {
174  std::cout << "BLAS::TRSM Error: LDA < " << NRowA << std::endl;
175  BadArgument = true;
176  }
177  if (ldb < m)
178  {
179  std::cout << "BLAS::TRSM Error: LDB < MAX(1,M)" << std::endl;
180  BadArgument = true;
181  }
182 
183  if (!BadArgument)
184  {
185  int i, j, k;
186  // Set the solution to the zero vector.
187  auto alpha_is_zero = (alpha == alpha_zero);
188  for (j = izero; j < n; j++)
189  {
190  for (i = izero; i < m; i++)
191  {
192  mask_assign(alpha_is_zero, B[j * ldb + i]) = B_zero;
193  }
194  }
195 
196  auto alpha_is_not_one = (alpha != alpha_one);
197 
198  { // Start the operations.
199  if (ESideChar[side] == 'L')
200  {
201  //
202  // Perform computations for OP(A)*X = alpha*B
203  //
204  if (ETranspChar[transa] == 'N')
205  {
206  //
207  // Compute B = alpha*inv( A )*B
208  //
209  if (EUploChar[uplo] == 'U')
210  {
211  // A is upper triangular.
212  for (j = izero; j < n; j++)
213  {
214  // Perform alpha*B if alpha is not 1.
215  for (i = izero; i < m; i++)
216  {
217  mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
218  }
219 
220  // Perform a backsolve for column j of B.
221  for (k = (m - ione); k > -ione; k--)
222  {
223  // If this entry is zero, we don't have to do anything.
224  auto B_is_not_zero = (B[j * ldb + k] != B_zero);
225 
226  if (noUnit)
227  {
228  mask_assign(B_is_not_zero, B[j * ldb + k]) /= A[k * lda + k];
229  }
230  for (i = izero; i < k; i++)
231  {
232  mask_assign(B_is_not_zero, B[j * ldb + i]) -= B[j * ldb + k] * A[k * lda + i];
233  }
234  }
235  }
236  }
237  else
238  { // A is lower triangular.
239  for (j = izero; j < n; j++)
240  {
241  // Perform alpha*B if alpha is not 1.
242  for (i = izero; i < m; i++)
243  {
244  mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
245  }
246  // Perform a forward solve for column j of B.
247  for (k = izero; k < m; k++)
248  {
249  // If this entry is zero, we don't have to do anything.
250  auto B_is_not_zero = (B[j * ldb + k] != B_zero);
251  if (noUnit)
252  {
253  mask_assign(B_is_not_zero, B[j * ldb + k]) /= A[k * lda + k];
254  }
255  for (i = k + ione; i < m; i++)
256  {
257  mask_assign(B_is_not_zero, B[j * ldb + i]) -= B[j * ldb + k] * A[k * lda + i];
258  }
259  }
260  }
261  } // end if (uplo == 'U')
262  } // if (transa =='N')
263  else
264  {
265  //
266  // Compute B = alpha*inv( A' )*B
267  // or B = alpha*inv( conj(A') )*B
268  //
269  if (EUploChar[uplo] == 'U')
270  {
271  // A is upper triangular.
272  for (j = izero; j < n; j++)
273  {
274  for (i = izero; i < m; i++)
275  {
276  temp = alpha * B[j * ldb + i];
277  if (noConj)
278  {
279  for (k = izero; k < i; k++)
280  {
281  temp -= A[i * lda + k] * B[j * ldb + k];
282  }
283  if (noUnit)
284  {
285  temp /= A[i * lda + i];
286  }
287  }
288  else
289  {
290  for (k = izero; k < i; k++)
291  {
292  temp -= ScalarTraits<A_type>::conjugate(A[i * lda + k]) * B[j * ldb + k];
293  }
294  if (noUnit)
295  {
296  temp /= ScalarTraits<A_type>::conjugate(A[i * lda + i]);
297  }
298  }
299  B[j * ldb + i] = temp;
300  }
301  }
302  }
303  else
304  { // A is lower triangular.
305  for (j = izero; j < n; j++)
306  {
307  for (i = (m - ione); i > -ione; i--)
308  {
309  temp = alpha * B[j * ldb + i];
310  if (noConj)
311  {
312  for (k = i + ione; k < m; k++)
313  {
314  temp -= A[i * lda + k] * B[j * ldb + k];
315  }
316  if (noUnit)
317  {
318  temp /= A[i * lda + i];
319  }
320  }
321  else
322  {
323  for (k = i + ione; k < m; k++)
324  {
325  temp -= ScalarTraits<A_type>::conjugate(A[i * lda + k]) * B[j * ldb + k];
326  }
327  if (noUnit)
328  {
329  temp /= ScalarTraits<A_type>::conjugate(A[i * lda + i]);
330  }
331  }
332  B[j * ldb + i] = temp;
333  }
334  }
335  }
336  }
337  } // if (side == 'L')
338  else
339  {
340  // side == 'R'
341  //
342  // Perform computations for X*OP(A) = alpha*B
343  //
344  if (ETranspChar[transa] == 'N')
345  {
346  //
347  // Compute B = alpha*B*inv( A )
348  //
349  if (EUploChar[uplo] == 'U')
350  {
351  // A is upper triangular.
352  // Perform a backsolve for column j of B.
353  for (j = izero; j < n; j++)
354  {
355  // Perform alpha*B if alpha is not 1.
356  for (i = izero; i < m; i++)
357  {
358  mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
359  }
360  for (k = izero; k < j; k++)
361  {
362  // If this entry is zero, we don't have to do anything.
363  auto A_is_not_zero = (A[j * lda + k] != A_zero);
364  for (i = izero; i < m; i++)
365  {
366  mask_assign(A_is_not_zero, B[j * ldb + i]) -= A[j * lda + k] * B[k * ldb + i];
367  }
368  }
369  if (noUnit)
370  {
371  temp = B_one / A[j * lda + j];
372  for (i = izero; i < m; i++)
373  {
374  B[j * ldb + i] *= temp;
375  }
376  }
377  }
378  }
379  else
380  { // A is lower triangular.
381  for (j = (n - ione); j > -ione; j--)
382  {
383  // Perform alpha*B if alpha is not 1.
384  for (i = izero; i < m; i++)
385  {
386  mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
387  }
388 
389  // Perform a forward solve for column j of B.
390  for (k = j + ione; k < n; k++)
391  {
392  // If this entry is zero, we don't have to do anything.
393  auto A_is_not_zero = (A[j * lda + k] != A_zero);
394  for (i = izero; i < m; i++)
395  {
396  mask_assign(A_is_not_zero, B[j * ldb + i]) -= A[j * lda + k] * B[k * ldb + i];
397  }
398  }
399  if (noUnit)
400  {
401  temp = B_one / A[j * lda + j];
402  for (i = izero; i < m; i++)
403  {
404  B[j * ldb + i] *= temp;
405  }
406  }
407  }
408  } // end if (uplo == 'U')
409  } // if (transa =='N')
410  else
411  {
412  //
413  // Compute B = alpha*B*inv( A' )
414  // or B = alpha*B*inv( conj(A') )
415  //
416  if (EUploChar[uplo] == 'U')
417  {
418  // A is upper triangular.
419  for (k = (n - ione); k > -ione; k--)
420  {
421  if (noUnit)
422  {
423  if (noConj)
424  temp = B_one / A[k * lda + k];
425  else
426  temp = B_one / ScalarTraits<A_type>::conjugate(A[k * lda + k]);
427  for (i = izero; i < m; i++)
428  {
429  B[k * ldb + i] *= temp;
430  }
431  }
432  for (j = izero; j < k; j++)
433  {
434  auto A_is_not_zero = (A[k * lda + j] != A_zero);
435  if (noConj)
436  mask_assign(A_is_not_zero, temp) = A[k * lda + j];
437  else
438  mask_assign(A_is_not_zero, temp) = ScalarTraits<A_type>::conjugate(A[k * lda + j]);
439  for (i = izero; i < m; i++)
440  {
441  mask_assign(A_is_not_zero, B[j * ldb + i]) -= temp * B[k * ldb + i];
442  }
443  }
444  for (i = izero; i < m; i++)
445  {
446  mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
447  }
448  }
449  }
450  else
451  { // A is lower triangular.
452  for (k = izero; k < n; k++)
453  {
454  if (noUnit)
455  {
456  if (noConj)
457  temp = B_one / A[k * lda + k];
458  else
459  temp = B_one / ScalarTraits<A_type>::conjugate(A[k * lda + k]);
460  for (i = izero; i < m; i++)
461  {
462  B[k * ldb + i] *= temp;
463  }
464  }
465  for (j = k + ione; j < n; j++)
466  {
467  auto A_is_not_zero = (A[k * lda + j] != A_zero);
468  if (noConj)
469  mask_assign(A_is_not_zero, temp) = A[k * lda + j];
470  else
471  mask_assign(A_is_not_zero, temp) = ScalarTraits<A_type>::conjugate(A[k * lda + j]);
472  for (i = izero; i < m; i++)
473  {
474  mask_assign(A_is_not_zero, B[j * ldb + i]) -= temp * B[k * ldb + i];
475  }
476  }
477  for (i = izero; i < m; i++)
478  {
479  mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
480  }
481  }
482  }
483  }
484  }
485  }
486  }
487  }
488 }; // class BLAS
489 // }
490 //}
491 
492 } // namespace Teuchos
493 
494 #endif // _TEUCHOS_BLAS__MP_VECTOR_HPP_
Teuchos::ScalarTraits< Sacado::MP::Vector< Storage > >::magnitudeType MagnitudeType
KOKKOS_INLINE_FUNCTION MaskedAssign< scalar > mask_assign(bool b, scalar *s)
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EDiagChar[]
static T conjugate(T a)
Sacado::ScalarType< Sacado::MP::Vector< Storage > >::type scalar_type
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EUploChar[]
void ROTG(ScalarType *da, ScalarType *db, ScalarType *c, ScalarType *s) const
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ESideChar[]
void TRSM(Teuchos::ESide side, Teuchos::EUplo uplo, Teuchos::ETransp transa, Teuchos::EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
Teuchos::DefaultBLASImpl< OrdinalType, Sacado::MP::Vector< Storage > > BLASType
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETranspChar[]
Sacado::ValueType< Sacado::MP::Vector< Storage > >::type ValueType