10 #ifndef _TEUCHOS_BLAS_MP_VECTOR_HPP_
11 #define _TEUCHOS_BLAS_MP_VECTOR_HPP_
22 template <
typename Storage>
37 ScalarType r, roe, scale, z, da_scaled, db_scaled;
38 auto m_da = (STS::magnitude(*da) > STS::magnitude(*db));
41 scale = STS::magnitude(*da) + STS::magnitude(*db);
43 auto m_scale = scale != STS::zero();
57 r = scale * STS::squareroot(da_scaled * da_scaled + db_scaled * db_scaled);
70 mask_assign(*c != STS::zero(), z) /= {STS::one(), *c, STS::zero()};
86 template <
typename OrdinalType,
typename Storage>
91 typedef typename Sacado::ValueType<Sacado::MP::Vector<Storage>>::type
ValueType;
92 typedef typename Sacado::ScalarType<Sacado::MP::Vector<Storage>>::type
scalar_type;
97 template <
typename alpha_type,
typename A_type>
100 const OrdinalType m,
const OrdinalType n,
101 const alpha_type &alpha,
102 const A_type *
A,
const OrdinalType lda,
113 OrdinalType NRowA = m;
114 bool BadArgument =
false;
124 if (n == izero || m == izero)
130 std::cout <<
"BLAS::TRSM Error: M == " << m << std::endl;
135 std::cout <<
"BLAS::TRSM Error: N == " << n << std::endl;
140 std::cout <<
"BLAS::TRSM Error: LDA < " << NRowA << std::endl;
145 std::cout <<
"BLAS::TRSM Error: LDB < MAX(1,M)" << std::endl;
153 auto alpha_is_zero = (alpha == alpha_zero);
154 for (j = izero; j < n; j++)
156 for (i = izero; i < m; i++)
158 mask_assign(alpha_is_zero, B[j * ldb + i]) = B_zero;
162 auto alpha_is_not_one = (alpha != alpha_one);
178 for (j = izero; j < n; j++)
181 for (i = izero; i < m; i++)
183 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
187 for (k = (m - ione); k > -ione; k--)
190 auto B_is_not_zero = (B[j * ldb + k] != B_zero);
194 mask_assign(B_is_not_zero, B[j * ldb + k]) /= A[k * lda + k];
196 for (i = izero; i < k; i++)
198 mask_assign(B_is_not_zero, B[j * ldb + i]) -= B[j * ldb + k] * A[k * lda + i];
205 for (j = izero; j < n; j++)
208 for (i = izero; i < m; i++)
210 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
213 for (k = izero; k < m; k++)
216 auto B_is_not_zero = (B[j * ldb + k] != B_zero);
219 mask_assign(B_is_not_zero, B[j * ldb + k]) /= A[k * lda + k];
221 for (i = k + ione; i < m; i++)
223 mask_assign(B_is_not_zero, B[j * ldb + i]) -= B[j * ldb + k] * A[k * lda + i];
238 for (j = izero; j < n; j++)
240 for (i = izero; i < m; i++)
242 temp = alpha * B[j * ldb + i];
245 for (k = izero; k < i; k++)
247 temp -= A[i * lda + k] * B[j * ldb + k];
251 temp /= A[i * lda + i];
256 for (k = izero; k < i; k++)
265 B[j * ldb + i] = temp;
271 for (j = izero; j < n; j++)
273 for (i = (m - ione); i > -ione; i--)
275 temp = alpha * B[j * ldb + i];
278 for (k = i + ione; k < m; k++)
280 temp -= A[i * lda + k] * B[j * ldb + k];
284 temp /= A[i * lda + i];
289 for (k = i + ione; k < m; k++)
298 B[j * ldb + i] = temp;
319 for (j = izero; j < n; j++)
322 for (i = izero; i < m; i++)
324 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
326 for (k = izero; k <
j; k++)
329 auto A_is_not_zero = (A[j * lda + k] != A_zero);
330 for (i = izero; i < m; i++)
332 mask_assign(A_is_not_zero, B[j * ldb + i]) -= A[j * lda + k] * B[k * ldb + i];
337 temp = B_one / A[j * lda +
j];
338 for (i = izero; i < m; i++)
340 B[j * ldb + i] *= temp;
347 for (j = (n - ione); j > -ione; j--)
350 for (i = izero; i < m; i++)
352 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
356 for (k = j + ione; k < n; k++)
359 auto A_is_not_zero = (A[j * lda + k] != A_zero);
360 for (i = izero; i < m; i++)
362 mask_assign(A_is_not_zero, B[j * ldb + i]) -= A[j * lda + k] * B[k * ldb + i];
367 temp = B_one / A[j * lda +
j];
368 for (i = izero; i < m; i++)
370 B[j * ldb + i] *= temp;
385 for (k = (n - ione); k > -ione; k--)
390 temp = B_one / A[k * lda + k];
393 for (i = izero; i < m; i++)
395 B[k * ldb + i] *= temp;
398 for (j = izero; j < k; j++)
400 auto A_is_not_zero = (A[k * lda +
j] != A_zero);
405 for (i = izero; i < m; i++)
407 mask_assign(A_is_not_zero, B[j * ldb + i]) -= temp * B[k * ldb + i];
410 for (i = izero; i < m; i++)
412 mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
418 for (k = izero; k < n; k++)
423 temp = B_one / A[k * lda + k];
426 for (i = izero; i < m; i++)
428 B[k * ldb + i] *= temp;
431 for (j = k + ione; j < n; j++)
433 auto A_is_not_zero = (A[k * lda +
j] != A_zero);
438 for (i = izero; i < m; i++)
440 mask_assign(A_is_not_zero, B[j * ldb + i]) -= temp * B[k * ldb + i];
443 for (i = izero; i < m; i++)
445 mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
460 #endif // _TEUCHOS_BLAS__MP_VECTOR_HPP_
Teuchos::ScalarTraits< Sacado::MP::Vector< Storage > >::magnitudeType MagnitudeType
KOKKOS_INLINE_FUNCTION MaskedAssign< scalar > mask_assign(bool b, scalar *s)
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EDiagChar[]
Sacado::MP::Vector< Storage > ScalarType
Sacado::ScalarType< Sacado::MP::Vector< Storage > >::type scalar_type
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EUploChar[]
void ROTG(ScalarType *da, ScalarType *db, ScalarType *c, ScalarType *s) const
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ESideChar[]
void TRSM(Teuchos::ESide side, Teuchos::EUplo uplo, Teuchos::ETransp transa, Teuchos::EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
Teuchos::DefaultBLASImpl< OrdinalType, Sacado::MP::Vector< Storage > > BLASType
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETranspChar[]
Sacado::ValueType< Sacado::MP::Vector< Storage > >::type ValueType
Sacado::MP::Vector< Storage > ScalarType