42 #ifndef _TEUCHOS_BLAS_MP_VECTOR_HPP_
43 #define _TEUCHOS_BLAS_MP_VECTOR_HPP_
54 template <
typename Storage>
69 ScalarType r, roe, scale, z, da_scaled, db_scaled;
70 auto m_da = (STS::magnitude(*da) > STS::magnitude(*db));
73 scale = STS::magnitude(*da) + STS::magnitude(*db);
75 auto m_scale = scale != STS::zero();
89 r = scale * STS::squareroot(da_scaled * da_scaled + db_scaled * db_scaled);
102 mask_assign(*c != STS::zero(), z) /= {STS::one(), *c, STS::zero()};
118 template <
typename OrdinalType,
typename Storage>
123 typedef typename Sacado::ValueType<Sacado::MP::Vector<Storage>>::type
ValueType;
124 typedef typename Sacado::ScalarType<Sacado::MP::Vector<Storage>>::type
scalar_type;
129 template <
typename alpha_type,
typename A_type>
132 const OrdinalType m,
const OrdinalType n,
133 const alpha_type &alpha,
134 const A_type *
A,
const OrdinalType lda,
147 OrdinalType NRowA = m;
148 bool BadArgument =
false;
158 if (n == izero || m == izero)
164 std::cout <<
"BLAS::TRSM Error: M == " << m << std::endl;
169 std::cout <<
"BLAS::TRSM Error: N == " << n << std::endl;
174 std::cout <<
"BLAS::TRSM Error: LDA < " << NRowA << std::endl;
179 std::cout <<
"BLAS::TRSM Error: LDB < MAX(1,M)" << std::endl;
187 auto alpha_is_zero = (alpha == alpha_zero);
188 for (j = izero; j < n; j++)
190 for (i = izero; i < m; i++)
192 mask_assign(alpha_is_zero, B[j * ldb + i]) = B_zero;
196 auto alpha_is_not_one = (alpha != alpha_one);
212 for (j = izero; j < n; j++)
215 for (i = izero; i < m; i++)
217 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
221 for (k = (m - ione); k > -ione; k--)
224 auto B_is_not_zero = (B[j * ldb + k] != B_zero);
228 mask_assign(B_is_not_zero, B[j * ldb + k]) /= A[k * lda + k];
230 for (i = izero; i < k; i++)
232 mask_assign(B_is_not_zero, B[j * ldb + i]) -= B[j * ldb + k] * A[k * lda + i];
239 for (j = izero; j < n; j++)
242 for (i = izero; i < m; i++)
244 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
247 for (k = izero; k < m; k++)
250 auto B_is_not_zero = (B[j * ldb + k] != B_zero);
253 mask_assign(B_is_not_zero, B[j * ldb + k]) /= A[k * lda + k];
255 for (i = k + ione; i < m; i++)
257 mask_assign(B_is_not_zero, B[j * ldb + i]) -= B[j * ldb + k] * A[k * lda + i];
272 for (j = izero; j < n; j++)
274 for (i = izero; i < m; i++)
276 temp = alpha * B[j * ldb + i];
279 for (k = izero; k < i; k++)
281 temp -= A[i * lda + k] * B[j * ldb + k];
285 temp /= A[i * lda + i];
290 for (k = izero; k < i; k++)
299 B[j * ldb + i] = temp;
305 for (j = izero; j < n; j++)
307 for (i = (m - ione); i > -ione; i--)
309 temp = alpha * B[j * ldb + i];
312 for (k = i + ione; k < m; k++)
314 temp -= A[i * lda + k] * B[j * ldb + k];
318 temp /= A[i * lda + i];
323 for (k = i + ione; k < m; k++)
332 B[j * ldb + i] = temp;
353 for (j = izero; j < n; j++)
356 for (i = izero; i < m; i++)
358 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
360 for (k = izero; k <
j; k++)
363 auto A_is_not_zero = (A[j * lda + k] != A_zero);
364 for (i = izero; i < m; i++)
366 mask_assign(A_is_not_zero, B[j * ldb + i]) -= A[j * lda + k] * B[k * ldb + i];
371 temp = B_one / A[j * lda +
j];
372 for (i = izero; i < m; i++)
374 B[j * ldb + i] *= temp;
381 for (j = (n - ione); j > -ione; j--)
384 for (i = izero; i < m; i++)
386 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
390 for (k = j + ione; k < n; k++)
393 auto A_is_not_zero = (A[j * lda + k] != A_zero);
394 for (i = izero; i < m; i++)
396 mask_assign(A_is_not_zero, B[j * ldb + i]) -= A[j * lda + k] * B[k * ldb + i];
401 temp = B_one / A[j * lda +
j];
402 for (i = izero; i < m; i++)
404 B[j * ldb + i] *= temp;
419 for (k = (n - ione); k > -ione; k--)
424 temp = B_one / A[k * lda + k];
427 for (i = izero; i < m; i++)
429 B[k * ldb + i] *= temp;
432 for (j = izero; j < k; j++)
434 auto A_is_not_zero = (A[k * lda +
j] != A_zero);
439 for (i = izero; i < m; i++)
441 mask_assign(A_is_not_zero, B[j * ldb + i]) -= temp * B[k * ldb + i];
444 for (i = izero; i < m; i++)
446 mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
452 for (k = izero; k < n; k++)
457 temp = B_one / A[k * lda + k];
460 for (i = izero; i < m; i++)
462 B[k * ldb + i] *= temp;
465 for (j = k + ione; j < n; j++)
467 auto A_is_not_zero = (A[k * lda +
j] != A_zero);
472 for (i = izero; i < m; i++)
474 mask_assign(A_is_not_zero, B[j * ldb + i]) -= temp * B[k * ldb + i];
477 for (i = izero; i < m; i++)
479 mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
494 #endif // _TEUCHOS_BLAS__MP_VECTOR_HPP_
Teuchos::ScalarTraits< Sacado::MP::Vector< Storage > >::magnitudeType MagnitudeType
KOKKOS_INLINE_FUNCTION MaskedAssign< scalar > mask_assign(bool b, scalar *s)
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EDiagChar[]
Sacado::MP::Vector< Storage > ScalarType
Sacado::ScalarType< Sacado::MP::Vector< Storage > >::type scalar_type
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EUploChar[]
void ROTG(ScalarType *da, ScalarType *db, ScalarType *c, ScalarType *s) const
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ESideChar[]
void TRSM(Teuchos::ESide side, Teuchos::EUplo uplo, Teuchos::ETransp transa, Teuchos::EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
Teuchos::DefaultBLASImpl< OrdinalType, Sacado::MP::Vector< Storage > > BLASType
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETranspChar[]
Sacado::ValueType< Sacado::MP::Vector< Storage > >::type ValueType
Sacado::MP::Vector< Storage > ScalarType