15 #ifndef __INTREPID2_KERNELS_HPP__
16 #define __INTREPID2_KERNELS_HPP__
18 #include "Intrepid2_ConfigDefs.hpp"
23 #include "Kokkos_Core.hpp"
30 template<
typename ScalarType,
34 KOKKOS_INLINE_FUNCTION
36 gemm_trans_notrans(
const ScalarType alpha,
39 const ScalarType beta,
47 for (ordinal_type i=0;i<m;++i)
48 for (ordinal_type j=0;j<n;++j) {
50 for (ordinal_type l=0;l<k;++l)
51 C(i,j) += alpha*A(l,i)*B(l,j);
55 template<
typename ScalarType,
59 KOKKOS_INLINE_FUNCTION
61 gemm_notrans_trans(
const ScalarType alpha,
64 const ScalarType beta,
72 for (ordinal_type i=0;i<m;++i)
73 for (ordinal_type j=0;j<n;++j) {
75 for (ordinal_type l=0;l<k;++l)
76 C(i,j) += alpha*A(i,l)*B(j,l);
80 template<
typename ScalarType,
84 KOKKOS_INLINE_FUNCTION
86 gemv_trans(
const ScalarType alpha,
89 const ScalarType beta,
96 for (ordinal_type i=0;i<m;++i) {
98 for (ordinal_type j=0;j<n;++j)
99 y(i) += alpha*A(j,i)*x(j);
103 template<
typename ScalarType,
107 KOKKOS_INLINE_FUNCTION
109 gemv_notrans(
const ScalarType alpha,
112 const ScalarType beta,
113 const yViewType &y) {
119 for (ordinal_type i=0;i<m;++i) {
121 for (ordinal_type j=0;j<n;++j)
122 y(i) += alpha*A(i,j)*x(j);
126 template<
typename matViewType>
127 KOKKOS_INLINE_FUNCTION
128 static typename matViewType::non_const_value_type
129 determinant(
const matViewType &mat) {
130 INTREPID2_TEST_FOR_ABORT(mat.extent(0) != mat.extent(1),
"mat should be a square matrix.");
131 INTREPID2_TEST_FOR_ABORT(mat.extent(0) > 3,
"Higher dimensions (> 3) are not supported.");
133 typename matViewType::non_const_value_type r_val(0);
134 const int m = mat.extent(0);
140 r_val = ( mat(0,0) * mat(1,1) -
141 mat(0,1) * mat(1,0) );
144 r_val = ( mat(0,0) * mat(1,1) * mat(2,2) +
145 mat(1,0) * mat(2,1) * mat(0,2) +
146 mat(2,0) * mat(0,1) * mat(1,2) -
147 mat(2,0) * mat(1,1) * mat(0,2) -
148 mat(0,0) * mat(2,1) * mat(1,2) -
149 mat(1,0) * mat(0,1) * mat(2,2) );
155 template<
typename matViewType,
156 typename invViewType>
157 KOKKOS_INLINE_FUNCTION
159 inverse(
const invViewType &inv,
160 const matViewType &mat) {
161 INTREPID2_TEST_FOR_ABORT(mat.extent(0) != mat.extent(1),
"mat should be a square matrix.");
162 INTREPID2_TEST_FOR_ABORT(inv.extent(0) != inv.extent(1),
"inv should be a square matrix.");
163 INTREPID2_TEST_FOR_ABORT(mat.extent(0) != inv.extent(0),
"mat and inv must have the same dimension.");
164 INTREPID2_TEST_FOR_ABORT(mat.extent(0) > 3,
"Higher dimensions (> 3) are not supported.");
165 INTREPID2_TEST_FOR_ABORT(mat.data() == inv.data(),
"mat and inv must have different data pointer.");
167 const auto val = determinant(mat);
168 const int m = mat.extent(0);
171 inv(0,0) = 1.0/mat(0,0);
175 inv(0,0) = mat(1,1)/val;
176 inv(1,1) = mat(0,0)/val;
178 inv(1,0) = - mat(1,0)/val;
179 inv(0,1) = - mat(0,1)/val;
184 const auto val0 = mat(1,1)*mat(2,2) - mat(2,1)*mat(1,2);
185 const auto val1 = - mat(1,0)*mat(2,2) + mat(2,0)*mat(1,2);
186 const auto val2 = mat(1,0)*mat(2,1) - mat(2,0)*mat(1,1);
193 const auto val0 = mat(2,1)*mat(0,2) - mat(0,1)*mat(2,2);
194 const auto val1 = mat(0,0)*mat(2,2) - mat(2,0)*mat(0,2);
195 const auto val2 = - mat(0,0)*mat(2,1) + mat(2,0)*mat(0,1);
202 const auto val0 = mat(0,1)*mat(1,2) - mat(1,1)*mat(0,2);
203 const auto val1 = - mat(0,0)*mat(1,2) + mat(1,0)*mat(0,2);
204 const auto val2 = mat(0,0)*mat(1,1) - mat(1,0)*mat(0,1);
215 template<
typename ScalarType,
219 KOKKOS_INLINE_FUNCTION
221 z_is_axby(
const zViewType &z,
222 const ScalarType alpha,
224 const ScalarType beta,
225 const yViewType &y) {
230 for (ordinal_type i=0;i<m;++i)
231 z(i) = alpha*x(i) + beta*y(i);
234 template<
typename AViewType>
235 KOKKOS_INLINE_FUNCTION
237 norm(
const AViewType &A,
const ENorm normType) {
238 typedef typename AViewType::non_const_value_type value_type;
239 const ordinal_type m = A.extent(0), n = A.extent(1);
243 for (ordinal_type i=0;i<m;++i)
244 for (ordinal_type j=0;j<n;++j)
245 r_val += A.access(i,j)*A.access(i,j);
250 for (ordinal_type i=0;i<m;++i)
251 for (ordinal_type j=0;j<n;++j) {
253 r_val = (r_val < current ? current : r_val);
258 for (ordinal_type i=0;i<m;++i)
259 for (ordinal_type j=0;j<n;++j)
264 Kokkos::abort(
"norm type is not supported");
271 template<
typename dstViewType,
272 typename srcViewType>
273 KOKKOS_INLINE_FUNCTION
275 copy(
const dstViewType &dst,
const srcViewType &src) {
276 if (dst.data() != src.data()) {
277 const ordinal_type m = dst.extent(0), n = dst.extent(1);
278 for (ordinal_type i=0;i<m;++i)
279 for (ordinal_type j=0;j<n;++j)
280 dst.access(i,j) = src.access(i,j);
285 template<
typename yViewType,
288 KOKKOS_FORCEINLINE_FUNCTION
290 matvec_trans_product_d2(
const yViewType &y,
292 const xViewType &x ) {
293 y(0) = A(0,0)*x(0) + A(1,0)*x(1);
294 y(1) = A(0,1)*x(0) + A(1,1)*x(1);
297 template<
typename yViewType,
300 KOKKOS_FORCEINLINE_FUNCTION
302 matvec_trans_product_d3(
const yViewType &y,
304 const xViewType &x ) {
305 y(0) = A(0,0)*x(0) + A(1,0)*x(1) + A(2,0)*x(2);
306 y(1) = A(0,1)*x(0) + A(1,1)*x(1) + A(2,1)*x(2);
307 y(2) = A(0,2)*x(0) + A(1,2)*x(1) + A(2,2)*x(2);
311 template<
typename yViewType,
314 KOKKOS_FORCEINLINE_FUNCTION
316 matvec_product_d2(
const yViewType &y,
318 const xViewType &x ) {
319 y(0) = A(0,0)*x(0) + A(0,1)*x(1);
320 y(1) = A(1,0)*x(0) + A(1,1)*x(1);
323 template<
typename yViewType,
326 KOKKOS_FORCEINLINE_FUNCTION
328 matvec_product_d3(
const yViewType &y,
330 const xViewType &x ) {
331 y(0) = A(0,0)*x(0) + A(0,1)*x(1) + A(0,2)*x(2);
332 y(1) = A(1,0)*x(0) + A(1,1)*x(1) + A(1,2)*x(2);
333 y(2) = A(2,0)*x(0) + A(2,1)*x(1) + A(2,2)*x(2);
336 template<
typename yViewType,
339 KOKKOS_FORCEINLINE_FUNCTION
341 matvec_product(
const yViewType &y,
343 const xViewType &x ) {
344 switch (y.extent(0)) {
345 case 2: matvec_product_d2(y, A, x);
break;
346 case 3: matvec_product_d3(y, A, x);
break;
348 INTREPID2_TEST_FOR_ABORT(
true,
"matvec only support dimension 2 and 3 (consider to use gemv interface).");
354 template<
typename zViewType,
357 KOKKOS_FORCEINLINE_FUNCTION
359 vector_product_d2(
const zViewType &z,
361 const yViewType &y ) {
362 z(0) = x(0)*y(1) - x(1)*y(0);
365 template<
typename zViewType,
368 KOKKOS_FORCEINLINE_FUNCTION
370 vector_product_d3(
const zViewType &z,
372 const yViewType &y ) {
373 z(0) = x(1)*y(2) - x(2)*y(1);
374 z(1) = x(2)*y(0) - x(0)*y(2);
375 z(2) = x(0)*y(1) - x(1)*y(0);
383 template<
typename xViewType,
385 KOKKOS_FORCEINLINE_FUNCTION
386 static typename xViewType::value_type
387 dot(
const xViewType x,
388 const yViewType y ) {
389 typename xViewType::value_type r_val(0);
390 ordinal_type i = 0, iend = x.extent(0);
392 r_val += ( x(i )*y(i ) +
402 template<
typename xViewType,
404 KOKKOS_FORCEINLINE_FUNCTION
405 static typename xViewType::value_type
406 dot_d2(
const xViewType x,
407 const yViewType y ) {
408 return ( x(0)*y(0) + x(1)*y(1) );
411 template<
typename xViewType,
413 KOKKOS_FORCEINLINE_FUNCTION
414 static typename xViewType::value_type
415 dot_d3(
const xViewType x,
416 const yViewType y ) {
417 return ( x(0)*y(0) + x(1)*y(1) + x(2)*y(2) );
420 template<
typename AViewType,
421 typename alphaScalarType>
422 KOKKOS_FORCEINLINE_FUNCTION
424 scale_mat( AViewType &A,
425 const alphaScalarType alpha ) {
430 for (ordinal_type i=0;i<iend;++i)
431 for (ordinal_type j=0;j<jend;++j)
435 template<
typename AViewType,
436 typename alphaScalarType>
437 KOKKOS_FORCEINLINE_FUNCTION
439 inv_scale_mat( AViewType &A,
440 const alphaScalarType alpha ) {
445 for (ordinal_type i=0;i<iend;++i)
446 for (ordinal_type j=0;j<jend;++j)
450 template<
typename AViewType,
451 typename alphaScalarType,
453 KOKKOS_FORCEINLINE_FUNCTION
455 scalar_mult_mat( AViewType &A,
456 const alphaScalarType alpha,
457 const BViewType &B ) {
462 for (ordinal_type i=0;i<iend;++i)
463 for (ordinal_type j=0;j<jend;++j)
464 A(i,j) = alpha*B(i,j);
467 template<
typename AViewType,
468 typename alphaScalarType,
470 KOKKOS_FORCEINLINE_FUNCTION
472 inv_scalar_mult_mat( AViewType &A,
473 const alphaScalarType alpha,
474 const BViewType &B ) {
479 for (ordinal_type i=0;i<iend;++i)
480 for (ordinal_type j=0;j<jend;++j)
481 A(i,j) = B(i,j)/alpha;
Header function for Intrepid2::Util class and other utility functions.
Contains definitions of custom data types in Intrepid2.