48 #ifndef __INTREPID2_KERNELS_HPP__
49 #define __INTREPID2_KERNELS_HPP__
51 #include "Intrepid2_ConfigDefs.hpp"
56 #include "Kokkos_Core.hpp"
63 template<
typename ScalarType,
67 KOKKOS_INLINE_FUNCTION
69 gemm_trans_notrans(
const ScalarType alpha,
72 const ScalarType beta,
80 for (ordinal_type i=0;i<m;++i)
81 for (ordinal_type j=0;j<n;++j) {
83 for (ordinal_type l=0;l<k;++l)
84 C(i,j) += alpha*A(l,i)*B(l,j);
88 template<
typename ScalarType,
92 KOKKOS_INLINE_FUNCTION
94 gemm_notrans_trans(
const ScalarType alpha,
97 const ScalarType beta,
105 for (ordinal_type i=0;i<m;++i)
106 for (ordinal_type j=0;j<n;++j) {
108 for (ordinal_type l=0;l<k;++l)
109 C(i,j) += alpha*A(i,l)*B(j,l);
113 template<
typename ScalarType,
117 KOKKOS_INLINE_FUNCTION
119 gemv_trans(
const ScalarType alpha,
122 const ScalarType beta,
123 const yViewType &y) {
129 for (ordinal_type i=0;i<m;++i) {
131 for (ordinal_type j=0;j<n;++j)
132 y(i) += alpha*A(j,i)*x(j);
136 template<
typename ScalarType,
140 KOKKOS_INLINE_FUNCTION
142 gemv_notrans(
const ScalarType alpha,
145 const ScalarType beta,
146 const yViewType &y) {
152 for (ordinal_type i=0;i<m;++i) {
154 for (ordinal_type j=0;j<n;++j)
155 y(i) += alpha*A(i,j)*x(j);
159 template<
typename matViewType>
160 KOKKOS_INLINE_FUNCTION
161 static typename matViewType::non_const_value_type
162 determinant(
const matViewType &mat) {
163 INTREPID2_TEST_FOR_ABORT(mat.extent(0) != mat.extent(1),
"mat should be a square matrix.");
164 INTREPID2_TEST_FOR_ABORT(mat.extent(0) > 3,
"Higher dimensions (> 3) are not supported.");
166 typename matViewType::non_const_value_type r_val(0);
167 const int m = mat.extent(0);
173 r_val = ( mat(0,0) * mat(1,1) -
174 mat(0,1) * mat(1,0) );
177 r_val = ( mat(0,0) * mat(1,1) * mat(2,2) +
178 mat(1,0) * mat(2,1) * mat(0,2) +
179 mat(2,0) * mat(0,1) * mat(1,2) -
180 mat(2,0) * mat(1,1) * mat(0,2) -
181 mat(0,0) * mat(2,1) * mat(1,2) -
182 mat(1,0) * mat(0,1) * mat(2,2) );
188 template<
typename matViewType,
189 typename invViewType>
190 KOKKOS_INLINE_FUNCTION
192 inverse(
const invViewType &inv,
193 const matViewType &mat) {
194 INTREPID2_TEST_FOR_ABORT(mat.extent(0) != mat.extent(1),
"mat should be a square matrix.");
195 INTREPID2_TEST_FOR_ABORT(inv.extent(0) != inv.extent(1),
"inv should be a square matrix.");
196 INTREPID2_TEST_FOR_ABORT(mat.extent(0) != inv.extent(0),
"mat and inv must have the same dimension.");
197 INTREPID2_TEST_FOR_ABORT(mat.extent(0) > 3,
"Higher dimensions (> 3) are not supported.");
198 INTREPID2_TEST_FOR_ABORT(mat.data() == inv.data(),
"mat and inv must have different data pointer.");
200 const auto val = determinant(mat);
201 const int m = mat.extent(0);
204 inv(0,0) = 1.0/mat(0,0);
208 inv(0,0) = mat(1,1)/val;
209 inv(1,1) = mat(0,0)/val;
211 inv(1,0) = - mat(1,0)/val;
212 inv(0,1) = - mat(0,1)/val;
217 const auto val0 = mat(1,1)*mat(2,2) - mat(2,1)*mat(1,2);
218 const auto val1 = - mat(1,0)*mat(2,2) + mat(2,0)*mat(1,2);
219 const auto val2 = mat(1,0)*mat(2,1) - mat(2,0)*mat(1,1);
226 const auto val0 = mat(2,1)*mat(0,2) - mat(0,1)*mat(2,2);
227 const auto val1 = mat(0,0)*mat(2,2) - mat(2,0)*mat(0,2);
228 const auto val2 = - mat(0,0)*mat(2,1) + mat(2,0)*mat(0,1);
235 const auto val0 = mat(0,1)*mat(1,2) - mat(1,1)*mat(0,2);
236 const auto val1 = - mat(0,0)*mat(1,2) + mat(1,0)*mat(0,2);
237 const auto val2 = mat(0,0)*mat(1,1) - mat(1,0)*mat(0,1);
248 template<
typename ScalarType,
252 KOKKOS_INLINE_FUNCTION
254 z_is_axby(
const zViewType &z,
255 const ScalarType alpha,
257 const ScalarType beta,
258 const yViewType &y) {
263 for (ordinal_type i=0;i<m;++i)
264 z(i) = alpha*x(i) + beta*y(i);
267 template<
typename AViewType>
268 KOKKOS_INLINE_FUNCTION
270 norm(
const AViewType &A,
const ENorm normType) {
271 typedef typename AViewType::non_const_value_type value_type;
272 const ordinal_type m = A.extent(0), n = A.extent(1);
276 for (ordinal_type i=0;i<m;++i)
277 for (ordinal_type j=0;j<n;++j)
278 r_val += A(i,j)*A(i,j);
283 for (ordinal_type i=0;i<m;++i)
284 for (ordinal_type j=0;j<n;++j) {
286 r_val = (r_val < current ? current : r_val);
291 for (ordinal_type i=0;i<m;++i)
292 for (ordinal_type j=0;j<n;++j)
297 Kokkos::abort(
"norm type is not supported");
304 template<
typename dstViewType,
305 typename srcViewType>
306 KOKKOS_INLINE_FUNCTION
308 copy(
const dstViewType &dst,
const srcViewType &src) {
309 if (dst.data() != src.data()) {
310 const ordinal_type m = dst.extent(0), n = dst.extent(1);
311 for (ordinal_type i=0;i<m;++i)
312 for (ordinal_type j=0;j<n;++j)
318 template<
typename yViewType,
321 KOKKOS_FORCEINLINE_FUNCTION
323 matvec_trans_product_d2(
const yViewType &y,
325 const xViewType &x ) {
326 y(0) = A(0,0)*x(0) + A(1,0)*x(1);
327 y(1) = A(0,1)*x(0) + A(1,1)*x(1);
330 template<
typename yViewType,
333 KOKKOS_FORCEINLINE_FUNCTION
335 matvec_trans_product_d3(
const yViewType &y,
337 const xViewType &x ) {
338 y(0) = A(0,0)*x(0) + A(1,0)*x(1) + A(2,0)*x(2);
339 y(1) = A(0,1)*x(0) + A(1,1)*x(1) + A(2,1)*x(2);
340 y(2) = A(0,2)*x(0) + A(1,2)*x(1) + A(2,2)*x(2);
344 template<
typename yViewType,
347 KOKKOS_FORCEINLINE_FUNCTION
349 matvec_product_d2(
const yViewType &y,
351 const xViewType &x ) {
352 y(0) = A(0,0)*x(0) + A(0,1)*x(1);
353 y(1) = A(1,0)*x(0) + A(1,1)*x(1);
356 template<
typename yViewType,
359 KOKKOS_FORCEINLINE_FUNCTION
361 matvec_product_d3(
const yViewType &y,
363 const xViewType &x ) {
364 y(0) = A(0,0)*x(0) + A(0,1)*x(1) + A(0,2)*x(2);
365 y(1) = A(1,0)*x(0) + A(1,1)*x(1) + A(1,2)*x(2);
366 y(2) = A(2,0)*x(0) + A(2,1)*x(1) + A(2,2)*x(2);
369 template<
typename yViewType,
372 KOKKOS_FORCEINLINE_FUNCTION
374 matvec_product(
const yViewType &y,
376 const xViewType &x ) {
377 switch (y.extent(0)) {
378 case 2: matvec_product_d2(y, A, x);
break;
379 case 3: matvec_product_d3(y, A, x);
break;
381 INTREPID2_TEST_FOR_ABORT(
true,
"matvec only support dimension 2 and 3 (consider to use gemv interface).");
387 template<
typename zViewType,
390 KOKKOS_FORCEINLINE_FUNCTION
392 vector_product_d2(
const zViewType &z,
394 const yViewType &y ) {
395 z(0) = x(0)*y(1) - x(1)*y(0);
398 template<
typename zViewType,
401 KOKKOS_FORCEINLINE_FUNCTION
403 vector_product_d3(
const zViewType &z,
405 const yViewType &y ) {
406 z(0) = x(1)*y(2) - x(2)*y(1);
407 z(1) = x(2)*y(0) - x(0)*y(2);
408 z(2) = x(0)*y(1) - x(1)*y(0);
416 template<
typename xViewType,
418 KOKKOS_FORCEINLINE_FUNCTION
419 static typename xViewType::value_type
420 dot(
const xViewType x,
421 const yViewType y ) {
422 typename xViewType::value_type r_val(0);
423 ordinal_type i = 0, iend = x.extent(0);
425 r_val += ( x(i )*y(i ) +
435 template<
typename xViewType,
437 KOKKOS_FORCEINLINE_FUNCTION
438 static typename xViewType::value_type
439 dot_d2(
const xViewType x,
440 const yViewType y ) {
441 return ( x(0)*y(0) + x(1)*y(1) );
444 template<
typename xViewType,
446 KOKKOS_FORCEINLINE_FUNCTION
447 static typename xViewType::value_type
448 dot_d3(
const xViewType x,
449 const yViewType y ) {
450 return ( x(0)*y(0) + x(1)*y(1) + x(2)*y(2) );
453 template<
typename AViewType,
454 typename alphaScalarType>
455 KOKKOS_FORCEINLINE_FUNCTION
457 scale_mat( AViewType &A,
458 const alphaScalarType alpha ) {
463 for (ordinal_type i=0;i<iend;++i)
464 for (ordinal_type j=0;j<jend;++j)
468 template<
typename AViewType,
469 typename alphaScalarType>
470 KOKKOS_FORCEINLINE_FUNCTION
472 inv_scale_mat( AViewType &A,
473 const alphaScalarType alpha ) {
478 for (ordinal_type i=0;i<iend;++i)
479 for (ordinal_type j=0;j<jend;++j)
483 template<
typename AViewType,
484 typename alphaScalarType,
486 KOKKOS_FORCEINLINE_FUNCTION
488 scalar_mult_mat( AViewType &A,
489 const alphaScalarType alpha,
490 const BViewType &B ) {
495 for (ordinal_type i=0;i<iend;++i)
496 for (ordinal_type j=0;j<jend;++j)
497 A(i,j) = alpha*B(i,j);
500 template<
typename AViewType,
501 typename alphaScalarType,
503 KOKKOS_FORCEINLINE_FUNCTION
505 inv_scalar_mult_mat( AViewType &A,
506 const alphaScalarType alpha,
507 const BViewType &B ) {
512 for (ordinal_type i=0;i<iend;++i)
513 for (ordinal_type j=0;j<jend;++j)
514 A(i,j) = B(i,j)/alpha;
Header function for Intrepid2::Util class and other utility functions.
Contains definitions of custom data types in Intrepid2.