Intrepid2
Intrepid2_Kernels.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ************************************************************************
3 //
4 // Intrepid2 Package
5 // Copyright (2007) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Kyungjoo Kim (kyukim@sandia.gov), or
38 // Mauro Perego (mperego@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 
48 #ifndef __INTREPID2_KERNELS_HPP__
49 #define __INTREPID2_KERNELS_HPP__
50 
51 #include "Intrepid2_ConfigDefs.hpp"
52 
53 #include "Intrepid2_Types.hpp"
54 #include "Intrepid2_Utils.hpp"
55 
56 #include "Kokkos_Core.hpp"
57 
58 namespace Intrepid2 {
59 
60  namespace Kernels {
61 
62  struct Serial {
63  template<typename ScalarType,
64  typename AViewType,
65  typename BViewType,
66  typename CViewType>
67  KOKKOS_INLINE_FUNCTION
68  static void
69  gemm_trans_notrans(const ScalarType alpha,
70  const AViewType &A,
71  const BViewType &B,
72  const ScalarType beta,
73  const CViewType &C) {
74  //C = beta*C + alpha * A'*B
75  const ordinal_type
76  m = C.extent(0),
77  n = C.extent(1),
78  k = B.extent(0);
79 
80  for (ordinal_type i=0;i<m;++i)
81  for (ordinal_type j=0;j<n;++j) {
82  C(i,j) *= beta;
83  for (ordinal_type l=0;l<k;++l)
84  C(i,j) += alpha*A(l,i)*B(l,j);
85  }
86  }
87 
88  template<typename ScalarType,
89  typename AViewType,
90  typename BViewType,
91  typename CViewType>
92  KOKKOS_INLINE_FUNCTION
93  static void
94  gemm_notrans_trans(const ScalarType alpha,
95  const AViewType &A,
96  const BViewType &B,
97  const ScalarType beta,
98  const CViewType &C) {
99  //C = beta*C + alpha * A*B'
100  const ordinal_type
101  m = C.extent(0),
102  n = C.extent(1),
103  k = A.extent(1);
104 
105  for (ordinal_type i=0;i<m;++i)
106  for (ordinal_type j=0;j<n;++j) {
107  C(i,j) *= beta;
108  for (ordinal_type l=0;l<k;++l)
109  C(i,j) += alpha*A(i,l)*B(j,l);
110  }
111  }
112 
113  template<typename ScalarType,
114  typename AViewType,
115  typename xViewType,
116  typename yViewType>
117  KOKKOS_INLINE_FUNCTION
118  static void
119  gemv_trans(const ScalarType alpha,
120  const AViewType &A,
121  const xViewType &x,
122  const ScalarType beta,
123  const yViewType &y) {
124  //y = beta*y + alpha * A'*x
125  const ordinal_type
126  m = y.extent(0),
127  n = x.extent(0);
128 
129  for (ordinal_type i=0;i<m;++i) {
130  y(i) *= beta;
131  for (ordinal_type j=0;j<n;++j)
132  y(i) += alpha*A(j,i)*x(j);
133  }
134  }
135 
136  template<typename ScalarType,
137  typename AViewType,
138  typename xViewType,
139  typename yViewType>
140  KOKKOS_INLINE_FUNCTION
141  static void
142  gemv_notrans(const ScalarType alpha,
143  const AViewType &A,
144  const xViewType &x,
145  const ScalarType beta,
146  const yViewType &y) {
147  //y = beta*y + alpha * A*x
148  const ordinal_type
149  m = y.extent(0),
150  n = x.extent(0);
151 
152  for (ordinal_type i=0;i<m;++i) {
153  y(i) *= beta;
154  for (ordinal_type j=0;j<n;++j)
155  y(i) += alpha*A(i,j)*x(j);
156  }
157  }
158 
159  template<typename matViewType>
160  KOKKOS_INLINE_FUNCTION
161  static typename matViewType::non_const_value_type
162  determinant(const matViewType &mat) {
163  INTREPID2_TEST_FOR_ABORT(mat.extent(0) != mat.extent(1), "mat should be a square matrix.");
164  INTREPID2_TEST_FOR_ABORT(mat.extent(0) > 3, "Higher dimensions (> 3) are not supported.");
165 
166  typename matViewType::non_const_value_type r_val(0);
167  const int m = mat.extent(0);
168  switch (m) {
169  case 1:
170  r_val = mat(0,0);
171  break;
172  case 2:
173  r_val = ( mat(0,0) * mat(1,1) -
174  mat(0,1) * mat(1,0) );
175  break;
176  case 3:
177  r_val = ( mat(0,0) * mat(1,1) * mat(2,2) +
178  mat(1,0) * mat(2,1) * mat(0,2) +
179  mat(2,0) * mat(0,1) * mat(1,2) -
180  mat(2,0) * mat(1,1) * mat(0,2) -
181  mat(0,0) * mat(2,1) * mat(1,2) -
182  mat(1,0) * mat(0,1) * mat(2,2) );
183  break;
184  }
185  return r_val;
186  }
187 
188  template<typename matViewType,
189  typename invViewType>
190  KOKKOS_INLINE_FUNCTION
191  static void
192  inverse(const invViewType &inv,
193  const matViewType &mat) {
194  INTREPID2_TEST_FOR_ABORT(mat.extent(0) != mat.extent(1), "mat should be a square matrix.");
195  INTREPID2_TEST_FOR_ABORT(inv.extent(0) != inv.extent(1), "inv should be a square matrix.");
196  INTREPID2_TEST_FOR_ABORT(mat.extent(0) != inv.extent(0), "mat and inv must have the same dimension.");
197  INTREPID2_TEST_FOR_ABORT(mat.extent(0) > 3, "Higher dimensions (> 3) are not supported.");
198  INTREPID2_TEST_FOR_ABORT(mat.data() == inv.data(), "mat and inv must have different data pointer.");
199 
200  const auto val = determinant(mat);
201  const int m = mat.extent(0);
202  switch (m) {
203  case 1: {
204  inv(0,0) = 1.0/mat(0,0);
205  break;
206  }
207  case 2: {
208  inv(0,0) = mat(1,1)/val;
209  inv(1,1) = mat(0,0)/val;
210 
211  inv(1,0) = - mat(1,0)/val;
212  inv(0,1) = - mat(0,1)/val;
213  break;
214  }
215  case 3: {
216  {
217  const auto val0 = mat(1,1)*mat(2,2) - mat(2,1)*mat(1,2);
218  const auto val1 = - mat(1,0)*mat(2,2) + mat(2,0)*mat(1,2);
219  const auto val2 = mat(1,0)*mat(2,1) - mat(2,0)*mat(1,1);
220 
221  inv(0,0) = val0/val;
222  inv(1,0) = val1/val;
223  inv(2,0) = val2/val;
224  }
225  {
226  const auto val0 = mat(2,1)*mat(0,2) - mat(0,1)*mat(2,2);
227  const auto val1 = mat(0,0)*mat(2,2) - mat(2,0)*mat(0,2);
228  const auto val2 = - mat(0,0)*mat(2,1) + mat(2,0)*mat(0,1);
229 
230  inv(0,1) = val0/val;
231  inv(1,1) = val1/val;
232  inv(2,1) = val2/val;
233  }
234  {
235  const auto val0 = mat(0,1)*mat(1,2) - mat(1,1)*mat(0,2);
236  const auto val1 = - mat(0,0)*mat(1,2) + mat(1,0)*mat(0,2);
237  const auto val2 = mat(0,0)*mat(1,1) - mat(1,0)*mat(0,1);
238 
239  inv(0,2) = val0/val;
240  inv(1,2) = val1/val;
241  inv(2,2) = val2/val;
242  }
243  break;
244  }
245  }
246  }
247 
248  template<typename ScalarType,
249  typename xViewType,
250  typename yViewType,
251  typename zViewType>
252  KOKKOS_INLINE_FUNCTION
253  static void
254  z_is_axby(const zViewType &z,
255  const ScalarType alpha,
256  const xViewType &x,
257  const ScalarType beta,
258  const yViewType &y) {
259  //y = beta*y + alpha*x
260  const ordinal_type
261  m = z.extent(0);
262 
263  for (ordinal_type i=0;i<m;++i)
264  z(i) = alpha*x(i) + beta*y(i);
265  }
266 
267  template<typename AViewType>
268  KOKKOS_INLINE_FUNCTION
269  static double
270  norm(const AViewType &A, const ENorm normType) {
271  typedef typename AViewType::non_const_value_type value_type;
272  const ordinal_type m = A.extent(0), n = A.extent(1);
273  double r_val = 0;
274  switch(normType) {
275  case NORM_TWO:{
276  for (ordinal_type i=0;i<m;++i)
277  for (ordinal_type j=0;j<n;++j)
278  r_val += A(i,j)*A(i,j);
279  r_val = sqrt(r_val);
280  break;
281  }
282  case NORM_INF:{
283  for (ordinal_type i=0;i<m;++i)
284  for (ordinal_type j=0;j<n;++j) {
285  const value_type current = Util<value_type>::abs(A(i,j));
286  r_val = (r_val < current ? current : r_val);
287  }
288  break;
289  }
290  case NORM_ONE:{
291  for (ordinal_type i=0;i<m;++i)
292  for (ordinal_type j=0;j<n;++j)
293  r_val += Util<value_type>::abs(A(i,j));
294  break;
295  }
296  default: {
297  Kokkos::abort("norm type is not supported");
298  break;
299  }
300  }
301  return r_val;
302  }
303 
304  template<typename dstViewType,
305  typename srcViewType>
306  KOKKOS_INLINE_FUNCTION
307  static void
308  copy(const dstViewType &dst, const srcViewType &src) {
309  if (dst.data() != src.data()) {
310  const ordinal_type m = dst.extent(0), n = dst.extent(1);
311  for (ordinal_type i=0;i<m;++i)
312  for (ordinal_type j=0;j<n;++j)
313  dst.access(i,j) = src.access(i,j);
314  }
315  }
316 
317  // y = Ax
318  template<typename yViewType,
319  typename AViewType,
320  typename xViewType>
321  KOKKOS_FORCEINLINE_FUNCTION
322  static void
323  matvec_trans_product_d2( const yViewType &y,
324  const AViewType &A,
325  const xViewType &x ) {
326  y(0) = A(0,0)*x(0) + A(1,0)*x(1);
327  y(1) = A(0,1)*x(0) + A(1,1)*x(1);
328  }
329 
330  template<typename yViewType,
331  typename AViewType,
332  typename xViewType>
333  KOKKOS_FORCEINLINE_FUNCTION
334  static void
335  matvec_trans_product_d3( const yViewType &y,
336  const AViewType &A,
337  const xViewType &x ) {
338  y(0) = A(0,0)*x(0) + A(1,0)*x(1) + A(2,0)*x(2);
339  y(1) = A(0,1)*x(0) + A(1,1)*x(1) + A(2,1)*x(2);
340  y(2) = A(0,2)*x(0) + A(1,2)*x(1) + A(2,2)*x(2);
341  }
342 
343  // y = Ax
344  template<typename yViewType,
345  typename AViewType,
346  typename xViewType>
347  KOKKOS_FORCEINLINE_FUNCTION
348  static void
349  matvec_product_d2( const yViewType &y,
350  const AViewType &A,
351  const xViewType &x ) {
352  y(0) = A(0,0)*x(0) + A(0,1)*x(1);
353  y(1) = A(1,0)*x(0) + A(1,1)*x(1);
354  }
355 
356  template<typename yViewType,
357  typename AViewType,
358  typename xViewType>
359  KOKKOS_FORCEINLINE_FUNCTION
360  static void
361  matvec_product_d3( const yViewType &y,
362  const AViewType &A,
363  const xViewType &x ) {
364  y(0) = A(0,0)*x(0) + A(0,1)*x(1) + A(0,2)*x(2);
365  y(1) = A(1,0)*x(0) + A(1,1)*x(1) + A(1,2)*x(2);
366  y(2) = A(2,0)*x(0) + A(2,1)*x(1) + A(2,2)*x(2);
367  }
368 
369  template<typename yViewType,
370  typename AViewType,
371  typename xViewType>
372  KOKKOS_FORCEINLINE_FUNCTION
373  static void
374  matvec_product( const yViewType &y,
375  const AViewType &A,
376  const xViewType &x ) {
377  switch (y.extent(0)) {
378  case 2: matvec_product_d2(y, A, x); break;
379  case 3: matvec_product_d3(y, A, x); break;
380  default: {
381  INTREPID2_TEST_FOR_ABORT(true, "matvec only support dimension 2 and 3 (consider to use gemv interface).");
382  break;
383  }
384  }
385  }
386 
387  template<typename zViewType,
388  typename xViewType,
389  typename yViewType>
390  KOKKOS_FORCEINLINE_FUNCTION
391  static void
392  vector_product_d2( const zViewType &z,
393  const xViewType &x,
394  const yViewType &y ) {
395  z(0) = x(0)*y(1) - x(1)*y(0);
396  }
397 
398  template<typename zViewType,
399  typename xViewType,
400  typename yViewType>
401  KOKKOS_FORCEINLINE_FUNCTION
402  static void
403  vector_product_d3( const zViewType &z,
404  const xViewType &x,
405  const yViewType &y ) {
406  z(0) = x(1)*y(2) - x(2)*y(1);
407  z(1) = x(2)*y(0) - x(0)*y(2);
408  z(2) = x(0)*y(1) - x(1)*y(0);
409  }
410 
411 
412  };
413 
414 
415 
416  template<typename xViewType,
417  typename yViewType>
418  KOKKOS_FORCEINLINE_FUNCTION
419  static typename xViewType::value_type
420  dot( const xViewType x,
421  const yViewType y ) {
422  typename xViewType::value_type r_val(0);
423  ordinal_type i = 0, iend = x.extent(0);
424  for (;i<iend;i+=4)
425  r_val += ( x(i )*y(i ) +
426  x(i+1)*y(i+1) +
427  x(i+2)*y(i+2) +
428  x(i+3)*y(i+3) );
429  for (;i<iend;++i)
430  r_val += x(i)*y(i);
431 
432  return r_val;
433  }
434 
435  template<typename xViewType,
436  typename yViewType>
437  KOKKOS_FORCEINLINE_FUNCTION
438  static typename xViewType::value_type
439  dot_d2( const xViewType x,
440  const yViewType y ) {
441  return ( x(0)*y(0) + x(1)*y(1) );
442  }
443 
444  template<typename xViewType,
445  typename yViewType>
446  KOKKOS_FORCEINLINE_FUNCTION
447  static typename xViewType::value_type
448  dot_d3( const xViewType x,
449  const yViewType y ) {
450  return ( x(0)*y(0) + x(1)*y(1) + x(2)*y(2) );
451  }
452 
453  template<typename AViewType,
454  typename alphaScalarType>
455  KOKKOS_FORCEINLINE_FUNCTION
456  static void
457  scale_mat( AViewType &A,
458  const alphaScalarType alpha ) {
459  const ordinal_type
460  iend = A.extent(0),
461  jend = A.extent(1);
462 
463  for (ordinal_type i=0;i<iend;++i)
464  for (ordinal_type j=0;j<jend;++j)
465  A(i,j) *= alpha;
466  }
467 
468  template<typename AViewType,
469  typename alphaScalarType>
470  KOKKOS_FORCEINLINE_FUNCTION
471  static void
472  inv_scale_mat( AViewType &A,
473  const alphaScalarType alpha ) {
474  const ordinal_type
475  iend = A.extent(0),
476  jend = A.extent(1);
477 
478  for (ordinal_type i=0;i<iend;++i)
479  for (ordinal_type j=0;j<jend;++j)
480  A(i,j) /= alpha;
481  }
482 
483  template<typename AViewType,
484  typename alphaScalarType,
485  typename BViewType>
486  KOKKOS_FORCEINLINE_FUNCTION
487  static void
488  scalar_mult_mat( AViewType &A,
489  const alphaScalarType alpha,
490  const BViewType &B ) {
491  const ordinal_type
492  iend = A.extent(0),
493  jend = A.extent(1);
494 
495  for (ordinal_type i=0;i<iend;++i)
496  for (ordinal_type j=0;j<jend;++j)
497  A(i,j) = alpha*B(i,j);
498  }
499 
500  template<typename AViewType,
501  typename alphaScalarType,
502  typename BViewType>
503  KOKKOS_FORCEINLINE_FUNCTION
504  static void
505  inv_scalar_mult_mat( AViewType &A,
506  const alphaScalarType alpha,
507  const BViewType &B ) {
508  const ordinal_type
509  iend = A.extent(0),
510  jend = A.extent(1);
511 
512  for (ordinal_type i=0;i<iend;++i)
513  for (ordinal_type j=0;j<jend;++j)
514  A(i,j) = B(i,j)/alpha;
515  }
516 
517  }
518 }
519 
520 #endif
small utility functions
Header function for Intrepid2::Util class and other utility functions.
Contains definitions of custom data types in Intrepid2.