Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Experimental_BlockView.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
43 #define TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
44 
52 
53 #include "TpetraCore_config.h"
54 #include "Kokkos_ArithTraits.hpp"
55 #include "Kokkos_Complex.hpp"
56 
57 namespace Tpetra {
58 
70 namespace Experimental {
71 
72 namespace Impl {
73 
80 template<class ViewType1,
81  class ViewType2,
82  const int rank1 = ViewType1::rank>
83 struct AbsMax {
84  static KOKKOS_INLINE_FUNCTION void
85  run (const ViewType2& Y, const ViewType1& X);
86 };
87 
92 template<class ViewType1,
93  class ViewType2>
94 struct AbsMax<ViewType1, ViewType2, 2> {
97  static KOKKOS_INLINE_FUNCTION void
98  run (const ViewType2& Y, const ViewType1& X)
99  {
100  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
101  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
102  typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
103  static_assert(! std::is_const<STY>::value,
104  "AbsMax: The type of each entry of Y must be nonconst.");
105  typedef typename std::decay<decltype (X(0,0)) >::type STX;
106  static_assert( std::is_same<STX, STY>::value,
107  "AbsMax: The type of each entry of X and Y must be the same.");
108  typedef Kokkos::Details::ArithTraits<STY> KAT;
109 
110  const int numCols = Y.extent (1);
111  const int numRows = Y.extent (0);
112  for (int j = 0; j < numCols; ++j) {
113  for (int i = 0; i < numRows; ++i) {
114  STY& Y_ij = Y(i,j); // use ref here to avoid 2nd op() call on Y
115  const STX X_ij = X(i,j);
116  // NOTE: no std::max (not a CUDA __device__ function); must
117  // cast back up to complex.
118  const auto Y_ij_abs = KAT::abs (Y_ij);
119  const auto X_ij_abs = KAT::abs (X_ij);
120  Y_ij = (Y_ij_abs >= X_ij_abs) ?
121  static_cast<STY> (Y_ij_abs) :
122  static_cast<STY> (X_ij_abs);
123  }
124  }
125  }
126 };
127 
132 template<class ViewType1,
133  class ViewType2>
134 struct AbsMax<ViewType1, ViewType2, 1> {
137  static KOKKOS_INLINE_FUNCTION void
138  run (const ViewType2& Y, const ViewType1& X)
139  {
140  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
141  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
142 
143  typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
144  static_assert(! std::is_const<STY>::value,
145  "AbsMax: The type of each entry of Y must be nonconst.");
146  typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
147  static_assert( std::is_same<STX, STY>::value,
148  "AbsMax: The type of each entry of X and Y must be the same.");
149  typedef Kokkos::Details::ArithTraits<STY> KAT;
150 
151  const int numRows = Y.extent (0);
152  for (int i = 0; i < numRows; ++i) {
153  STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
154  const STX X_i = X(i);
155  // NOTE: no std::max (not a CUDA __device__ function); must
156  // cast back up to complex.
157  const auto Y_i_abs = KAT::abs (Y_i);
158  const auto X_i_abs = KAT::abs (X_i);
159  Y_i = (Y_i_abs >= X_i_abs) ?
160  static_cast<STY> (Y_i_abs) :
161  static_cast<STY> (X_i_abs);
162  }
163  }
164 };
165 
172 template<class ViewType1, class ViewType2, const int rank = ViewType1::rank>
173 KOKKOS_INLINE_FUNCTION void
174 absMax (const ViewType2& Y, const ViewType1& X)
175 {
176  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
177  "absMax: ViewType1 and ViewType2 must have the same rank.");
178  AbsMax<ViewType1, ViewType2, rank>::run (Y, X);
179 }
180 
185 template<class ViewType,
186  class CoefficientType,
187  class LayoutType = typename ViewType::array_layout,
188  class IndexType = int,
189  const int rank = ViewType::rank>
190 struct SCAL {
191  static KOKKOS_INLINE_FUNCTION void
192  run (const CoefficientType& alpha, const ViewType& x);
193 };
194 
197 template<class ViewType,
198  class CoefficientType,
199  class LayoutType,
200  class IndexType>
201 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 1> {
203  static KOKKOS_INLINE_FUNCTION void
204  run (const CoefficientType& alpha, const ViewType& x)
205  {
206  const IndexType numRows = static_cast<IndexType> (x.extent (0));
207  // BLAS _SCAL doesn't check whether alpha is 0.
208  for (IndexType i = 0; i < numRows; ++i) {
209  x(i) = alpha * x(i);
210  }
211  }
212 };
213 
216 template<class ViewType,
217  class CoefficientType,
218  class LayoutType,
219  class IndexType>
220 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 2> {
222  static KOKKOS_INLINE_FUNCTION void
223  run (const CoefficientType& alpha, const ViewType& A)
224  {
225  const IndexType numRows = static_cast<IndexType> (A.extent (0));
226  const IndexType numCols = static_cast<IndexType> (A.extent (1));
227 
228  // BLAS _SCAL doesn't check whether alpha is 0.
229  for (IndexType i = 0; i < numRows; ++i) {
230  for (IndexType j = 0; j < numCols; ++j) {
231  A(i,j) = alpha * A(i,j);
232  }
233  }
234  }
235 };
236 
242 template<class ViewType,
243  class CoefficientType,
244  class IndexType>
245 struct SCAL<ViewType, CoefficientType, Kokkos::LayoutRight, IndexType, 2> {
247  static KOKKOS_INLINE_FUNCTION void
248  run (const CoefficientType& alpha, const ViewType& A)
249  {
250  const IndexType N = A.size ();
251  typedef typename std::decay<decltype (A(0,0)) >::type scalar_type;
252  scalar_type* const A_raw = A.data ();
253 
254  for (IndexType i = 0; i < N; ++i) {
255  A_raw[i] = alpha * A_raw[i];
256  }
257  }
258 };
259 
260 
265 template<class ViewType,
266  class InputType,
267  class LayoutType = typename ViewType::array_layout,
268  class IndexType = int,
269  const int rank = ViewType::rank>
270 struct FILL {
271  static KOKKOS_INLINE_FUNCTION void
272  run (const ViewType& x, const InputType& val);
273 };
274 
277 template<class ViewType,
278  class InputType,
279  class LayoutType,
280  class IndexType>
281 struct FILL<ViewType, InputType, LayoutType, IndexType, 1> {
282  static KOKKOS_INLINE_FUNCTION void
283  run (const ViewType& x, const InputType& val)
284  {
285  const IndexType numRows = static_cast<IndexType> (x.extent (0));
286  for (IndexType i = 0; i < numRows; ++i) {
287  x(i) = val;
288  }
289  }
290 };
291 
294 template<class ViewType,
295  class InputType,
296  class LayoutType,
297  class IndexType>
298 struct FILL<ViewType, InputType, LayoutType, IndexType, 2> {
299  static KOKKOS_INLINE_FUNCTION void
300  run (const ViewType& X, const InputType& val)
301  {
302  const IndexType numRows = static_cast<IndexType> (X.extent (0));
303  const IndexType numCols = static_cast<IndexType> (X.extent (1));
304  for (IndexType j = 0; j < numCols; ++j) {
305  for (IndexType i = 0; i < numRows; ++i) {
306  X(i,j) = val;
307  }
308  }
309  }
310 };
311 
316 template<class CoefficientType,
317  class ViewType1,
318  class ViewType2,
319  class LayoutType1 = typename ViewType1::array_layout,
320  class LayoutType2 = typename ViewType2::array_layout,
321  class IndexType = int,
322  const int rank = ViewType1::rank>
323 struct AXPY {
324  static KOKKOS_INLINE_FUNCTION void
325  run (const CoefficientType& alpha,
326  const ViewType1& x,
327  const ViewType2& y);
328 };
329 
332 template<class CoefficientType,
333  class ViewType1,
334  class ViewType2,
335  class LayoutType1,
336  class LayoutType2,
337  class IndexType>
338 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
340  static KOKKOS_INLINE_FUNCTION void
341  run (const CoefficientType& alpha,
342  const ViewType1& x,
343  const ViewType2& y)
344  {
345  using Kokkos::Details::ArithTraits;
346  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
347  "AXPY: x and y must have the same rank.");
348 
349  const IndexType numRows = static_cast<IndexType> (y.extent (0));
350  if (alpha != ArithTraits<CoefficientType>::zero ()) {
351  for (IndexType i = 0; i < numRows; ++i) {
352  y(i) += alpha * x(i);
353  }
354  }
355  }
356 };
357 
360 template<class CoefficientType,
361  class ViewType1,
362  class ViewType2,
363  class LayoutType1,
364  class LayoutType2,
365  class IndexType>
366 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
368  static KOKKOS_INLINE_FUNCTION void
369  run (const CoefficientType& alpha,
370  const ViewType1& X,
371  const ViewType2& Y)
372  {
373  using Kokkos::Details::ArithTraits;
374  static_assert (ViewType1::rank == ViewType2::rank,
375  "AXPY: X and Y must have the same rank.");
376  const IndexType numRows = static_cast<IndexType> (Y.extent (0));
377  const IndexType numCols = static_cast<IndexType> (Y.extent (1));
378 
379  if (alpha != ArithTraits<CoefficientType>::zero ()) {
380  for (IndexType i = 0; i < numRows; ++i) {
381  for (IndexType j = 0; j < numCols; ++j) {
382  Y(i,j) += alpha * X(i,j);
383  }
384  }
385  }
386  }
387 };
388 
392 template<class CoefficientType,
393  class ViewType1,
394  class ViewType2,
395  class IndexType>
396 struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
398  static KOKKOS_INLINE_FUNCTION void
399  run (const CoefficientType& alpha,
400  const ViewType1& X,
401  const ViewType2& Y)
402  {
403  using Kokkos::Details::ArithTraits;
404  static_assert (static_cast<int> (ViewType1::rank) ==
405  static_cast<int> (ViewType2::rank),
406  "AXPY: X and Y must have the same rank.");
407  typedef typename std::decay<decltype (X(0,0)) >::type SX;
408  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
409 
410  const IndexType N = static_cast<IndexType> (Y.size ());
411  const SX* const X_raw = X.data ();
412  SY* const Y_raw = Y.data ();
413 
414  if (alpha != ArithTraits<CoefficientType>::zero ()) {
415  for (IndexType i = 0; i < N; ++i) {
416  Y_raw[i] += alpha * X_raw[i];
417  }
418  }
419  }
420 };
421 
425 template<class CoefficientType,
426  class ViewType1,
427  class ViewType2,
428  class IndexType>
429 struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
431  static KOKKOS_INLINE_FUNCTION void
432  run (const CoefficientType& alpha,
433  const ViewType1& X,
434  const ViewType2& Y)
435  {
436  using Kokkos::Details::ArithTraits;
437  static_assert (ViewType1::rank == ViewType2::rank,
438  "AXPY: X and Y must have the same rank.");
439  typedef typename std::decay<decltype (X(0,0)) >::type SX;
440  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
441 
442  const IndexType N = static_cast<IndexType> (Y.size ());
443  const SX* const X_raw = X.data ();
444  SY* const Y_raw = Y.data ();
445 
446  if (alpha != ArithTraits<CoefficientType>::zero ()) {
447  for (IndexType i = 0; i < N; ++i) {
448  Y_raw[i] += alpha * X_raw[i];
449  }
450  }
451  }
452 };
453 
458 template<class ViewType1,
459  class ViewType2,
460  class LayoutType1 = typename ViewType1::array_layout,
461  class LayoutType2 = typename ViewType2::array_layout,
462  class IndexType = int,
463  const int rank = ViewType1::rank>
464 struct COPY {
465  static KOKKOS_INLINE_FUNCTION void
466  run (const ViewType1& x, const ViewType2& y);
467 };
468 
471 template<class ViewType1,
472  class ViewType2,
473  class LayoutType1,
474  class LayoutType2,
475  class IndexType>
476 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
478  static KOKKOS_INLINE_FUNCTION void
479  run (const ViewType1& x, const ViewType2& y)
480  {
481  const IndexType numRows = static_cast<IndexType> (x.extent (0));
482  for (IndexType i = 0; i < numRows; ++i) {
483  y(i) = x(i);
484  }
485  }
486 };
487 
490 template<class ViewType1,
491  class ViewType2,
492  class LayoutType1,
493  class LayoutType2,
494  class IndexType>
495 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
497  static KOKKOS_INLINE_FUNCTION void
498  run (const ViewType1& X, const ViewType2& Y)
499  {
500  const IndexType numRows = static_cast<IndexType> (Y.extent (0));
501  const IndexType numCols = static_cast<IndexType> (Y.extent (1));
502 
503  // BLAS _SCAL doesn't check whether alpha is 0.
504  for (IndexType i = 0; i < numRows; ++i) {
505  for (IndexType j = 0; j < numCols; ++j) {
506  Y(i,j) = X(i,j);
507  }
508  }
509  }
510 };
511 
515 template<class ViewType1,
516  class ViewType2,
517  class IndexType>
518 struct COPY<ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
520  static KOKKOS_INLINE_FUNCTION void
521  run (const ViewType1& X, const ViewType2& Y)
522  {
523  typedef typename std::decay<decltype (X(0,0)) >::type SX;
524  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
525 
526  const IndexType N = static_cast<IndexType> (Y.size ());
527  const SX* const X_raw = X.data ();
528  SY* const Y_raw = Y.data ();
529 
530  // BLAS _SCAL doesn't check whether alpha is 0.
531  for (IndexType i = 0; i < N; ++i) {
532  Y_raw[i] = X_raw[i];
533  }
534  }
535 };
536 
540 template<class ViewType1,
541  class ViewType2,
542  class IndexType>
543 struct COPY<ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
545  static KOKKOS_INLINE_FUNCTION void
546  run (const ViewType1& X, const ViewType2& Y)
547  {
548  typedef typename std::decay<decltype (X(0,0)) >::type SX;
549  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
550 
551  const IndexType N = static_cast<IndexType> (Y.size ());
552  const SX* const X_raw = X.data ();
553  SY* const Y_raw = Y.data ();
554 
555  // BLAS _SCAL doesn't check whether alpha is 0.
556  for (IndexType i = 0; i < N; ++i) {
557  Y_raw[i] = X_raw[i];
558  }
559  }
560 };
561 
562 
563 template<class VecType1,
564  class BlkType,
565  class VecType2,
566  class CoeffType,
567  class IndexType = int,
568  class VecLayoutType1 = typename VecType1::array_layout,
569  class BlkLayoutType = typename BlkType::array_layout,
570  class VecLayoutType2 = typename VecType2::array_layout>
571 struct GEMV {
572  static KOKKOS_INLINE_FUNCTION void
573  run (const CoeffType& alpha,
574  const BlkType& A,
575  const VecType1& x,
576  const VecType2& y)
577  {
578  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
579  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
580  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
581  typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
582 
583  const IndexType numRows = static_cast<IndexType> (A.extent (0));
584  const IndexType numCols = static_cast<IndexType> (A.extent (1));
585 
586  for (IndexType i = 0; i < numRows; ++i) {
587  y_elt_type y_i = y(i);
588  for (IndexType j = 0; j < numCols; ++j) {
589  y_i += alpha * A(i,j) * x(j);
590  }
591  y(i) = y_i;
592  }
593  }
594 };
595 
596 template<class VecType1,
597  class BlkType,
598  class VecType2,
599  class CoeffType,
600  class IndexType>
601 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
602  Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight>
603 {
604  static KOKKOS_INLINE_FUNCTION void
605  run (const CoeffType& alpha,
606  const BlkType& A,
607  const VecType1& x,
608  const VecType2& y)
609  {
610  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
611  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
612  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
613  typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
614  typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
615 
616  const IndexType numRows = static_cast<IndexType> (A.extent (0));
617  const IndexType numCols = static_cast<IndexType> (A.extent (1));
618  const A_elt_type* const A_raw = A.data ();
619 
620  for (IndexType i = 0; i < numRows; ++i) {
621  y_elt_type y_i = y(i);
622  const A_elt_type* const A_i = A_raw + i*numCols;
623  for (IndexType j = 0; j < numCols; ++j) {
624  y_i += alpha * A_i[j] * x(j);
625  }
626  y(i) = y_i;
627  }
628  }
629 };
630 
631 template<class VecType1,
632  class BlkType,
633  class VecType2,
634  class CoeffType,
635  class IndexType>
636 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
637  Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft>
638 {
639  static KOKKOS_INLINE_FUNCTION void
640  run (const CoeffType& alpha,
641  const BlkType& A,
642  const VecType1& x,
643  const VecType2& y)
644  {
645  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
646  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
647  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
648  typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
649 
650  const A_elt_type* const A_raw = A.data ();
651  const IndexType numRows = static_cast<IndexType> (A.extent (0));
652  const IndexType numCols = static_cast<IndexType> (A.extent (1));
653  for (IndexType j = 0; j < numCols; ++j) {
654  const A_elt_type* const A_j = A_raw + j*numRows;
655  for (IndexType i = 0; i < numRows; ++i) {
656  y(i) += alpha * A_j[i] * x(i);
657  }
658  }
659  }
660 };
661 
662 } // namespace Impl
663 
666 template<class ViewType,
667  class CoefficientType,
668  class LayoutType = typename ViewType::array_layout,
669  class IndexType = int,
670  const int rank = ViewType::rank>
671 KOKKOS_INLINE_FUNCTION void
672 SCAL (const CoefficientType& alpha, const ViewType& x)
673 {
675 }
676 
678 template<class ViewType,
679  class InputType,
680  class LayoutType = typename ViewType::array_layout,
681  class IndexType = int,
682  const int rank = ViewType::rank>
683 KOKKOS_INLINE_FUNCTION void
684 FILL (const ViewType& x, const InputType& val)
685 {
687 }
688 
694 template<class CoefficientType,
695  class ViewType1,
696  class ViewType2,
697  class LayoutType1 = typename ViewType1::array_layout,
698  class LayoutType2 = typename ViewType2::array_layout,
699  class IndexType = int,
700  const int rank = ViewType1::rank>
701 KOKKOS_INLINE_FUNCTION void
702 AXPY (const CoefficientType& alpha,
703  const ViewType1& x,
704  const ViewType2& y)
705 {
706  static_assert (static_cast<int> (ViewType1::rank) ==
707  static_cast<int> (ViewType2::rank),
708  "AXPY: x and y must have the same rank.");
709  Impl::AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2,
710  IndexType, rank>::run (alpha, x, y);
711 }
712 
721 template<class ViewType1,
722  class ViewType2,
723  class LayoutType1 = typename ViewType1::array_layout,
724  class LayoutType2 = typename ViewType2::array_layout,
725  class IndexType = int,
726  const int rank = ViewType1::rank>
727 KOKKOS_INLINE_FUNCTION void
728 COPY (const ViewType1& x, const ViewType2& y)
729 {
730  static_assert (static_cast<int> (ViewType1::rank) ==
731  static_cast<int> (ViewType2::rank),
732  "COPY: x and y must have the same rank.");
733  Impl::COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType,
734  rank>::run (x, y);
735 }
736 
748 template<class VecType1,
749  class BlkType,
750  class VecType2,
751  class CoeffType,
752  class IndexType = int>
753 KOKKOS_INLINE_FUNCTION void
754 GEMV (const CoeffType& alpha,
755  const BlkType& A,
756  const VecType1& x,
757  const VecType2& y)
758 {
759  Impl::GEMV<VecType1, BlkType, VecType2, CoeffType,
760  IndexType>::run (alpha, A, x, y);
761 }
762 
770 template<class ViewType1,
771  class ViewType2,
772  class ViewType3,
773  class CoefficientType,
774  class IndexType = int>
775 KOKKOS_INLINE_FUNCTION void
776 GEMM (const char transA[],
777  const char transB[],
778  const CoefficientType& alpha,
779  const ViewType1& A,
780  const ViewType2& B,
781  const CoefficientType& beta,
782  const ViewType3& C)
783 {
784  // Assert that A, B, and C are in fact matrices
785  static_assert (ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
786  static_assert (ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
787  static_assert (ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
788 
789  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
790  typedef Kokkos::Details::ArithTraits<Scalar> STS;
791  const Scalar ZERO = STS::zero();
792  const Scalar ONE = STS::one();
793 
794  // Get the dimensions
795  IndexType m, n, k;
796  if(transA[0] == 'N' || transA[0] == 'n') {
797  m = static_cast<IndexType> (A.extent (0));
798  n = static_cast<IndexType> (A.extent (1));
799  }
800  else {
801  m = static_cast<IndexType> (A.extent (1));
802  n = static_cast<IndexType> (A.extent (0));
803  }
804  k = static_cast<IndexType> (C.extent (1));
805 
806  // quick return if possible
807  if(alpha == ZERO && beta == ONE)
808  return;
809 
810  // And if alpha equals zero...
811  if(alpha == ZERO) {
812  if(beta == ZERO) {
813  for(IndexType i=0; i<m; i++) {
814  for(IndexType j=0; j<k; j++) {
815  C(i,j) = ZERO;
816  }
817  }
818  }
819  else {
820  for(IndexType i=0; i<m; i++) {
821  for(IndexType j=0; j<k; j++) {
822  C(i,j) = beta*C(i,j);
823  }
824  }
825  }
826  }
827 
828  // Start the operations
829  if(transB[0] == 'n' || transB[0] == 'N') {
830  if(transA[0] == 'n' || transA[0] == 'N') {
831  // Form C = alpha*A*B + beta*C
832  for(IndexType j=0; j<n; j++) {
833  if(beta == ZERO) {
834  for(IndexType i=0; i<m; i++) {
835  C(i,j) = ZERO;
836  }
837  }
838  else if(beta != ONE) {
839  for(IndexType i=0; i<m; i++) {
840  C(i,j) = beta*C(i,j);
841  }
842  }
843  for(IndexType l=0; l<k; l++) {
844  Scalar temp = alpha*B(l,j);
845  for(IndexType i=0; i<m; i++) {
846  C(i,j) = C(i,j) + temp*A(i,l);
847  }
848  }
849  }
850  }
851  else {
852  // Form C = alpha*A**T*B + beta*C
853  for(IndexType j=0; j<n; j++) {
854  for(IndexType i=0; i<m; i++) {
855  Scalar temp = ZERO;
856  for(IndexType l=0; l<k; l++) {
857  temp = temp + A(l,i)*B(l,j);
858  }
859  if(beta == ZERO) {
860  C(i,j) = alpha*temp;
861  }
862  else {
863  C(i,j) = alpha*temp + beta*C(i,j);
864  }
865  }
866  }
867  }
868  }
869  else {
870  if(transA[0] == 'n' || transA[0] == 'N') {
871  // Form C = alpha*A*B**T + beta*C
872  for(IndexType j=0; j<n; j++) {
873  if(beta == ZERO) {
874  for(IndexType i=0; i<m; i++) {
875  C(i,j) = ZERO;
876  }
877  }
878  else if(beta != ONE) {
879  for(IndexType i=0; i<m; i++) {
880  C(i,j) = beta*C(i,j);
881  }
882  }
883  for(IndexType l=0; l<k; l++) {
884  Scalar temp = alpha*B(j,l);
885  for(IndexType i=0; i<m; i++) {
886  C(i,j) = C(i,j) + temp*A(i,l);
887  }
888  }
889  }
890  }
891  else {
892  // Form C = alpha*A**T*B**T + beta*C
893  for(IndexType j=0; j<n; j++) {
894  for(IndexType i=0; i<m; i++) {
895  Scalar temp = ZERO;
896  for(IndexType l=0; l<k; l++) {
897  temp = temp + A(l,i)*B(j,l);
898  }
899  if(beta == ZERO) {
900  C(i,j) = alpha*temp;
901  }
902  else {
903  C(i,j) = alpha*temp + beta*C(i,j);
904  }
905  }
906  }
907  }
908  }
909 }
910 
912 template<class LittleBlockType,
913  class LittleVectorType>
914 KOKKOS_INLINE_FUNCTION void
915 GETF2 (const LittleBlockType& A, const LittleVectorType& ipiv, int& info)
916 {
917  // The type of an entry of ipiv is the index type.
918  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
919  static_assert (std::is_integral<IndexType>::value,
920  "GETF2: The type of each entry of ipiv must be an integer type.");
921  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
922  static_assert (! std::is_const<Scalar>::value,
923  "GETF2: A must not be a const View (or LittleBlock).");
924  static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
925  "GETF2: ipiv must not be a const View (or LittleBlock).");
926  static_assert (LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
927  typedef Kokkos::Details::ArithTraits<Scalar> STS;
928  const Scalar ZERO = STS::zero();
929 
930  const IndexType numRows = static_cast<IndexType> (A.extent (0));
931  const IndexType numCols = static_cast<IndexType> (A.extent (1));
932  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
933 
934  // std::min is not a CUDA device function
935  const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
936  if (pivDim < minPivDim) {
937  info = -2;
938  return;
939  }
940 
941  // Initialize info
942  info = 0;
943 
944  for(IndexType j=0; j < pivDim; j++)
945  {
946  // Find pivot and test for singularity
947  IndexType jp = j;
948  for(IndexType i=j+1; i<numRows; i++)
949  {
950  if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
951  jp = i;
952  }
953  }
954  ipiv(j) = jp+1;
955 
956  if(A(jp,j) != ZERO)
957  {
958  // Apply the interchange to columns 1:N
959  if(jp != j)
960  {
961  for(IndexType i=0; i < numCols; i++)
962  {
963  Scalar temp = A(jp,i);
964  A(jp,i) = A(j,i);
965  A(j,i) = temp;
966  }
967  }
968 
969  // Compute elements J+1:M of J-th column
970  for(IndexType i=j+1; i<numRows; i++) {
971  A(i,j) = A(i,j) / A(j,j);
972  }
973  }
974  else if(info == 0) {
975  info = j;
976  }
977 
978  // Update trailing submatrix
979  for(IndexType r=j+1; r < numRows; r++)
980  {
981  for(IndexType c=j+1; c < numCols; c++) {
982  A(r,c) = A(r,c) - A(r,j) * A(j,c);
983  }
984  }
985  }
986 }
987 
988 namespace Impl {
989 
993 template<class LittleBlockType,
994  class LittleIntVectorType,
995  class LittleScalarVectorType,
996  const int rank = LittleScalarVectorType::rank>
997 struct GETRS {
998  static KOKKOS_INLINE_FUNCTION void
999  run (const char mode[],
1000  const LittleBlockType& A,
1001  const LittleIntVectorType& ipiv,
1002  const LittleScalarVectorType& B,
1003  int& info);
1004 };
1005 
1007 template<class LittleBlockType,
1008  class LittleIntVectorType,
1009  class LittleScalarVectorType>
1010 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
1011  static KOKKOS_INLINE_FUNCTION void
1012  run (const char mode[],
1013  const LittleBlockType& A,
1014  const LittleIntVectorType& ipiv,
1015  const LittleScalarVectorType& B,
1016  int& info)
1017  {
1018  // The type of an entry of ipiv is the index type.
1019  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1020  // IndexType must be signed, because this code does a countdown loop
1021  // to zero. Unsigned integers are always >= 0, even on underflow.
1022  static_assert (std::is_integral<IndexType>::value &&
1023  std::is_signed<IndexType>::value,
1024  "GETRS: The type of each entry of ipiv must be a signed integer.");
1025  typedef typename std::decay<decltype (A(0,0))>::type Scalar;
1026  static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
1027  "GETRS: B must not be a const View (or LittleBlock).");
1028  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1029  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1030  static_assert (LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
1031 
1032  typedef Kokkos::Details::ArithTraits<Scalar> STS;
1033  const Scalar ZERO = STS::zero();
1034  const IndexType numRows = static_cast<IndexType> (A.extent (0));
1035  const IndexType numCols = static_cast<IndexType> (A.extent (1));
1036  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
1037 
1038  info = 0;
1039 
1040  // Ensure that the matrix is square
1041  if (numRows != numCols) {
1042  info = -2;
1043  return;
1044  }
1045 
1046  // Ensure that the pivot array is sufficiently large
1047  if (pivDim < numRows) {
1048  info = -3;
1049  return;
1050  }
1051 
1052  // No transpose case
1053  if(mode[0] == 'n' || mode[0] == 'N') {
1054  // Apply row interchanges to the RHS
1055  for(IndexType i=0; i<numRows; i++) {
1056  if(ipiv(i) != i+1) {
1057  Scalar temp = B(i);
1058  B(i) = B(ipiv(i)-1);
1059  B(ipiv(i)-1) = temp;
1060  }
1061  }
1062 
1063  // Solve Lx=b, overwriting b with x
1064  for(IndexType r=1; r < numRows; r++) {
1065  for(IndexType c=0; c < r; c++) {
1066  B(r) = B(r) - A(r,c)*B(c);
1067  }
1068  }
1069 
1070  // Solve Ux=b, overwriting b with x
1071  for(IndexType r=numRows-1; r >= 0; r--) {
1072  // Check whether U is singular
1073  if(A(r,r) == ZERO) {
1074  info = r+1;
1075  return;
1076  }
1077 
1078  for(IndexType c=r+1; c < numCols; c++) {
1079  B(r) = B(r) - A(r,c)*B(c);
1080  }
1081  B(r) = B(r) / A(r,r);
1082  }
1083  }
1084  // Transpose case
1085  else if(mode[0] == 't' || mode[0] == 'T') {
1086  info = -1; // NOT YET IMPLEMENTED
1087  return;
1088  }
1089  // Conjugate transpose case
1090  else if (mode[0] == 'c' || mode[0] == 'C') {
1091  info = -1; // NOT YET IMPLEMENTED
1092  return;
1093  }
1094  else { // invalid mode
1095  info = -1;
1096  return;
1097  }
1098  }
1099 };
1100 
1101 
1103 template<class LittleBlockType,
1104  class LittleIntVectorType,
1105  class LittleScalarVectorType>
1106 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1107  static KOKKOS_INLINE_FUNCTION void
1108  run (const char mode[],
1109  const LittleBlockType& A,
1110  const LittleIntVectorType& ipiv,
1111  const LittleScalarVectorType& B,
1112  int& info)
1113  {
1114  // The type of an entry of ipiv is the index type.
1115  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1116  static_assert (std::is_integral<IndexType>::value,
1117  "GETRS: The type of each entry of ipiv must be an integer type.");
1118  static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1119  "GETRS: B must not be a const View (or LittleBlock).");
1120  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1121  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1122  static_assert (LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
1123 
1124  // The current implementation iterates over one right-hand side at
1125  // a time. It might be faster to do this differently, but this
1126  // should work for now.
1127  const IndexType numRhs = B.extent (1);
1128  info = 0;
1129 
1130  for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1131  auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1133  if (info != 0) {
1134  return;
1135  }
1136  }
1137  }
1138 };
1139 
1140 } // namespace Impl
1141 
1145 template<class LittleBlockType,
1146  class LittleIntVectorType,
1147  class LittleScalarVectorType>
1148 KOKKOS_INLINE_FUNCTION void
1149 GETRS (const char mode[],
1150  const LittleBlockType& A,
1151  const LittleIntVectorType& ipiv,
1152  const LittleScalarVectorType& B,
1153  int& info)
1154 {
1155  Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1156  LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1157 }
1158 
1159 
1174 template<class LittleBlockType,
1175  class LittleIntVectorType,
1176  class LittleScalarVectorType>
1177 KOKKOS_INLINE_FUNCTION void
1178 GETRI (const LittleBlockType& A,
1179  const LittleIntVectorType& ipiv,
1180  const LittleScalarVectorType& work,
1181  int& info)
1182 {
1183  // The type of an entry of ipiv is the index type.
1184  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1185  // IndexType must be signed, because this code does a countdown loop
1186  // to zero. Unsigned integers are always >= 0, even on underflow.
1187  static_assert (std::is_integral<IndexType>::value &&
1188  std::is_signed<IndexType>::value,
1189  "GETRI: The type of each entry of ipiv must be a signed integer.");
1190  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1191  static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1192  "GETRI: A must not be a const View (or LittleBlock).");
1193  static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1194  "GETRI: work must not be a const View (or LittleBlock).");
1195  static_assert (LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1196  typedef Kokkos::Details::ArithTraits<Scalar> STS;
1197  const Scalar ZERO = STS::zero();
1198  const Scalar ONE = STS::one();
1199 
1200  const IndexType numRows = static_cast<IndexType> (A.extent (0));
1201  const IndexType numCols = static_cast<IndexType> (A.extent (1));
1202  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
1203  const IndexType workDim = static_cast<IndexType> (work.extent (0));
1204 
1205  info = 0;
1206 
1207  // Ensure that the matrix is square
1208  if (numRows != numCols) {
1209  info = -1;
1210  return;
1211  }
1212 
1213  // Ensure that the pivot array is sufficiently large
1214  if (pivDim < numRows) {
1215  info = -2;
1216  return;
1217  }
1218 
1219  // Ensure that the work array is sufficiently large
1220  if (workDim < numRows) {
1221  info = -3;
1222  return;
1223  }
1224 
1225  // Form Uinv in place
1226  for(IndexType j=0; j < numRows; j++) {
1227  if(A(j,j) == ZERO) {
1228  info = j+1;
1229  return;
1230  }
1231 
1232  A(j,j) = ONE / A(j,j);
1233 
1234  // Compute elements 1:j-1 of j-th column
1235  for(IndexType r=0; r < j; r++) {
1236  A(r,j) = A(r,r)*A(r,j);
1237  for(IndexType c=r+1; c < j; c++) {
1238  A(r,j) = A(r,j) + A(r,c)*A(c,j);
1239  }
1240  }
1241  for(IndexType r=0; r < j; r++) {
1242  A(r,j) = -A(j,j)*A(r,j);
1243  }
1244  }
1245 
1246  // Compute Ainv by solving A\L = Uinv
1247  for(IndexType j = numCols-2; j >= 0; j--) {
1248  // Copy lower triangular data to work array and replace with 0
1249  for(IndexType r=j+1; r < numRows; r++) {
1250  work(r) = A(r,j);
1251  A(r,j) = 0;
1252  }
1253 
1254  for(IndexType r=0; r < numRows; r++) {
1255  for(IndexType i=j+1; i < numRows; i++) {
1256  A(r,j) = A(r,j) - work(i)*A(r,i);
1257  }
1258  }
1259  }
1260 
1261  // Apply column interchanges
1262  for(IndexType j=numRows-1; j >= 0; j--) {
1263  IndexType jp = ipiv(j)-1;
1264  if(j != jp) {
1265  for(IndexType r=0; r < numRows; r++) {
1266  Scalar temp = A(r,j);
1267  A(r,j) = A(r,jp);
1268  A(r,jp) = temp;
1269  }
1270  }
1271  }
1272 }
1273 
1274 
1275 // mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1276 // an implementation for trans != 'N' (the transpose and conjugate
1277 // transpose cases).
1278 #if 0
1279 template<class LittleBlockType,
1280  class LittleVectorTypeX,
1281  class LittleVectorTypeY,
1282  class CoefficientType,
1283  class IndexType = int>
1284 KOKKOS_INLINE_FUNCTION void
1285 GEMV (const char trans,
1286  const CoefficientType& alpha,
1287  const LittleBlockType& A,
1288  const LittleVectorTypeX& x,
1289  const CoefficientType& beta,
1290  const LittleVectorTypeY& y)
1291 {
1292  // y(0) returns a reference to the 0-th entry of y. Remove that
1293  // reference to get the type of each entry of y. It's OK if y has
1294  // zero entries -- this doesn't actually do y(i), it just returns
1295  // the type of that expression.
1296  typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1297  const IndexType numRows = static_cast<IndexType> (A.extent (0));
1298  const IndexType numCols = static_cast<IndexType> (A.extent (1));
1299 
1300  if (beta == 0.0) {
1301  if (alpha == 0.0) {
1302  for (IndexType i = 0; i < numRows; ++i) {
1303  y(i) = 0.0;
1304  }
1305  }
1306  else {
1307  for (IndexType i = 0; i < numRows; ++i) {
1308  y_value_type y_i = 0.0;
1309  for (IndexType j = 0; j < numCols; ++j) {
1310  y_i += A(i,j) * x(j);
1311  }
1312  y(i) = y_i;
1313  }
1314  }
1315  }
1316  else { // beta != 0
1317  if (alpha == 0.0) {
1318  if (beta == 0.0) {
1319  for (IndexType i = 0; i < numRows; ++i) {
1320  y(i) = 0.0;
1321  }
1322  }
1323  else {
1324  for (IndexType i = 0; i < numRows; ++i) {
1325  y(i) *= beta;
1326  }
1327  }
1328  }
1329  else {
1330  for (IndexType i = 0; i < numRows; ++i) {
1331  y_value_type y_i = beta * y(i);
1332  for (IndexType j = 0; j < numCols; ++j) {
1333  y_i += alpha * A(i,j) * x(j);
1334  }
1335  y(i) = y_i;
1336  }
1337  }
1338  }
1339 }
1340 
1341 #endif // 0
1342 
1343 } // namespace Experimental
1344 } // namespace Tpetra
1345 
1346 #endif // TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
Implementation of Tpetra::Experimental::FILL function.
Implementation of Tpetra::Experimental::SCAL function.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
Implementation of Tpetra::Experimental::COPY function.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::Experimental::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)