10 #ifndef TPETRA_BLOCKVIEW_HPP
11 #define TPETRA_BLOCKVIEW_HPP
21 #include "TpetraCore_config.h"
22 #if KOKKOS_VERSION >= 40799
23 #include "KokkosKernels_ArithTraits.hpp"
25 #include "Kokkos_ArithTraits.hpp"
27 #include "Kokkos_Complex.hpp"
44 template <
class ViewType1,
46 const int rank1 = ViewType1::rank>
48 static KOKKOS_INLINE_FUNCTION
void
49 run(
const ViewType2& Y,
const ViewType1& X);
56 template <
class ViewType1,
58 struct AbsMax<ViewType1, ViewType2, 2> {
61 static KOKKOS_INLINE_FUNCTION
void
62 run(
const ViewType2& Y,
const ViewType1& X) {
63 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
64 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
65 typedef typename std::remove_reference<decltype(Y(0, 0))>::type STY;
66 static_assert(!std::is_const<STY>::value,
67 "AbsMax: The type of each entry of Y must be nonconst.");
68 typedef typename std::decay<decltype(X(0, 0))>::type STX;
69 static_assert(std::is_same<STX, STY>::value,
70 "AbsMax: The type of each entry of X and Y must be the same.");
71 #if KOKKOS_VERSION >= 40799
72 typedef KokkosKernels::ArithTraits<STY> KAT;
74 typedef Kokkos::ArithTraits<STY> KAT;
77 const int numCols = Y.extent(1);
78 const int numRows = Y.extent(0);
79 for (
int j = 0; j < numCols; ++j) {
80 for (
int i = 0; i < numRows; ++i) {
82 const STX X_ij = X(i, j);
85 const auto Y_ij_abs = KAT::abs(Y_ij);
86 const auto X_ij_abs = KAT::abs(X_ij);
87 Y_ij = (Y_ij_abs >= X_ij_abs) ? static_cast<STY>(Y_ij_abs) :
static_cast<STY
>(X_ij_abs);
97 template <
class ViewType1,
99 struct AbsMax<ViewType1, ViewType2, 1> {
102 static KOKKOS_INLINE_FUNCTION
void
103 run(
const ViewType2& Y,
const ViewType1& X) {
104 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
105 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
107 typedef typename std::remove_reference<decltype(Y(0))>::type STY;
108 static_assert(!std::is_const<STY>::value,
109 "AbsMax: The type of each entry of Y must be nonconst.");
110 typedef typename std::remove_const<typename std::remove_reference<decltype(X(0))>::type>::type STX;
111 static_assert(std::is_same<STX, STY>::value,
112 "AbsMax: The type of each entry of X and Y must be the same.");
113 #if KOKKOS_VERSION >= 40799
114 typedef KokkosKernels::ArithTraits<STY> KAT;
116 typedef Kokkos::ArithTraits<STY> KAT;
119 const int numRows = Y.extent(0);
120 for (
int i = 0; i < numRows; ++i) {
122 const STX X_i = X(i);
125 const auto Y_i_abs = KAT::abs(Y_i);
126 const auto X_i_abs = KAT::abs(X_i);
127 Y_i = (Y_i_abs >= X_i_abs) ? static_cast<STY>(Y_i_abs) :
static_cast<STY
>(X_i_abs);
138 template <
class ViewType1,
class ViewType2, const
int rank = ViewType1::rank>
139 KOKKOS_INLINE_FUNCTION
void
140 absMax(
const ViewType2& Y,
const ViewType1& X) {
141 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
142 "absMax: ViewType1 and ViewType2 must have the same rank.");
150 template <
class ViewType,
151 class CoefficientType,
152 class IndexType = int,
153 const bool is_contiguous =
false,
154 const int rank = ViewType::rank>
156 static KOKKOS_INLINE_FUNCTION
void
157 run(
const CoefficientType& alpha,
const ViewType& x);
162 template <
class ViewType,
163 class CoefficientType,
165 struct SCAL<ViewType, CoefficientType, IndexType, false, 1> {
167 static KOKKOS_INLINE_FUNCTION
void
168 run(
const CoefficientType& alpha,
const ViewType& x) {
169 const IndexType numRows =
static_cast<IndexType
>(x.extent(0));
172 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
175 for (IndexType i = 0; i < numRows; ++i)
181 template <
class ViewType,
182 class CoefficientType,
184 struct SCAL<ViewType, CoefficientType, IndexType, false, 2> {
186 static KOKKOS_INLINE_FUNCTION
void
187 run(
const CoefficientType& alpha,
const ViewType& A) {
188 const IndexType numRows =
static_cast<IndexType
>(A.extent(0));
189 const IndexType numCols =
static_cast<IndexType
>(A.extent(1));
191 for (IndexType j = 0; j < numCols; ++j)
192 for (IndexType i = 0; i < numRows; ++i)
193 A(i, j) = alpha * A(i, j);
196 template <
class ViewType,
197 class CoefficientType,
200 struct SCAL<ViewType, CoefficientType, IndexType, true, rank> {
202 static KOKKOS_INLINE_FUNCTION
void
203 run(
const CoefficientType& alpha,
const ViewType& x) {
204 using x_value_type =
typename std::decay<decltype(*x.data())>::type;
205 const IndexType span =
static_cast<IndexType
>(x.span());
206 x_value_type* __restrict x_ptr(x.data());
207 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
210 for (IndexType i = 0; i < span; ++i)
211 x_ptr[i] = alpha * x_ptr[i];
219 template <
class ViewType,
221 class IndexType = int,
222 const bool is_contiguous =
false,
223 const int rank = ViewType::rank>
225 static KOKKOS_INLINE_FUNCTION
void
226 run(
const ViewType& x,
const InputType& val);
231 template <
class ViewType,
234 struct FILL<ViewType, InputType, IndexType, false, 1> {
235 static KOKKOS_INLINE_FUNCTION
void
236 run(
const ViewType& x,
const InputType& val) {
237 const IndexType numRows =
static_cast<IndexType
>(x.extent(0));
238 for (IndexType i = 0; i < numRows; ++i)
244 template <
class ViewType,
247 struct FILL<ViewType, InputType, IndexType, false, 2> {
248 static KOKKOS_INLINE_FUNCTION
void
249 run(
const ViewType& X,
const InputType& val) {
250 const IndexType numRows =
static_cast<IndexType
>(X.extent(0));
251 const IndexType numCols =
static_cast<IndexType
>(X.extent(1));
252 for (IndexType j = 0; j < numCols; ++j)
253 for (IndexType i = 0; i < numRows; ++i)
257 template <
class ViewType,
261 struct FILL<ViewType, InputType, IndexType, true, rank> {
262 static KOKKOS_INLINE_FUNCTION
void
263 run(
const ViewType& x,
const InputType& val) {
264 const IndexType span =
static_cast<IndexType
>(x.span());
265 auto x_ptr = x.data();
266 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
269 for (IndexType i = 0; i < span; ++i)
278 template <
class CoefficientType,
281 class IndexType = int,
282 const bool is_contiguous =
false,
283 const int rank = ViewType1::rank>
285 static KOKKOS_INLINE_FUNCTION
void
286 run(
const CoefficientType& alpha,
293 template <
class CoefficientType,
297 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 1> {
299 static KOKKOS_INLINE_FUNCTION
void
300 run(
const CoefficientType& alpha,
302 const ViewType2& y) {
303 #if KOKKOS_VERSION >= 40799
304 using KokkosKernels::ArithTraits;
306 using Kokkos::ArithTraits;
308 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
309 "AXPY: x and y must have the same rank.");
311 const IndexType numRows =
static_cast<IndexType
>(y.extent(0));
312 if (alpha != ArithTraits<CoefficientType>::zero()) {
314 for (IndexType i = 0; i < numRows; ++i)
315 y(i) += alpha * x(i);
322 template <
class CoefficientType,
326 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 2> {
328 static KOKKOS_INLINE_FUNCTION
void
329 run(
const CoefficientType& alpha,
331 const ViewType2& Y) {
332 #if KOKKOS_VERSION >= 40799
333 using KokkosKernels::ArithTraits;
335 using Kokkos::ArithTraits;
337 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
338 "AXPY: X and Y must have the same rank.");
339 const IndexType numRows =
static_cast<IndexType
>(Y.extent(0));
340 const IndexType numCols =
static_cast<IndexType
>(Y.extent(1));
342 if (alpha != ArithTraits<CoefficientType>::zero()) {
343 for (IndexType j = 0; j < numCols; ++j)
344 for (IndexType i = 0; i < numRows; ++i)
345 Y(i, j) += alpha * X(i, j);
350 template <
class CoefficientType,
355 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, true, rank> {
357 static KOKKOS_INLINE_FUNCTION
void
358 run(
const CoefficientType& alpha,
360 const ViewType2& y) {
361 #if KOKKOS_VERSION >= 40799
362 using KokkosKernels::ArithTraits;
364 using Kokkos::ArithTraits;
366 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
367 "AXPY: x and y must have the same rank.");
369 if (alpha != ArithTraits<CoefficientType>::zero()) {
370 using x_value_type =
typename std::decay<decltype(*x.data())>::type;
371 using y_value_type =
typename std::decay<decltype(*y.data())>::type;
372 const IndexType span =
static_cast<IndexType
>(y.span());
373 const x_value_type* __restrict x_ptr(x.data());
374 y_value_type* __restrict y_ptr(y.data());
375 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
378 for (IndexType i = 0; i < span; ++i)
379 y_ptr[i] += alpha * x_ptr[i];
388 template <
class ViewType1,
390 class IndexType = int,
391 const bool is_contiguous =
false,
392 const int rank = ViewType1::rank>
394 static KOKKOS_INLINE_FUNCTION
void
395 run(
const ViewType1& x,
const ViewType2& y);
400 template <
class ViewType1,
403 struct COPY<ViewType1, ViewType2, IndexType, false, 1> {
405 static KOKKOS_INLINE_FUNCTION
void
406 run(
const ViewType1& x,
const ViewType2& y) {
407 const IndexType numRows =
static_cast<IndexType
>(x.extent(0));
409 for (IndexType i = 0; i < numRows; ++i)
416 template <
class ViewType1,
419 struct COPY<ViewType1, ViewType2, IndexType, false, 2> {
421 static KOKKOS_INLINE_FUNCTION
void
422 run(
const ViewType1& X,
const ViewType2& Y) {
423 const IndexType numRows =
static_cast<IndexType
>(Y.extent(0));
424 const IndexType numCols =
static_cast<IndexType
>(Y.extent(1));
426 for (IndexType j = 0; j < numCols; ++j)
427 for (IndexType i = 0; i < numRows; ++i)
432 template <
class ViewType1,
436 struct COPY<ViewType1, ViewType2, IndexType, true, rank> {
438 static KOKKOS_INLINE_FUNCTION
void
439 run(
const ViewType1& x,
const ViewType2& y) {
440 const IndexType span =
static_cast<IndexType
>(x.span());
441 using x_value_type =
typename std::decay<decltype(*x.data())>::type;
442 using y_value_type =
typename std::decay<decltype(*y.data())>::type;
443 const x_value_type* __restrict x_ptr(x.data());
444 y_value_type* __restrict y_ptr(y.data());
446 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
449 for (IndexType i = 0; i < span; ++i)
454 template <
class VecType1,
458 class IndexType = int,
459 bool is_contiguous =
false,
460 class BlkLayoutType =
typename BlkType::array_layout>
462 static KOKKOS_INLINE_FUNCTION
void
463 run(
const CoeffType& alpha,
467 static_assert(VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
468 static_assert(BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
469 static_assert(VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
471 const IndexType numRows =
static_cast<IndexType
>(A.extent(0));
472 const IndexType numCols =
static_cast<IndexType
>(A.extent(1));
475 for (IndexType i = 0; i < numRows; ++i)
476 for (IndexType j = 0; j < numCols; ++j)
477 y(i) += alpha * A(i, j) * x(j);
481 template <
class VecType1,
486 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, true, Kokkos::LayoutLeft> {
487 static KOKKOS_INLINE_FUNCTION
void
488 run(
const CoeffType& alpha,
492 static_assert(VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
493 static_assert(BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
494 static_assert(VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
496 using A_value_type =
typename std::decay<decltype(A(0, 0))>::type;
497 using x_value_type =
typename std::decay<decltype(x(0))>::type;
498 using y_value_type =
typename std::decay<decltype(y(0))>::type;
500 const IndexType numRows =
static_cast<IndexType
>(A.extent(0));
501 const IndexType numCols =
static_cast<IndexType
>(A.extent(1));
503 const A_value_type* __restrict A_ptr(A.data());
504 const IndexType as1(A.stride(1));
505 const x_value_type* __restrict x_ptr(x.data());
506 y_value_type* __restrict y_ptr(y.data());
508 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
511 for (IndexType j = 0; j < numCols; ++j) {
512 const x_value_type x_at_j = alpha * x_ptr[j];
513 const A_value_type* __restrict A_at_j = A_ptr + j * as1;
514 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
517 for (IndexType i = 0; i < numRows; ++i)
518 y_ptr[i] += A_at_j[i] * x_at_j;
523 template <
class VecType1,
528 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, true, Kokkos::LayoutRight> {
529 static KOKKOS_INLINE_FUNCTION
void
530 run(
const CoeffType& alpha,
534 static_assert(VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
535 static_assert(BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
536 static_assert(VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
538 using A_value_type =
typename std::decay<decltype(A(0, 0))>::type;
539 using x_value_type =
typename std::decay<decltype(x(0))>::type;
540 using y_value_type =
typename std::decay<decltype(y(0))>::type;
542 const IndexType numRows =
static_cast<IndexType
>(A.extent(0));
543 const IndexType numCols =
static_cast<IndexType
>(A.extent(1));
545 const A_value_type* __restrict A_ptr(A.data());
546 const IndexType as0(A.stride(0));
547 const x_value_type* __restrict x_ptr(x.data());
548 y_value_type* __restrict y_ptr(y.data());
550 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
553 for (IndexType i = 0; i < numRows; ++i) {
554 y_value_type y_at_i(0);
555 const auto A_at_i = A_ptr + i * as0;
556 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
559 for (IndexType j = 0; j < numCols; ++j)
560 y_at_i += A_at_i[j] * x_ptr[j];
561 y_ptr[i] += alpha * y_at_i;
570 template <
class ViewType,
571 class CoefficientType,
572 class IndexType = int,
573 const int rank = ViewType::rank>
574 KOKKOS_INLINE_FUNCTION
void
575 SCAL(
const CoefficientType& alpha,
const ViewType& x) {
576 using LayoutType =
typename ViewType::array_layout;
577 constexpr
bool is_contiguous = (std::is_same<LayoutType, Kokkos::LayoutLeft>::value ||
578 std::is_same<LayoutType, Kokkos::LayoutRight>::value);
583 template <
class ViewType,
585 class IndexType = int,
586 const int rank = ViewType::rank>
587 KOKKOS_INLINE_FUNCTION
void
588 FILL(
const ViewType& x,
const InputType& val) {
589 using LayoutType =
typename ViewType::array_layout;
590 constexpr
bool is_contiguous = (std::is_same<LayoutType, Kokkos::LayoutLeft>::value ||
591 std::is_same<LayoutType, Kokkos::LayoutRight>::value);
600 template <
class CoefficientType,
603 class IndexType = int,
604 const int rank = ViewType1::rank>
605 KOKKOS_INLINE_FUNCTION
void
606 AXPY(
const CoefficientType& alpha,
608 const ViewType2& y) {
609 static_assert(static_cast<int>(ViewType1::rank) ==
610 static_cast<int>(ViewType2::rank),
611 "AXPY: x and y must have the same rank.");
612 using LayoutType1 =
typename ViewType1::array_layout;
613 using LayoutType2 =
typename ViewType2::array_layout;
614 constexpr
bool is_layout_same = std::is_same<LayoutType1, LayoutType2>::value;
615 constexpr
bool is_x_contiguous = (std::is_same<LayoutType1, Kokkos::LayoutLeft>::value ||
616 std::is_same<LayoutType1, Kokkos::LayoutRight>::value);
617 constexpr
bool is_contiguous = is_layout_same && is_x_contiguous;
629 template <
class ViewType1,
631 class IndexType = int,
632 const int rank = ViewType1::rank>
633 KOKKOS_INLINE_FUNCTION
void
634 COPY(
const ViewType1& x,
const ViewType2& y) {
635 static_assert(static_cast<int>(ViewType1::rank) ==
636 static_cast<int>(ViewType2::rank),
637 "COPY: x and y must have the same rank.");
638 using LayoutType1 =
typename ViewType1::array_layout;
639 using LayoutType2 =
typename ViewType2::array_layout;
640 constexpr
bool is_layout_same = std::is_same<LayoutType1, LayoutType2>::value;
641 constexpr
bool is_x_contiguous = (std::is_same<LayoutType1, Kokkos::LayoutLeft>::value ||
642 std::is_same<LayoutType1, Kokkos::LayoutRight>::value);
643 constexpr
bool is_contiguous = is_layout_same && is_x_contiguous;
658 template <
class VecType1,
662 class IndexType =
int>
663 KOKKOS_INLINE_FUNCTION
void
668 constexpr
bool is_A_contiguous = (std::is_same<typename BlkType::array_layout, Kokkos::LayoutLeft>::value ||
669 std::is_same<typename BlkType::array_layout, Kokkos::LayoutRight>::value);
670 constexpr
bool is_x_contiguous = (std::is_same<typename VecType1::array_layout, Kokkos::LayoutLeft>::value ||
671 std::is_same<typename VecType1::array_layout, Kokkos::LayoutRight>::value);
672 constexpr
bool is_y_contiguous = (std::is_same<typename VecType2::array_layout, Kokkos::LayoutLeft>::value ||
673 std::is_same<typename VecType2::array_layout, Kokkos::LayoutRight>::value);
674 constexpr
bool is_contiguous = is_A_contiguous && is_x_contiguous && is_y_contiguous;
675 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, is_contiguous>::run(alpha, A, x, y);
685 template <
class ViewType1,
688 class CoefficientType,
689 class IndexType =
int>
690 KOKKOS_INLINE_FUNCTION
void
693 const CoefficientType& alpha,
696 const CoefficientType& beta,
697 const ViewType3& C) {
699 static_assert(ViewType1::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
700 static_assert(ViewType2::rank == 2,
"GEMM: B must have rank 2 (be a matrix).");
701 static_assert(ViewType3::rank == 2,
"GEMM: C must have rank 2 (be a matrix).");
703 typedef typename std::remove_reference<decltype(A(0, 0))>::type Scalar;
704 #if KOKKOS_VERSION >= 40799
705 typedef KokkosKernels::ArithTraits<Scalar> STS;
707 typedef Kokkos::ArithTraits<Scalar> STS;
709 const Scalar
ZERO = STS::zero();
710 const Scalar ONE = STS::one();
714 if (transA[0] ==
'N' || transA[0] ==
'n') {
715 m =
static_cast<IndexType
>(A.extent(0));
716 n =
static_cast<IndexType
>(A.extent(1));
718 m =
static_cast<IndexType
>(A.extent(1));
719 n =
static_cast<IndexType
>(A.extent(0));
721 k =
static_cast<IndexType
>(C.extent(1));
724 if (alpha == ZERO && beta == ONE)
730 for (IndexType i = 0; i < m; i++) {
731 for (IndexType j = 0; j < k; j++) {
736 for (IndexType i = 0; i < m; i++) {
737 for (IndexType j = 0; j < k; j++) {
738 C(i, j) = beta * C(i, j);
745 if (transB[0] ==
'n' || transB[0] ==
'N') {
746 if (transA[0] ==
'n' || transA[0] ==
'N') {
748 for (IndexType j = 0; j < n; j++) {
750 for (IndexType i = 0; i < m; i++) {
753 }
else if (beta != ONE) {
754 for (IndexType i = 0; i < m; i++) {
755 C(i, j) = beta * C(i, j);
758 for (IndexType l = 0; l < k; l++) {
759 Scalar temp = alpha * B(l, j);
760 for (IndexType i = 0; i < m; i++) {
761 C(i, j) = C(i, j) + temp * A(i, l);
767 for (IndexType j = 0; j < n; j++) {
768 for (IndexType i = 0; i < m; i++) {
770 for (IndexType l = 0; l < k; l++) {
771 temp = temp + A(l, i) * B(l, j);
774 C(i, j) = alpha * temp;
776 C(i, j) = alpha * temp + beta * C(i, j);
782 if (transA[0] ==
'n' || transA[0] ==
'N') {
784 for (IndexType j = 0; j < n; j++) {
786 for (IndexType i = 0; i < m; i++) {
789 }
else if (beta != ONE) {
790 for (IndexType i = 0; i < m; i++) {
791 C(i, j) = beta * C(i, j);
794 for (IndexType l = 0; l < k; l++) {
795 Scalar temp = alpha * B(j, l);
796 for (IndexType i = 0; i < m; i++) {
797 C(i, j) = C(i, j) + temp * A(i, l);
803 for (IndexType j = 0; j < n; j++) {
804 for (IndexType i = 0; i < m; i++) {
806 for (IndexType l = 0; l < k; l++) {
807 temp = temp + A(l, i) * B(j, l);
810 C(i, j) = alpha * temp;
812 C(i, j) = alpha * temp + beta * C(i, j);
821 template <
class LittleBlockType,
822 class LittleVectorType>
823 KOKKOS_INLINE_FUNCTION
void
824 GETF2(
const LittleBlockType& A,
const LittleVectorType& ipiv,
int& info) {
826 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
827 static_assert(std::is_integral<IndexType>::value,
828 "GETF2: The type of each entry of ipiv must be an integer type.");
829 typedef typename std::remove_reference<decltype(A(0, 0))>::type Scalar;
830 static_assert(!std::is_const<Scalar>::value,
831 "GETF2: A must not be a const View (or LittleBlock).");
832 static_assert(!std::is_const<std::remove_reference<decltype(ipiv(0))>>::value,
833 "GETF2: ipiv must not be a const View (or LittleBlock).");
834 static_assert(LittleBlockType::rank == 2,
"GETF2: A must have rank 2 (be a matrix).");
835 #if KOKKOS_VERSION >= 40799
836 typedef KokkosKernels::ArithTraits<Scalar> STS;
838 typedef Kokkos::ArithTraits<Scalar> STS;
840 const Scalar
ZERO = STS::zero();
842 const IndexType numRows =
static_cast<IndexType
>(A.extent(0));
843 const IndexType numCols =
static_cast<IndexType
>(A.extent(1));
844 const IndexType pivDim =
static_cast<IndexType
>(ipiv.extent(0));
847 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
848 if (pivDim < minPivDim) {
856 for (IndexType j = 0; j < pivDim; j++) {
859 for (IndexType i = j + 1; i < numRows; i++) {
860 if (STS::abs(A(i, j)) > STS::abs(A(jp, j))) {
866 if (A(jp, j) != ZERO) {
869 for (IndexType i = 0; i < numCols; i++) {
870 Scalar temp = A(jp, i);
877 for (IndexType i = j + 1; i < numRows; i++) {
878 A(i, j) = A(i, j) / A(j, j);
880 }
else if (info == 0) {
885 for (IndexType r = j + 1; r < numRows; r++) {
886 for (IndexType c = j + 1; c < numCols; c++) {
887 A(r, c) = A(r, c) - A(r, j) * A(j, c);
898 template <
class LittleBlockType,
899 class LittleIntVectorType,
900 class LittleScalarVectorType,
901 const int rank = LittleScalarVectorType::rank>
903 static KOKKOS_INLINE_FUNCTION
void
904 run(
const char mode[],
905 const LittleBlockType& A,
906 const LittleIntVectorType& ipiv,
907 const LittleScalarVectorType& B,
912 template <
class LittleBlockType,
913 class LittleIntVectorType,
914 class LittleScalarVectorType>
915 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
916 static KOKKOS_INLINE_FUNCTION
void
917 run(
const char mode[],
918 const LittleBlockType& A,
919 const LittleIntVectorType& ipiv,
920 const LittleScalarVectorType& B,
923 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
926 static_assert(std::is_integral<IndexType>::value &&
927 std::is_signed<IndexType>::value,
928 "GETRS: The type of each entry of ipiv must be a signed integer.");
929 typedef typename std::decay<decltype(A(0, 0))>::type Scalar;
930 static_assert(!std::is_const<std::remove_reference<decltype(B(0))>>::value,
931 "GETRS: B must not be a const View (or LittleBlock).");
932 static_assert(LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
933 static_assert(LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
934 static_assert(LittleScalarVectorType::rank == 1,
"GETRS: For this specialization, B must have rank 1.");
936 #if KOKKOS_VERSION >= 40799
937 typedef KokkosKernels::ArithTraits<Scalar> STS;
939 typedef Kokkos::ArithTraits<Scalar> STS;
941 const Scalar
ZERO = STS::zero();
942 const IndexType numRows =
static_cast<IndexType
>(A.extent(0));
943 const IndexType numCols =
static_cast<IndexType
>(A.extent(1));
944 const IndexType pivDim =
static_cast<IndexType
>(ipiv.extent(0));
949 if (numRows != numCols) {
955 if (pivDim < numRows) {
961 if (mode[0] ==
'n' || mode[0] ==
'N') {
963 for (IndexType i = 0; i < numRows; i++) {
964 if (ipiv(i) != i + 1) {
966 B(i) = B(ipiv(i) - 1);
967 B(ipiv(i) - 1) = temp;
972 for (IndexType r = 1; r < numRows; r++) {
973 for (IndexType c = 0; c < r; c++) {
974 B(r) = B(r) - A(r, c) * B(c);
979 for (IndexType r = numRows - 1; r >= 0; r--) {
981 if (A(r, r) == ZERO) {
986 for (IndexType c = r + 1; c < numCols; c++) {
987 B(r) = B(r) - A(r, c) * B(c);
989 B(r) = B(r) / A(r, r);
993 else if (mode[0] ==
't' || mode[0] ==
'T') {
998 else if (mode[0] ==
'c' || mode[0] ==
'C') {
1009 template <
class LittleBlockType,
1010 class LittleIntVectorType,
1011 class LittleScalarVectorType>
1012 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1013 static KOKKOS_INLINE_FUNCTION
void
1014 run(
const char mode[],
1015 const LittleBlockType& A,
1016 const LittleIntVectorType& ipiv,
1017 const LittleScalarVectorType& B,
1020 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
1021 static_assert(std::is_integral<IndexType>::value,
1022 "GETRS: The type of each entry of ipiv must be an integer type.");
1023 static_assert(!std::is_const<std::remove_reference<decltype(B(0))>>::value,
1024 "GETRS: B must not be a const View (or LittleBlock).");
1025 static_assert(LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
1026 static_assert(LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
1027 static_assert(LittleScalarVectorType::rank == 2,
"GETRS: For this specialization, B must have rank 2.");
1032 const IndexType numRhs = B.extent(1);
1035 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1036 auto B_cur = Kokkos::subview(B, Kokkos::ALL(), rhs);
1050 template <
class LittleBlockType,
1051 class LittleIntVectorType,
1052 class LittleScalarVectorType>
1053 KOKKOS_INLINE_FUNCTION
void
1055 const LittleBlockType& A,
1056 const LittleIntVectorType& ipiv,
1057 const LittleScalarVectorType& B,
1059 Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1060 LittleScalarVectorType::rank>::run(mode, A, ipiv, B, info);
1077 template <
class LittleBlockType,
1078 class LittleIntVectorType,
1079 class LittleScalarVectorType>
1080 KOKKOS_INLINE_FUNCTION
void
1082 const LittleIntVectorType& ipiv,
1083 const LittleScalarVectorType& work,
1086 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
1089 static_assert(std::is_integral<IndexType>::value &&
1090 std::is_signed<IndexType>::value,
1091 "GETRI: The type of each entry of ipiv must be a signed integer.");
1092 typedef typename std::remove_reference<decltype(A(0, 0))>::type Scalar;
1093 static_assert(!std::is_const<std::remove_reference<decltype(A(0, 0))>>::value,
1094 "GETRI: A must not be a const View (or LittleBlock).");
1095 static_assert(!std::is_const<std::remove_reference<decltype(work(0))>>::value,
1096 "GETRI: work must not be a const View (or LittleBlock).");
1097 static_assert(LittleBlockType::rank == 2,
"GETRI: A must have rank 2 (be a matrix).");
1098 #if KOKKOS_VERSION >= 40799
1099 typedef KokkosKernels::ArithTraits<Scalar> STS;
1101 typedef Kokkos::ArithTraits<Scalar> STS;
1103 const Scalar
ZERO = STS::zero();
1104 const Scalar ONE = STS::one();
1106 const IndexType numRows =
static_cast<IndexType
>(A.extent(0));
1107 const IndexType numCols =
static_cast<IndexType
>(A.extent(1));
1108 const IndexType pivDim =
static_cast<IndexType
>(ipiv.extent(0));
1109 const IndexType workDim =
static_cast<IndexType
>(work.extent(0));
1114 if (numRows != numCols) {
1120 if (pivDim < numRows) {
1126 if (workDim < numRows) {
1132 for (IndexType j = 0; j < numRows; j++) {
1133 if (A(j, j) ==
ZERO) {
1138 A(j, j) = ONE / A(j, j);
1141 for (IndexType r = 0; r < j; r++) {
1142 A(r, j) = A(r, r) * A(r, j);
1143 for (IndexType c = r + 1; c < j; c++) {
1144 A(r, j) = A(r, j) + A(r, c) * A(c, j);
1147 for (IndexType r = 0; r < j; r++) {
1148 A(r, j) = -A(j, j) * A(r, j);
1153 for (IndexType j = numCols - 2; j >= 0; j--) {
1155 for (IndexType r = j + 1; r < numRows; r++) {
1160 for (IndexType r = 0; r < numRows; r++) {
1161 for (IndexType i = j + 1; i < numRows; i++) {
1162 A(r, j) = A(r, j) - work(i) * A(r, i);
1168 for (IndexType j = numRows - 1; j >= 0; j--) {
1169 IndexType jp = ipiv(j) - 1;
1171 for (IndexType r = 0; r < numRows; r++) {
1172 Scalar temp = A(r, j);
1184 template<
class LittleBlockType,
1185 class LittleVectorTypeX,
1186 class LittleVectorTypeY,
1187 class CoefficientType,
1188 class IndexType =
int>
1189 KOKKOS_INLINE_FUNCTION
void
1190 GEMV (
const char trans,
1191 const CoefficientType& alpha,
1192 const LittleBlockType& A,
1193 const LittleVectorTypeX& x,
1194 const CoefficientType& beta,
1195 const LittleVectorTypeY& y)
1201 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1202 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
1203 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
1207 for (IndexType i = 0; i < numRows; ++i) {
1212 for (IndexType i = 0; i < numRows; ++i) {
1213 y_value_type y_i = 0.0;
1214 for (IndexType j = 0; j < numCols; ++j) {
1215 y_i += A(i,j) * x(j);
1224 for (IndexType i = 0; i < numRows; ++i) {
1229 for (IndexType i = 0; i < numRows; ++i) {
1235 for (IndexType i = 0; i < numRows; ++i) {
1236 y_value_type y_i = beta * y(i);
1237 for (IndexType j = 0; j < numCols; ++j) {
1238 y_i += alpha * A(i,j) * x(j);
1250 #endif // TPETRA_BLOCKVIEW_HPP
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra::SCAL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
Implementation of Tpetra::COPY function.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra::AXPY function.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Computes the solution to Ax=b.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
Implementation of Tpetra::FILL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.