10 #ifndef TPETRA_BLOCKVIEW_HPP
11 #define TPETRA_BLOCKVIEW_HPP
21 #include "TpetraCore_config.h"
22 #include "Kokkos_ArithTraits.hpp"
23 #include "Kokkos_Complex.hpp"
40 template<
class ViewType1,
42 const int rank1 = ViewType1::rank>
44 static KOKKOS_INLINE_FUNCTION
void
45 run (
const ViewType2& Y,
const ViewType1& X);
52 template<
class ViewType1,
54 struct AbsMax<ViewType1, ViewType2, 2> {
57 static KOKKOS_INLINE_FUNCTION
void
58 run (
const ViewType2& Y,
const ViewType1& X)
60 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
61 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
62 typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
63 static_assert(! std::is_const<STY>::value,
64 "AbsMax: The type of each entry of Y must be nonconst.");
65 typedef typename std::decay<decltype (X(0,0)) >::type STX;
66 static_assert( std::is_same<STX, STY>::value,
67 "AbsMax: The type of each entry of X and Y must be the same.");
68 typedef Kokkos::ArithTraits<STY> KAT;
70 const int numCols = Y.extent (1);
71 const int numRows = Y.extent (0);
72 for (
int j = 0; j < numCols; ++j) {
73 for (
int i = 0; i < numRows; ++i) {
75 const STX X_ij = X(i,j);
78 const auto Y_ij_abs = KAT::abs (Y_ij);
79 const auto X_ij_abs = KAT::abs (X_ij);
80 Y_ij = (Y_ij_abs >= X_ij_abs) ?
81 static_cast<STY> (Y_ij_abs) :
82 static_cast<STY
> (X_ij_abs);
92 template<
class ViewType1,
94 struct AbsMax<ViewType1, ViewType2, 1> {
97 static KOKKOS_INLINE_FUNCTION
void
98 run (
const ViewType2& Y,
const ViewType1& X)
100 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
101 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
103 typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
104 static_assert(! std::is_const<STY>::value,
105 "AbsMax: The type of each entry of Y must be nonconst.");
106 typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
107 static_assert( std::is_same<STX, STY>::value,
108 "AbsMax: The type of each entry of X and Y must be the same.");
109 typedef Kokkos::ArithTraits<STY> KAT;
111 const int numRows = Y.extent (0);
112 for (
int i = 0; i < numRows; ++i) {
114 const STX X_i = X(i);
117 const auto Y_i_abs = KAT::abs (Y_i);
118 const auto X_i_abs = KAT::abs (X_i);
119 Y_i = (Y_i_abs >= X_i_abs) ?
120 static_cast<STY> (Y_i_abs) :
121 static_cast<STY
> (X_i_abs);
132 template<
class ViewType1,
class ViewType2, const
int rank = ViewType1::rank>
133 KOKKOS_INLINE_FUNCTION
void
134 absMax (
const ViewType2& Y,
const ViewType1& X)
136 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
137 "absMax: ViewType1 and ViewType2 must have the same rank.");
145 template<
class ViewType,
146 class CoefficientType,
147 class IndexType = int,
148 const bool is_contiguous =
false,
149 const int rank = ViewType::rank>
151 static KOKKOS_INLINE_FUNCTION
void
152 run (
const CoefficientType& alpha,
const ViewType& x);
157 template<
class ViewType,
158 class CoefficientType,
160 struct SCAL<ViewType, CoefficientType, IndexType, false, 1> {
162 static KOKKOS_INLINE_FUNCTION
void
163 run (
const CoefficientType& alpha,
const ViewType& x)
165 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
168 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
171 for (IndexType i = 0; i < numRows; ++i)
177 template<
class ViewType,
178 class CoefficientType,
180 struct SCAL<ViewType, CoefficientType, IndexType, false, 2> {
182 static KOKKOS_INLINE_FUNCTION
void
183 run (
const CoefficientType& alpha,
const ViewType& A)
185 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
186 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
188 for (IndexType j = 0; j < numCols; ++j)
189 for (IndexType i = 0; i < numRows; ++i)
190 A(i,j) = alpha * A(i,j);
193 template<
class ViewType,
194 class CoefficientType,
197 struct SCAL<ViewType, CoefficientType, IndexType, true, rank> {
199 static KOKKOS_INLINE_FUNCTION
void
200 run (
const CoefficientType& alpha,
const ViewType& x)
202 using x_value_type =
typename std::decay<decltype (*x.data()) >::type;
203 const IndexType span =
static_cast<IndexType
> (x.span());
204 x_value_type *__restrict x_ptr(x.data());
205 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
208 for (IndexType i = 0; i < span; ++i)
209 x_ptr[i] = alpha * x_ptr[i];
217 template<
class ViewType,
219 class IndexType = int,
220 const bool is_contiguous =
false,
221 const int rank = ViewType::rank>
223 static KOKKOS_INLINE_FUNCTION
void
224 run (
const ViewType& x,
const InputType& val);
229 template<
class ViewType,
232 struct FILL<ViewType, InputType, IndexType, false, 1> {
233 static KOKKOS_INLINE_FUNCTION
void
234 run (
const ViewType& x,
const InputType& val)
236 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
237 for (IndexType i = 0; i < numRows; ++i)
243 template<
class ViewType,
246 struct FILL<ViewType, InputType, IndexType, false, 2> {
247 static KOKKOS_INLINE_FUNCTION
void
248 run (
const ViewType& X,
const InputType& val)
250 const IndexType numRows =
static_cast<IndexType
> (X.extent (0));
251 const IndexType numCols =
static_cast<IndexType
> (X.extent (1));
252 for (IndexType j = 0; j < numCols; ++j)
253 for (IndexType i = 0; i < numRows; ++i)
257 template<
class ViewType,
261 struct FILL<ViewType, InputType, IndexType, true, rank> {
262 static KOKKOS_INLINE_FUNCTION
void
263 run (
const ViewType& x,
const InputType& val)
265 const IndexType span =
static_cast<IndexType
> (x.span());
266 auto x_ptr = x.data();
267 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
270 for (IndexType i = 0; i < span; ++i)
280 template<
class CoefficientType,
283 class IndexType = int,
284 const bool is_contiguous =
false,
285 const int rank = ViewType1::rank>
287 static KOKKOS_INLINE_FUNCTION
void
288 run (
const CoefficientType& alpha,
295 template<
class CoefficientType,
299 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 1> {
301 static KOKKOS_INLINE_FUNCTION
void
302 run (
const CoefficientType& alpha,
306 using Kokkos::ArithTraits;
307 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
308 "AXPY: x and y must have the same rank.");
310 const IndexType numRows =
static_cast<IndexType
> (y.extent (0));
311 if (alpha != ArithTraits<CoefficientType>::zero ()) {
313 for (IndexType i = 0; i < numRows; ++i)
314 y(i) += alpha * x(i);
321 template<
class CoefficientType,
325 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 2> {
327 static KOKKOS_INLINE_FUNCTION
void
328 run (
const CoefficientType& alpha,
332 using Kokkos::ArithTraits;
333 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
334 "AXPY: X and Y must have the same rank.");
335 const IndexType numRows =
static_cast<IndexType
> (Y.extent (0));
336 const IndexType numCols =
static_cast<IndexType
> (Y.extent (1));
338 if (alpha != ArithTraits<CoefficientType>::zero ()) {
339 for (IndexType j = 0; j < numCols; ++j)
340 for (IndexType i = 0; i < numRows; ++i)
341 Y(i,j) += alpha * X(i,j);
346 template<
class CoefficientType,
351 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, true, rank> {
353 static KOKKOS_INLINE_FUNCTION
void
354 run (
const CoefficientType& alpha,
358 using Kokkos::ArithTraits;
359 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
360 "AXPY: x and y must have the same rank.");
362 if (alpha != ArithTraits<CoefficientType>::zero ()) {
363 using x_value_type =
typename std::decay<decltype (*x.data()) >::type;
364 using y_value_type =
typename std::decay<decltype (*y.data()) >::type;
365 const IndexType span =
static_cast<IndexType
> (y.span());
366 const x_value_type *__restrict x_ptr(x.data());
367 y_value_type *__restrict y_ptr(y.data());
368 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
371 for (IndexType i = 0; i < span; ++i)
372 y_ptr[i] += alpha * x_ptr[i];
381 template<
class ViewType1,
383 class IndexType = int,
384 const bool is_contiguous =
false,
385 const int rank = ViewType1::rank>
387 static KOKKOS_INLINE_FUNCTION
void
388 run (
const ViewType1& x,
const ViewType2& y);
393 template<
class ViewType1,
396 struct COPY<ViewType1, ViewType2, IndexType, false, 1> {
398 static KOKKOS_INLINE_FUNCTION
void
399 run (
const ViewType1& x,
const ViewType2& y)
401 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
403 for (IndexType i = 0; i < numRows; ++i)
410 template<
class ViewType1,
413 struct COPY<ViewType1, ViewType2, IndexType, false, 2> {
415 static KOKKOS_INLINE_FUNCTION
void
416 run (
const ViewType1& X,
const ViewType2& Y)
418 const IndexType numRows =
static_cast<IndexType
> (Y.extent (0));
419 const IndexType numCols =
static_cast<IndexType
> (Y.extent (1));
421 for (IndexType j = 0; j < numCols; ++j)
422 for (IndexType i = 0; i < numRows; ++i)
427 template<
class ViewType1,
431 struct COPY<ViewType1, ViewType2, IndexType, true, rank> {
433 static KOKKOS_INLINE_FUNCTION
void
434 run (
const ViewType1& x,
const ViewType2& y)
436 const IndexType span =
static_cast<IndexType
> (x.span());
437 using x_value_type =
typename std::decay<decltype (*x.data()) >::type;
438 using y_value_type =
typename std::decay<decltype (*y.data()) >::type;
439 const x_value_type *__restrict x_ptr(x.data());
440 y_value_type *__restrict y_ptr(y.data());
442 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
445 for (IndexType i = 0; i < span; ++i)
450 template<
class VecType1,
454 class IndexType = int,
455 bool is_contiguous =
false,
456 class BlkLayoutType =
typename BlkType::array_layout>
458 static KOKKOS_INLINE_FUNCTION
void
459 run (
const CoeffType& alpha,
464 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
465 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
466 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
468 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
469 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
472 for (IndexType i = 0; i < numRows; ++i)
473 for (IndexType j = 0; j < numCols; ++j)
474 y(i) += alpha * A(i,j) * x(j);
478 template<
class VecType1,
483 struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutLeft> {
484 static KOKKOS_INLINE_FUNCTION
void
485 run (
const CoeffType& alpha,
490 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
491 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
492 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
494 using A_value_type =
typename std::decay<decltype (A(0,0)) >::type;
495 using x_value_type =
typename std::decay<decltype (x(0)) >::type;
496 using y_value_type =
typename std::decay<decltype (y(0)) >::type;
498 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
499 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
501 const A_value_type *__restrict A_ptr(A.data());
const IndexType as1(A.stride(1));
502 const x_value_type *__restrict x_ptr(x.data());
503 y_value_type *__restrict y_ptr(y.data());
505 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
508 for (IndexType j=0;j<numCols;++j) {
509 const x_value_type x_at_j = alpha*x_ptr[j];
510 const A_value_type *__restrict A_at_j = A_ptr + j*as1;
511 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
514 for (IndexType i=0;i<numRows;++i)
515 y_ptr[i] += A_at_j[i] * x_at_j;
520 template<
class VecType1,
525 struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutRight> {
526 static KOKKOS_INLINE_FUNCTION
void
527 run (
const CoeffType& alpha,
532 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
533 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
534 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
536 using A_value_type =
typename std::decay<decltype (A(0,0)) >::type;
537 using x_value_type =
typename std::decay<decltype (x(0)) >::type;
538 using y_value_type =
typename std::decay<decltype (y(0)) >::type;
540 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
541 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
543 const A_value_type *__restrict A_ptr(A.data());
const IndexType as0(A.stride(0));
544 const x_value_type *__restrict x_ptr(x.data());
545 y_value_type *__restrict y_ptr(y.data());
547 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
550 for (IndexType i=0;i<numRows;++i) {
551 y_value_type y_at_i(0);
552 const auto A_at_i = A_ptr + i*as0;
553 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
556 for (IndexType j=0;j<numCols;++j)
557 y_at_i += A_at_i[j] * x_ptr[j];
558 y_ptr[i] += alpha*y_at_i;
567 template<
class ViewType,
568 class CoefficientType,
569 class IndexType = int,
570 const int rank = ViewType::rank>
571 KOKKOS_INLINE_FUNCTION
void
572 SCAL (
const CoefficientType& alpha,
const ViewType& x)
574 using LayoutType =
typename ViewType::array_layout;
575 constexpr
bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
576 std::is_same<LayoutType,Kokkos::LayoutRight>::value);
581 template<
class ViewType,
583 class IndexType = int,
584 const int rank = ViewType::rank>
585 KOKKOS_INLINE_FUNCTION
void
586 FILL (
const ViewType& x,
const InputType& val)
588 using LayoutType =
typename ViewType::array_layout;
589 constexpr
bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
590 std::is_same<LayoutType,Kokkos::LayoutRight>::value);
599 template<
class CoefficientType,
602 class IndexType = int,
603 const int rank = ViewType1::rank>
604 KOKKOS_INLINE_FUNCTION
void
605 AXPY (
const CoefficientType& alpha,
609 static_assert (static_cast<int> (ViewType1::rank) ==
610 static_cast<int> (ViewType2::rank),
611 "AXPY: x and y must have the same rank.");
612 using LayoutType1 =
typename ViewType1::array_layout;
613 using LayoutType2 =
typename ViewType2::array_layout;
614 constexpr
bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
615 constexpr
bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
616 std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
617 constexpr
bool is_contiguous = is_layout_same && is_x_contiguous;
629 template<
class ViewType1,
631 class IndexType = int,
632 const int rank = ViewType1::rank>
633 KOKKOS_INLINE_FUNCTION
void
634 COPY (
const ViewType1& x,
const ViewType2& y)
636 static_assert (static_cast<int> (ViewType1::rank) ==
637 static_cast<int> (ViewType2::rank),
638 "COPY: x and y must have the same rank.");
639 using LayoutType1 =
typename ViewType1::array_layout;
640 using LayoutType2 =
typename ViewType2::array_layout;
641 constexpr
bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
642 constexpr
bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
643 std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
644 constexpr
bool is_contiguous = is_layout_same && is_x_contiguous;
659 template<
class VecType1,
663 class IndexType =
int>
664 KOKKOS_INLINE_FUNCTION
void
670 constexpr
bool is_A_contiguous = (std::is_same<typename BlkType::array_layout, Kokkos::LayoutLeft>::value ||
671 std::is_same<typename BlkType::array_layout, Kokkos::LayoutRight>::value);
672 constexpr
bool is_x_contiguous = (std::is_same<typename VecType1::array_layout, Kokkos::LayoutLeft>::value ||
673 std::is_same<typename VecType1::array_layout, Kokkos::LayoutRight>::value);
674 constexpr
bool is_y_contiguous = (std::is_same<typename VecType2::array_layout, Kokkos::LayoutLeft>::value ||
675 std::is_same<typename VecType2::array_layout, Kokkos::LayoutRight>::value);
676 constexpr
bool is_contiguous = is_A_contiguous && is_x_contiguous && is_y_contiguous;
677 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, is_contiguous>::run (alpha, A, x, y);
687 template<
class ViewType1,
690 class CoefficientType,
691 class IndexType =
int>
692 KOKKOS_INLINE_FUNCTION
void
695 const CoefficientType& alpha,
698 const CoefficientType& beta,
702 static_assert (ViewType1::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
703 static_assert (ViewType2::rank == 2,
"GEMM: B must have rank 2 (be a matrix).");
704 static_assert (ViewType3::rank == 2,
"GEMM: C must have rank 2 (be a matrix).");
706 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
707 typedef Kokkos::ArithTraits<Scalar> STS;
708 const Scalar
ZERO = STS::zero();
709 const Scalar ONE = STS::one();
713 if(transA[0] ==
'N' || transA[0] ==
'n') {
714 m =
static_cast<IndexType
> (A.extent (0));
715 n =
static_cast<IndexType
> (A.extent (1));
718 m =
static_cast<IndexType
> (A.extent (1));
719 n =
static_cast<IndexType
> (A.extent (0));
721 k =
static_cast<IndexType
> (C.extent (1));
724 if(alpha == ZERO && beta == ONE)
730 for(IndexType i=0; i<m; i++) {
731 for(IndexType j=0; j<k; j++) {
737 for(IndexType i=0; i<m; i++) {
738 for(IndexType j=0; j<k; j++) {
739 C(i,j) = beta*C(i,j);
746 if(transB[0] ==
'n' || transB[0] ==
'N') {
747 if(transA[0] ==
'n' || transA[0] ==
'N') {
749 for(IndexType j=0; j<n; j++) {
751 for(IndexType i=0; i<m; i++) {
755 else if(beta != ONE) {
756 for(IndexType i=0; i<m; i++) {
757 C(i,j) = beta*C(i,j);
760 for(IndexType l=0; l<k; l++) {
761 Scalar temp = alpha*B(l,j);
762 for(IndexType i=0; i<m; i++) {
763 C(i,j) = C(i,j) + temp*A(i,l);
770 for(IndexType j=0; j<n; j++) {
771 for(IndexType i=0; i<m; i++) {
773 for(IndexType l=0; l<k; l++) {
774 temp = temp + A(l,i)*B(l,j);
780 C(i,j) = alpha*temp + beta*C(i,j);
787 if(transA[0] ==
'n' || transA[0] ==
'N') {
789 for(IndexType j=0; j<n; j++) {
791 for(IndexType i=0; i<m; i++) {
795 else if(beta != ONE) {
796 for(IndexType i=0; i<m; i++) {
797 C(i,j) = beta*C(i,j);
800 for(IndexType l=0; l<k; l++) {
801 Scalar temp = alpha*B(j,l);
802 for(IndexType i=0; i<m; i++) {
803 C(i,j) = C(i,j) + temp*A(i,l);
810 for(IndexType j=0; j<n; j++) {
811 for(IndexType i=0; i<m; i++) {
813 for(IndexType l=0; l<k; l++) {
814 temp = temp + A(l,i)*B(j,l);
820 C(i,j) = alpha*temp + beta*C(i,j);
829 template<
class LittleBlockType,
830 class LittleVectorType>
831 KOKKOS_INLINE_FUNCTION
void
832 GETF2 (
const LittleBlockType& A,
const LittleVectorType& ipiv,
int& info)
835 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
836 static_assert (std::is_integral<IndexType>::value,
837 "GETF2: The type of each entry of ipiv must be an integer type.");
838 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
839 static_assert (! std::is_const<Scalar>::value,
840 "GETF2: A must not be a const View (or LittleBlock).");
841 static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
842 "GETF2: ipiv must not be a const View (or LittleBlock).");
843 static_assert (LittleBlockType::rank == 2,
"GETF2: A must have rank 2 (be a matrix).");
844 typedef Kokkos::ArithTraits<Scalar> STS;
845 const Scalar
ZERO = STS::zero();
847 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
848 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
849 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
852 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
853 if (pivDim < minPivDim) {
861 for(IndexType j=0; j < pivDim; j++)
865 for(IndexType i=j+1; i<numRows; i++)
867 if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
878 for(IndexType i=0; i < numCols; i++)
880 Scalar temp = A(jp,i);
887 for(IndexType i=j+1; i<numRows; i++) {
888 A(i,j) = A(i,j) / A(j,j);
896 for(IndexType r=j+1; r < numRows; r++)
898 for(IndexType c=j+1; c < numCols; c++) {
899 A(r,c) = A(r,c) - A(r,j) * A(j,c);
910 template<
class LittleBlockType,
911 class LittleIntVectorType,
912 class LittleScalarVectorType,
913 const int rank = LittleScalarVectorType::rank>
915 static KOKKOS_INLINE_FUNCTION
void
916 run (
const char mode[],
917 const LittleBlockType& A,
918 const LittleIntVectorType& ipiv,
919 const LittleScalarVectorType& B,
924 template<
class LittleBlockType,
925 class LittleIntVectorType,
926 class LittleScalarVectorType>
927 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
928 static KOKKOS_INLINE_FUNCTION
void
929 run (
const char mode[],
930 const LittleBlockType& A,
931 const LittleIntVectorType& ipiv,
932 const LittleScalarVectorType& B,
936 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
939 static_assert (std::is_integral<IndexType>::value &&
940 std::is_signed<IndexType>::value,
941 "GETRS: The type of each entry of ipiv must be a signed integer.");
942 typedef typename std::decay<decltype (A(0,0))>::type Scalar;
943 static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
944 "GETRS: B must not be a const View (or LittleBlock).");
945 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
946 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
947 static_assert (LittleScalarVectorType::rank == 1,
"GETRS: For this specialization, B must have rank 1.");
949 typedef Kokkos::ArithTraits<Scalar> STS;
950 const Scalar
ZERO = STS::zero();
951 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
952 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
953 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
958 if (numRows != numCols) {
964 if (pivDim < numRows) {
970 if(mode[0] ==
'n' || mode[0] ==
'N') {
972 for(IndexType i=0; i<numRows; i++) {
981 for(IndexType r=1; r < numRows; r++) {
982 for(IndexType c=0; c < r; c++) {
983 B(r) = B(r) - A(r,c)*B(c);
988 for(IndexType r=numRows-1; r >= 0; r--) {
995 for(IndexType c=r+1; c < numCols; c++) {
996 B(r) = B(r) - A(r,c)*B(c);
998 B(r) = B(r) / A(r,r);
1002 else if(mode[0] ==
't' || mode[0] ==
'T') {
1007 else if (mode[0] ==
'c' || mode[0] ==
'C') {
1020 template<
class LittleBlockType,
1021 class LittleIntVectorType,
1022 class LittleScalarVectorType>
1023 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1024 static KOKKOS_INLINE_FUNCTION
void
1025 run (
const char mode[],
1026 const LittleBlockType& A,
1027 const LittleIntVectorType& ipiv,
1028 const LittleScalarVectorType& B,
1032 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1033 static_assert (std::is_integral<IndexType>::value,
1034 "GETRS: The type of each entry of ipiv must be an integer type.");
1035 static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1036 "GETRS: B must not be a const View (or LittleBlock).");
1037 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
1038 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
1039 static_assert (LittleScalarVectorType::rank == 2,
"GETRS: For this specialization, B must have rank 2.");
1044 const IndexType numRhs = B.extent (1);
1047 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1048 auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1062 template<
class LittleBlockType,
1063 class LittleIntVectorType,
1064 class LittleScalarVectorType>
1065 KOKKOS_INLINE_FUNCTION
void
1067 const LittleBlockType& A,
1068 const LittleIntVectorType& ipiv,
1069 const LittleScalarVectorType& B,
1072 Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1073 LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1091 template<
class LittleBlockType,
1092 class LittleIntVectorType,
1093 class LittleScalarVectorType>
1094 KOKKOS_INLINE_FUNCTION
void
1096 const LittleIntVectorType& ipiv,
1097 const LittleScalarVectorType& work,
1101 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1104 static_assert (std::is_integral<IndexType>::value &&
1105 std::is_signed<IndexType>::value,
1106 "GETRI: The type of each entry of ipiv must be a signed integer.");
1107 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1108 static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1109 "GETRI: A must not be a const View (or LittleBlock).");
1110 static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1111 "GETRI: work must not be a const View (or LittleBlock).");
1112 static_assert (LittleBlockType::rank == 2,
"GETRI: A must have rank 2 (be a matrix).");
1113 typedef Kokkos::ArithTraits<Scalar> STS;
1114 const Scalar
ZERO = STS::zero();
1115 const Scalar ONE = STS::one();
1117 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
1118 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
1119 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
1120 const IndexType workDim =
static_cast<IndexType
> (work.extent (0));
1125 if (numRows != numCols) {
1131 if (pivDim < numRows) {
1137 if (workDim < numRows) {
1143 for(IndexType j=0; j < numRows; j++) {
1144 if(A(j,j) ==
ZERO) {
1149 A(j,j) = ONE / A(j,j);
1152 for(IndexType r=0; r < j; r++) {
1153 A(r,j) = A(r,r)*A(r,j);
1154 for(IndexType c=r+1; c < j; c++) {
1155 A(r,j) = A(r,j) + A(r,c)*A(c,j);
1158 for(IndexType r=0; r < j; r++) {
1159 A(r,j) = -A(j,j)*A(r,j);
1164 for(IndexType j = numCols-2; j >= 0; j--) {
1166 for(IndexType r=j+1; r < numRows; r++) {
1171 for(IndexType r=0; r < numRows; r++) {
1172 for(IndexType i=j+1; i < numRows; i++) {
1173 A(r,j) = A(r,j) - work(i)*A(r,i);
1179 for(IndexType j=numRows-1; j >= 0; j--) {
1180 IndexType jp = ipiv(j)-1;
1182 for(IndexType r=0; r < numRows; r++) {
1183 Scalar temp = A(r,j);
1196 template<
class LittleBlockType,
1197 class LittleVectorTypeX,
1198 class LittleVectorTypeY,
1199 class CoefficientType,
1200 class IndexType =
int>
1201 KOKKOS_INLINE_FUNCTION
void
1202 GEMV (
const char trans,
1203 const CoefficientType& alpha,
1204 const LittleBlockType& A,
1205 const LittleVectorTypeX& x,
1206 const CoefficientType& beta,
1207 const LittleVectorTypeY& y)
1213 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1214 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
1215 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
1219 for (IndexType i = 0; i < numRows; ++i) {
1224 for (IndexType i = 0; i < numRows; ++i) {
1225 y_value_type y_i = 0.0;
1226 for (IndexType j = 0; j < numCols; ++j) {
1227 y_i += A(i,j) * x(j);
1236 for (IndexType i = 0; i < numRows; ++i) {
1241 for (IndexType i = 0; i < numRows; ++i) {
1247 for (IndexType i = 0; i < numRows; ++i) {
1248 y_value_type y_i = beta * y(i);
1249 for (IndexType j = 0; j < numCols; ++j) {
1250 y_i += alpha * A(i,j) * x(j);
1262 #endif // TPETRA_BLOCKVIEW_HPP
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra::SCAL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
Implementation of Tpetra::COPY function.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra::AXPY function.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Computes the solution to Ax=b.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
Implementation of Tpetra::FILL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.