42 #ifndef TPETRA_BLOCKVIEW_HPP
43 #define TPETRA_BLOCKVIEW_HPP
53 #include "TpetraCore_config.h"
54 #include "Kokkos_ArithTraits.hpp"
55 #include "Kokkos_Complex.hpp"
72 template<
class ViewType1,
74 const int rank1 = ViewType1::rank>
76 static KOKKOS_INLINE_FUNCTION
void
77 run (
const ViewType2& Y,
const ViewType1& X);
84 template<
class ViewType1,
86 struct AbsMax<ViewType1, ViewType2, 2> {
89 static KOKKOS_INLINE_FUNCTION
void
90 run (
const ViewType2& Y,
const ViewType1& X)
92 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
93 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
94 typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
95 static_assert(! std::is_const<STY>::value,
96 "AbsMax: The type of each entry of Y must be nonconst.");
97 typedef typename std::decay<decltype (X(0,0)) >::type STX;
98 static_assert( std::is_same<STX, STY>::value,
99 "AbsMax: The type of each entry of X and Y must be the same.");
100 typedef Kokkos::ArithTraits<STY> KAT;
102 const int numCols = Y.extent (1);
103 const int numRows = Y.extent (0);
104 for (
int j = 0; j < numCols; ++j) {
105 for (
int i = 0; i < numRows; ++i) {
107 const STX X_ij = X(i,j);
110 const auto Y_ij_abs = KAT::abs (Y_ij);
111 const auto X_ij_abs = KAT::abs (X_ij);
112 Y_ij = (Y_ij_abs >= X_ij_abs) ?
113 static_cast<STY> (Y_ij_abs) :
114 static_cast<STY
> (X_ij_abs);
124 template<
class ViewType1,
129 static KOKKOS_INLINE_FUNCTION
void
130 run (
const ViewType2& Y,
const ViewType1& X)
132 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
133 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
135 typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
136 static_assert(! std::is_const<STY>::value,
137 "AbsMax: The type of each entry of Y must be nonconst.");
138 typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
139 static_assert( std::is_same<STX, STY>::value,
140 "AbsMax: The type of each entry of X and Y must be the same.");
141 typedef Kokkos::ArithTraits<STY> KAT;
143 const int numRows = Y.extent (0);
144 for (
int i = 0; i < numRows; ++i) {
146 const STX X_i = X(i);
149 const auto Y_i_abs = KAT::abs (Y_i);
150 const auto X_i_abs = KAT::abs (X_i);
151 Y_i = (Y_i_abs >= X_i_abs) ?
152 static_cast<STY> (Y_i_abs) :
153 static_cast<STY
> (X_i_abs);
164 template<
class ViewType1,
class ViewType2, const
int rank = ViewType1::rank>
165 KOKKOS_INLINE_FUNCTION
void
166 absMax (
const ViewType2& Y,
const ViewType1& X)
168 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
169 "absMax: ViewType1 and ViewType2 must have the same rank.");
177 template<
class ViewType,
178 class CoefficientType,
179 class IndexType = int,
180 const bool is_contiguous =
false,
181 const int rank = ViewType::rank>
183 static KOKKOS_INLINE_FUNCTION
void
184 run (
const CoefficientType& alpha,
const ViewType& x);
189 template<
class ViewType,
190 class CoefficientType,
192 struct SCAL<ViewType, CoefficientType, IndexType, false, 1> {
194 static KOKKOS_INLINE_FUNCTION
void
195 run (
const CoefficientType& alpha,
const ViewType& x)
197 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
200 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
203 for (IndexType i = 0; i < numRows; ++i)
209 template<
class ViewType,
210 class CoefficientType,
212 struct SCAL<ViewType, CoefficientType, IndexType, false, 2> {
214 static KOKKOS_INLINE_FUNCTION
void
215 run (
const CoefficientType& alpha,
const ViewType& A)
217 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
218 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
220 for (IndexType j = 0; j < numCols; ++j)
221 for (IndexType i = 0; i < numRows; ++i)
222 A(i,j) = alpha * A(i,j);
225 template<
class ViewType,
226 class CoefficientType,
229 struct SCAL<ViewType, CoefficientType, IndexType, true, rank> {
231 static KOKKOS_INLINE_FUNCTION
void
232 run (
const CoefficientType& alpha,
const ViewType& x)
234 using x_value_type =
typename std::decay<decltype (*x.data()) >::type;
235 const IndexType span =
static_cast<IndexType
> (x.span());
236 x_value_type *__restrict x_ptr(x.data());
237 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
240 for (IndexType i = 0; i < span; ++i)
241 x_ptr[i] = alpha * x_ptr[i];
249 template<
class ViewType,
251 class IndexType = int,
252 const bool is_contiguous =
false,
253 const int rank = ViewType::rank>
255 static KOKKOS_INLINE_FUNCTION
void
256 run (
const ViewType& x,
const InputType& val);
261 template<
class ViewType,
264 struct FILL<ViewType, InputType, IndexType, false, 1> {
265 static KOKKOS_INLINE_FUNCTION
void
266 run (
const ViewType& x,
const InputType& val)
268 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
269 for (IndexType i = 0; i < numRows; ++i)
275 template<
class ViewType,
278 struct FILL<ViewType, InputType, IndexType, false, 2> {
279 static KOKKOS_INLINE_FUNCTION
void
280 run (
const ViewType& X,
const InputType& val)
282 const IndexType numRows =
static_cast<IndexType
> (X.extent (0));
283 const IndexType numCols =
static_cast<IndexType
> (X.extent (1));
284 for (IndexType j = 0; j < numCols; ++j)
285 for (IndexType i = 0; i < numRows; ++i)
289 template<
class ViewType,
293 struct FILL<ViewType, InputType, IndexType, true, rank> {
294 static KOKKOS_INLINE_FUNCTION
void
295 run (
const ViewType& x,
const InputType& val)
297 const IndexType span =
static_cast<IndexType
> (x.span());
298 auto x_ptr = x.data();
299 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
302 for (IndexType i = 0; i < span; ++i)
312 template<
class CoefficientType,
315 class IndexType = int,
316 const bool is_contiguous =
false,
317 const int rank = ViewType1::rank>
319 static KOKKOS_INLINE_FUNCTION
void
320 run (
const CoefficientType& alpha,
327 template<
class CoefficientType,
331 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 1> {
333 static KOKKOS_INLINE_FUNCTION
void
334 run (
const CoefficientType& alpha,
338 using Kokkos::ArithTraits;
339 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
340 "AXPY: x and y must have the same rank.");
342 const IndexType numRows =
static_cast<IndexType
> (y.extent (0));
343 if (alpha != ArithTraits<CoefficientType>::zero ()) {
345 for (IndexType i = 0; i < numRows; ++i)
346 y(i) += alpha * x(i);
353 template<
class CoefficientType,
357 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 2> {
359 static KOKKOS_INLINE_FUNCTION
void
360 run (
const CoefficientType& alpha,
364 using Kokkos::ArithTraits;
365 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
366 "AXPY: X and Y must have the same rank.");
367 const IndexType numRows =
static_cast<IndexType
> (Y.extent (0));
368 const IndexType numCols =
static_cast<IndexType
> (Y.extent (1));
370 if (alpha != ArithTraits<CoefficientType>::zero ()) {
371 for (IndexType j = 0; j < numCols; ++j)
372 for (IndexType i = 0; i < numRows; ++i)
373 Y(i,j) += alpha * X(i,j);
378 template<
class CoefficientType,
383 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, true, rank> {
385 static KOKKOS_INLINE_FUNCTION
void
386 run (
const CoefficientType& alpha,
390 using Kokkos::ArithTraits;
391 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
392 "AXPY: x and y must have the same rank.");
394 if (alpha != ArithTraits<CoefficientType>::zero ()) {
395 using x_value_type =
typename std::decay<decltype (*x.data()) >::type;
396 using y_value_type =
typename std::decay<decltype (*y.data()) >::type;
397 const IndexType span =
static_cast<IndexType
> (y.span());
398 const x_value_type *__restrict x_ptr(x.data());
399 y_value_type *__restrict y_ptr(y.data());
400 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
403 for (IndexType i = 0; i < span; ++i)
404 y_ptr[i] += alpha * x_ptr[i];
413 template<
class ViewType1,
415 class IndexType = int,
416 const bool is_contiguous =
false,
417 const int rank = ViewType1::rank>
419 static KOKKOS_INLINE_FUNCTION
void
420 run (
const ViewType1& x,
const ViewType2& y);
425 template<
class ViewType1,
428 struct COPY<ViewType1, ViewType2, IndexType, false, 1> {
430 static KOKKOS_INLINE_FUNCTION
void
431 run (
const ViewType1& x,
const ViewType2& y)
433 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
435 for (IndexType i = 0; i < numRows; ++i)
442 template<
class ViewType1,
445 struct COPY<ViewType1, ViewType2, IndexType, false, 2> {
447 static KOKKOS_INLINE_FUNCTION
void
448 run (
const ViewType1& X,
const ViewType2& Y)
450 const IndexType numRows =
static_cast<IndexType
> (Y.extent (0));
451 const IndexType numCols =
static_cast<IndexType
> (Y.extent (1));
453 for (IndexType j = 0; j < numCols; ++j)
454 for (IndexType i = 0; i < numRows; ++i)
459 template<
class ViewType1,
463 struct COPY<ViewType1, ViewType2, IndexType, true, rank> {
465 static KOKKOS_INLINE_FUNCTION
void
466 run (
const ViewType1& x,
const ViewType2& y)
468 const IndexType span =
static_cast<IndexType
> (x.span());
469 using x_value_type =
typename std::decay<decltype (*x.data()) >::type;
470 using y_value_type =
typename std::decay<decltype (*y.data()) >::type;
471 const x_value_type *__restrict x_ptr(x.data());
472 y_value_type *__restrict y_ptr(y.data());
474 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
477 for (IndexType i = 0; i < span; ++i)
482 template<
class VecType1,
486 class IndexType = int,
487 bool is_contiguous =
false,
488 class BlkLayoutType =
typename BlkType::array_layout>
490 static KOKKOS_INLINE_FUNCTION
void
491 run (
const CoeffType& alpha,
496 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
497 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
498 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
500 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
501 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
504 for (IndexType i = 0; i < numRows; ++i)
505 for (IndexType j = 0; j < numCols; ++j)
506 y(i) += alpha * A(i,j) * x(j);
510 template<
class VecType1,
515 struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutLeft> {
516 static KOKKOS_INLINE_FUNCTION
void
517 run (
const CoeffType& alpha,
522 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
523 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
524 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
526 using A_value_type =
typename std::decay<decltype (A(0,0)) >::type;
527 using x_value_type =
typename std::decay<decltype (x(0)) >::type;
528 using y_value_type =
typename std::decay<decltype (y(0)) >::type;
530 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
531 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
533 const A_value_type *__restrict A_ptr(A.data());
const IndexType as1(A.stride(1));
534 const x_value_type *__restrict x_ptr(x.data());
535 y_value_type *__restrict y_ptr(y.data());
537 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
540 for (IndexType j=0;j<numCols;++j) {
541 const x_value_type x_at_j = alpha*x_ptr[j];
542 const A_value_type *__restrict A_at_j = A_ptr + j*as1;
543 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
546 for (IndexType i=0;i<numRows;++i)
547 y_ptr[i] += A_at_j[i] * x_at_j;
552 template<
class VecType1,
557 struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutRight> {
558 static KOKKOS_INLINE_FUNCTION
void
559 run (
const CoeffType& alpha,
564 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
565 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
566 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
568 using A_value_type =
typename std::decay<decltype (A(0,0)) >::type;
569 using x_value_type =
typename std::decay<decltype (x(0)) >::type;
570 using y_value_type =
typename std::decay<decltype (y(0)) >::type;
572 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
573 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
575 const A_value_type *__restrict A_ptr(A.data());
const IndexType as0(A.stride(0));
576 const x_value_type *__restrict x_ptr(x.data());
577 y_value_type *__restrict y_ptr(y.data());
579 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
582 for (IndexType i=0;i<numRows;++i) {
583 y_value_type y_at_i(0);
584 const auto A_at_i = A_ptr + i*as0;
585 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
588 for (IndexType j=0;j<numCols;++j)
589 y_at_i += A_at_i[j] * x_ptr[j];
590 y_ptr[i] += alpha*y_at_i;
599 template<
class ViewType,
600 class CoefficientType,
601 class IndexType = int,
602 const int rank = ViewType::rank>
603 KOKKOS_INLINE_FUNCTION
void
604 SCAL (
const CoefficientType& alpha,
const ViewType& x)
606 using LayoutType =
typename ViewType::array_layout;
607 constexpr
bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
608 std::is_same<LayoutType,Kokkos::LayoutRight>::value);
613 template<
class ViewType,
615 class IndexType = int,
616 const int rank = ViewType::rank>
617 KOKKOS_INLINE_FUNCTION
void
618 FILL (
const ViewType& x,
const InputType& val)
620 using LayoutType =
typename ViewType::array_layout;
621 constexpr
bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
622 std::is_same<LayoutType,Kokkos::LayoutRight>::value);
631 template<
class CoefficientType,
634 class IndexType = int,
635 const int rank = ViewType1::rank>
636 KOKKOS_INLINE_FUNCTION
void
637 AXPY (
const CoefficientType& alpha,
641 static_assert (static_cast<int> (ViewType1::rank) ==
642 static_cast<int> (ViewType2::rank),
643 "AXPY: x and y must have the same rank.");
644 using LayoutType1 =
typename ViewType1::array_layout;
645 using LayoutType2 =
typename ViewType2::array_layout;
646 constexpr
bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
647 constexpr
bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
648 std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
649 constexpr
bool is_contiguous = is_layout_same && is_x_contiguous;
661 template<
class ViewType1,
663 class IndexType = int,
664 const int rank = ViewType1::rank>
665 KOKKOS_INLINE_FUNCTION
void
666 COPY (
const ViewType1& x,
const ViewType2& y)
668 static_assert (static_cast<int> (ViewType1::rank) ==
669 static_cast<int> (ViewType2::rank),
670 "COPY: x and y must have the same rank.");
671 using LayoutType1 =
typename ViewType1::array_layout;
672 using LayoutType2 =
typename ViewType2::array_layout;
673 constexpr
bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
674 constexpr
bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
675 std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
676 constexpr
bool is_contiguous = is_layout_same && is_x_contiguous;
691 template<
class VecType1,
695 class IndexType =
int>
696 KOKKOS_INLINE_FUNCTION
void
702 constexpr
bool is_A_contiguous = (std::is_same<typename BlkType::array_layout, Kokkos::LayoutLeft>::value ||
703 std::is_same<typename BlkType::array_layout, Kokkos::LayoutRight>::value);
704 constexpr
bool is_x_contiguous = (std::is_same<typename VecType1::array_layout, Kokkos::LayoutLeft>::value ||
705 std::is_same<typename VecType1::array_layout, Kokkos::LayoutRight>::value);
706 constexpr
bool is_y_contiguous = (std::is_same<typename VecType2::array_layout, Kokkos::LayoutLeft>::value ||
707 std::is_same<typename VecType2::array_layout, Kokkos::LayoutRight>::value);
708 constexpr
bool is_contiguous = is_A_contiguous && is_x_contiguous && is_y_contiguous;
709 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, is_contiguous>::run (alpha, A, x, y);
719 template<
class ViewType1,
722 class CoefficientType,
723 class IndexType =
int>
724 KOKKOS_INLINE_FUNCTION
void
727 const CoefficientType& alpha,
730 const CoefficientType& beta,
734 static_assert (ViewType1::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
735 static_assert (ViewType2::rank == 2,
"GEMM: B must have rank 2 (be a matrix).");
736 static_assert (ViewType3::rank == 2,
"GEMM: C must have rank 2 (be a matrix).");
738 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
739 typedef Kokkos::ArithTraits<Scalar> STS;
740 const Scalar
ZERO = STS::zero();
741 const Scalar ONE = STS::one();
745 if(transA[0] ==
'N' || transA[0] ==
'n') {
746 m =
static_cast<IndexType
> (A.extent (0));
747 n =
static_cast<IndexType
> (A.extent (1));
750 m =
static_cast<IndexType
> (A.extent (1));
751 n =
static_cast<IndexType
> (A.extent (0));
753 k =
static_cast<IndexType
> (C.extent (1));
756 if(alpha == ZERO && beta == ONE)
762 for(IndexType i=0; i<m; i++) {
763 for(IndexType j=0; j<k; j++) {
769 for(IndexType i=0; i<m; i++) {
770 for(IndexType j=0; j<k; j++) {
771 C(i,j) = beta*C(i,j);
778 if(transB[0] ==
'n' || transB[0] ==
'N') {
779 if(transA[0] ==
'n' || transA[0] ==
'N') {
781 for(IndexType j=0; j<n; j++) {
783 for(IndexType i=0; i<m; i++) {
787 else if(beta != ONE) {
788 for(IndexType i=0; i<m; i++) {
789 C(i,j) = beta*C(i,j);
792 for(IndexType l=0; l<k; l++) {
793 Scalar temp = alpha*B(l,j);
794 for(IndexType i=0; i<m; i++) {
795 C(i,j) = C(i,j) + temp*A(i,l);
802 for(IndexType j=0; j<n; j++) {
803 for(IndexType i=0; i<m; i++) {
805 for(IndexType l=0; l<k; l++) {
806 temp = temp + A(l,i)*B(l,j);
812 C(i,j) = alpha*temp + beta*C(i,j);
819 if(transA[0] ==
'n' || transA[0] ==
'N') {
821 for(IndexType j=0; j<n; j++) {
823 for(IndexType i=0; i<m; i++) {
827 else if(beta != ONE) {
828 for(IndexType i=0; i<m; i++) {
829 C(i,j) = beta*C(i,j);
832 for(IndexType l=0; l<k; l++) {
833 Scalar temp = alpha*B(j,l);
834 for(IndexType i=0; i<m; i++) {
835 C(i,j) = C(i,j) + temp*A(i,l);
842 for(IndexType j=0; j<n; j++) {
843 for(IndexType i=0; i<m; i++) {
845 for(IndexType l=0; l<k; l++) {
846 temp = temp + A(l,i)*B(j,l);
852 C(i,j) = alpha*temp + beta*C(i,j);
861 template<
class LittleBlockType,
862 class LittleVectorType>
863 KOKKOS_INLINE_FUNCTION
void
864 GETF2 (
const LittleBlockType& A,
const LittleVectorType& ipiv,
int& info)
867 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
868 static_assert (std::is_integral<IndexType>::value,
869 "GETF2: The type of each entry of ipiv must be an integer type.");
870 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
871 static_assert (! std::is_const<Scalar>::value,
872 "GETF2: A must not be a const View (or LittleBlock).");
873 static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
874 "GETF2: ipiv must not be a const View (or LittleBlock).");
875 static_assert (LittleBlockType::rank == 2,
"GETF2: A must have rank 2 (be a matrix).");
876 typedef Kokkos::ArithTraits<Scalar> STS;
877 const Scalar
ZERO = STS::zero();
879 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
880 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
881 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
884 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
885 if (pivDim < minPivDim) {
893 for(IndexType j=0; j < pivDim; j++)
897 for(IndexType i=j+1; i<numRows; i++)
899 if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
910 for(IndexType i=0; i < numCols; i++)
912 Scalar temp = A(jp,i);
919 for(IndexType i=j+1; i<numRows; i++) {
920 A(i,j) = A(i,j) / A(j,j);
928 for(IndexType r=j+1; r < numRows; r++)
930 for(IndexType c=j+1; c < numCols; c++) {
931 A(r,c) = A(r,c) - A(r,j) * A(j,c);
942 template<
class LittleBlockType,
943 class LittleIntVectorType,
944 class LittleScalarVectorType,
945 const int rank = LittleScalarVectorType::rank>
947 static KOKKOS_INLINE_FUNCTION
void
948 run (
const char mode[],
949 const LittleBlockType& A,
950 const LittleIntVectorType& ipiv,
951 const LittleScalarVectorType& B,
956 template<
class LittleBlockType,
957 class LittleIntVectorType,
958 class LittleScalarVectorType>
959 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
960 static KOKKOS_INLINE_FUNCTION
void
961 run (
const char mode[],
962 const LittleBlockType& A,
963 const LittleIntVectorType& ipiv,
964 const LittleScalarVectorType& B,
968 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
971 static_assert (std::is_integral<IndexType>::value &&
972 std::is_signed<IndexType>::value,
973 "GETRS: The type of each entry of ipiv must be a signed integer.");
974 typedef typename std::decay<decltype (A(0,0))>::type Scalar;
975 static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
976 "GETRS: B must not be a const View (or LittleBlock).");
977 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
978 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
979 static_assert (LittleScalarVectorType::rank == 1,
"GETRS: For this specialization, B must have rank 1.");
981 typedef Kokkos::ArithTraits<Scalar> STS;
982 const Scalar
ZERO = STS::zero();
983 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
984 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
985 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
990 if (numRows != numCols) {
996 if (pivDim < numRows) {
1002 if(mode[0] ==
'n' || mode[0] ==
'N') {
1004 for(IndexType i=0; i<numRows; i++) {
1005 if(ipiv(i) != i+1) {
1007 B(i) = B(ipiv(i)-1);
1008 B(ipiv(i)-1) = temp;
1013 for(IndexType r=1; r < numRows; r++) {
1014 for(IndexType c=0; c < r; c++) {
1015 B(r) = B(r) - A(r,c)*B(c);
1020 for(IndexType r=numRows-1; r >= 0; r--) {
1022 if(A(r,r) == ZERO) {
1027 for(IndexType c=r+1; c < numCols; c++) {
1028 B(r) = B(r) - A(r,c)*B(c);
1030 B(r) = B(r) / A(r,r);
1034 else if(mode[0] ==
't' || mode[0] ==
'T') {
1039 else if (mode[0] ==
'c' || mode[0] ==
'C') {
1052 template<
class LittleBlockType,
1053 class LittleIntVectorType,
1054 class LittleScalarVectorType>
1055 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1056 static KOKKOS_INLINE_FUNCTION
void
1057 run (
const char mode[],
1058 const LittleBlockType& A,
1059 const LittleIntVectorType& ipiv,
1060 const LittleScalarVectorType& B,
1064 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1065 static_assert (std::is_integral<IndexType>::value,
1066 "GETRS: The type of each entry of ipiv must be an integer type.");
1067 static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1068 "GETRS: B must not be a const View (or LittleBlock).");
1069 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
1070 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
1071 static_assert (LittleScalarVectorType::rank == 2,
"GETRS: For this specialization, B must have rank 2.");
1076 const IndexType numRhs = B.extent (1);
1079 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1080 auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1094 template<
class LittleBlockType,
1095 class LittleIntVectorType,
1096 class LittleScalarVectorType>
1097 KOKKOS_INLINE_FUNCTION
void
1099 const LittleBlockType& A,
1100 const LittleIntVectorType& ipiv,
1101 const LittleScalarVectorType& B,
1104 Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1105 LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1123 template<
class LittleBlockType,
1124 class LittleIntVectorType,
1125 class LittleScalarVectorType>
1126 KOKKOS_INLINE_FUNCTION
void
1128 const LittleIntVectorType& ipiv,
1129 const LittleScalarVectorType& work,
1133 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1136 static_assert (std::is_integral<IndexType>::value &&
1137 std::is_signed<IndexType>::value,
1138 "GETRI: The type of each entry of ipiv must be a signed integer.");
1139 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1140 static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1141 "GETRI: A must not be a const View (or LittleBlock).");
1142 static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1143 "GETRI: work must not be a const View (or LittleBlock).");
1144 static_assert (LittleBlockType::rank == 2,
"GETRI: A must have rank 2 (be a matrix).");
1145 typedef Kokkos::ArithTraits<Scalar> STS;
1146 const Scalar
ZERO = STS::zero();
1147 const Scalar ONE = STS::one();
1149 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
1150 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
1151 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
1152 const IndexType workDim =
static_cast<IndexType
> (work.extent (0));
1157 if (numRows != numCols) {
1163 if (pivDim < numRows) {
1169 if (workDim < numRows) {
1175 for(IndexType j=0; j < numRows; j++) {
1176 if(A(j,j) ==
ZERO) {
1181 A(j,j) = ONE / A(j,j);
1184 for(IndexType r=0; r < j; r++) {
1185 A(r,j) = A(r,r)*A(r,j);
1186 for(IndexType c=r+1; c < j; c++) {
1187 A(r,j) = A(r,j) + A(r,c)*A(c,j);
1190 for(IndexType r=0; r < j; r++) {
1191 A(r,j) = -A(j,j)*A(r,j);
1196 for(IndexType j = numCols-2; j >= 0; j--) {
1198 for(IndexType r=j+1; r < numRows; r++) {
1203 for(IndexType r=0; r < numRows; r++) {
1204 for(IndexType i=j+1; i < numRows; i++) {
1205 A(r,j) = A(r,j) - work(i)*A(r,i);
1211 for(IndexType j=numRows-1; j >= 0; j--) {
1212 IndexType jp = ipiv(j)-1;
1214 for(IndexType r=0; r < numRows; r++) {
1215 Scalar temp = A(r,j);
1228 template<
class LittleBlockType,
1229 class LittleVectorTypeX,
1230 class LittleVectorTypeY,
1231 class CoefficientType,
1232 class IndexType =
int>
1233 KOKKOS_INLINE_FUNCTION
void
1234 GEMV (
const char trans,
1235 const CoefficientType& alpha,
1236 const LittleBlockType& A,
1237 const LittleVectorTypeX& x,
1238 const CoefficientType& beta,
1239 const LittleVectorTypeY& y)
1245 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1246 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
1247 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
1251 for (IndexType i = 0; i < numRows; ++i) {
1256 for (IndexType i = 0; i < numRows; ++i) {
1257 y_value_type y_i = 0.0;
1258 for (IndexType j = 0; j < numCols; ++j) {
1259 y_i += A(i,j) * x(j);
1268 for (IndexType i = 0; i < numRows; ++i) {
1273 for (IndexType i = 0; i < numRows; ++i) {
1279 for (IndexType i = 0; i < numRows; ++i) {
1280 y_value_type y_i = beta * y(i);
1281 for (IndexType j = 0; j < numCols; ++j) {
1282 y_i += alpha * A(i,j) * x(j);
1294 #endif // TPETRA_BLOCKVIEW_HPP
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra::SCAL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
Implementation of Tpetra::COPY function.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra::AXPY function.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Computes the solution to Ax=b.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
Implementation of Tpetra::FILL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.