42 #ifndef TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
43 #define TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
53 #include "TpetraCore_config.h"
54 #include "Kokkos_ArithTraits.hpp"
55 #include "Kokkos_Complex.hpp"
70 namespace Experimental {
80 template<
class ViewType1,
82 const int rank1 = ViewType1::rank>
84 static KOKKOS_INLINE_FUNCTION
void
85 run (
const ViewType2& Y,
const ViewType1& X);
92 template<
class ViewType1,
94 struct AbsMax<ViewType1, ViewType2, 2> {
97 static KOKKOS_INLINE_FUNCTION
void
98 run (
const ViewType2& Y,
const ViewType1& X)
100 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
101 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
102 typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
103 static_assert(! std::is_const<STY>::value,
104 "AbsMax: The type of each entry of Y must be nonconst.");
105 typedef typename std::decay<decltype (X(0,0)) >::type STX;
106 static_assert( std::is_same<STX, STY>::value,
107 "AbsMax: The type of each entry of X and Y must be the same.");
108 typedef Kokkos::Details::ArithTraits<STY> KAT;
110 const int numCols = Y.extent (1);
111 const int numRows = Y.extent (0);
112 for (
int j = 0; j < numCols; ++j) {
113 for (
int i = 0; i < numRows; ++i) {
115 const STX X_ij = X(i,j);
118 const auto Y_ij_abs = KAT::abs (Y_ij);
119 const auto X_ij_abs = KAT::abs (X_ij);
120 Y_ij = (Y_ij_abs >= X_ij_abs) ?
121 static_cast<STY> (Y_ij_abs) :
122 static_cast<STY
> (X_ij_abs);
132 template<
class ViewType1,
137 static KOKKOS_INLINE_FUNCTION
void
138 run (
const ViewType2& Y,
const ViewType1& X)
140 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
141 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
143 typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
144 static_assert(! std::is_const<STY>::value,
145 "AbsMax: The type of each entry of Y must be nonconst.");
146 typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
147 static_assert( std::is_same<STX, STY>::value,
148 "AbsMax: The type of each entry of X and Y must be the same.");
149 typedef Kokkos::Details::ArithTraits<STY> KAT;
151 const int numRows = Y.extent (0);
152 for (
int i = 0; i < numRows; ++i) {
154 const STX X_i = X(i);
157 const auto Y_i_abs = KAT::abs (Y_i);
158 const auto X_i_abs = KAT::abs (X_i);
159 Y_i = (Y_i_abs >= X_i_abs) ?
160 static_cast<STY> (Y_i_abs) :
161 static_cast<STY
> (X_i_abs);
172 template<
class ViewType1,
class ViewType2, const
int rank = ViewType1::rank>
173 KOKKOS_INLINE_FUNCTION
void
174 absMax (
const ViewType2& Y,
const ViewType1& X)
176 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
177 "absMax: ViewType1 and ViewType2 must have the same rank.");
178 AbsMax<ViewType1, ViewType2, rank>::run (Y, X);
185 template<
class ViewType,
186 class CoefficientType,
187 class LayoutType =
typename ViewType::array_layout,
188 class IndexType = int,
189 const int rank = ViewType::rank>
191 static KOKKOS_INLINE_FUNCTION
void
192 run (
const CoefficientType& alpha,
const ViewType& x);
197 template<
class ViewType,
198 class CoefficientType,
201 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 1> {
203 static KOKKOS_INLINE_FUNCTION
void
204 run (
const CoefficientType& alpha,
const ViewType& x)
206 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
208 for (IndexType i = 0; i < numRows; ++i) {
216 template<
class ViewType,
217 class CoefficientType,
220 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 2> {
222 static KOKKOS_INLINE_FUNCTION
void
223 run (
const CoefficientType& alpha,
const ViewType& A)
225 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
226 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
229 for (IndexType i = 0; i < numRows; ++i) {
230 for (IndexType j = 0; j < numCols; ++j) {
231 A(i,j) = alpha * A(i,j);
242 template<
class ViewType,
243 class CoefficientType,
245 struct SCAL<ViewType, CoefficientType, Kokkos::LayoutRight, IndexType, 2> {
247 static KOKKOS_INLINE_FUNCTION
void
248 run (
const CoefficientType& alpha,
const ViewType& A)
250 const IndexType N = A.size ();
251 typedef typename std::decay<decltype (A(0,0)) >::type scalar_type;
252 scalar_type*
const A_raw = A.data ();
254 for (IndexType i = 0; i < N; ++i) {
255 A_raw[i] = alpha * A_raw[i];
265 template<
class ViewType,
267 class LayoutType =
typename ViewType::array_layout,
268 class IndexType = int,
269 const int rank = ViewType::rank>
271 static KOKKOS_INLINE_FUNCTION
void
272 run (
const ViewType& x,
const InputType& val);
277 template<
class ViewType,
281 struct FILL<ViewType, InputType, LayoutType, IndexType, 1> {
282 static KOKKOS_INLINE_FUNCTION
void
283 run (
const ViewType& x,
const InputType& val)
285 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
286 for (IndexType i = 0; i < numRows; ++i) {
294 template<
class ViewType,
298 struct FILL<ViewType, InputType, LayoutType, IndexType, 2> {
299 static KOKKOS_INLINE_FUNCTION
void
300 run (
const ViewType& X,
const InputType& val)
302 const IndexType numRows =
static_cast<IndexType
> (X.extent (0));
303 const IndexType numCols =
static_cast<IndexType
> (X.extent (1));
304 for (IndexType j = 0; j < numCols; ++j) {
305 for (IndexType i = 0; i < numRows; ++i) {
316 template<
class CoefficientType,
319 class LayoutType1 =
typename ViewType1::array_layout,
320 class LayoutType2 =
typename ViewType2::array_layout,
321 class IndexType = int,
322 const int rank = ViewType1::rank>
324 static KOKKOS_INLINE_FUNCTION
void
325 run (
const CoefficientType& alpha,
332 template<
class CoefficientType,
338 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
340 static KOKKOS_INLINE_FUNCTION
void
341 run (
const CoefficientType& alpha,
345 using Kokkos::Details::ArithTraits;
346 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
347 "AXPY: x and y must have the same rank.");
349 const IndexType numRows =
static_cast<IndexType
> (y.extent (0));
350 if (alpha != ArithTraits<CoefficientType>::zero ()) {
351 for (IndexType i = 0; i < numRows; ++i) {
352 y(i) += alpha * x(i);
360 template<
class CoefficientType,
366 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
368 static KOKKOS_INLINE_FUNCTION
void
369 run (
const CoefficientType& alpha,
373 using Kokkos::Details::ArithTraits;
374 static_assert (ViewType1::rank == ViewType2::rank,
375 "AXPY: X and Y must have the same rank.");
376 const IndexType numRows =
static_cast<IndexType
> (Y.extent (0));
377 const IndexType numCols =
static_cast<IndexType
> (Y.extent (1));
379 if (alpha != ArithTraits<CoefficientType>::zero ()) {
380 for (IndexType i = 0; i < numRows; ++i) {
381 for (IndexType j = 0; j < numCols; ++j) {
382 Y(i,j) += alpha * X(i,j);
392 template<
class CoefficientType,
396 struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
398 static KOKKOS_INLINE_FUNCTION
void
399 run (
const CoefficientType& alpha,
403 using Kokkos::Details::ArithTraits;
404 static_assert (static_cast<int> (ViewType1::rank) ==
405 static_cast<int> (ViewType2::rank),
406 "AXPY: X and Y must have the same rank.");
407 typedef typename std::decay<decltype (X(0,0)) >::type SX;
408 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
410 const IndexType N =
static_cast<IndexType
> (Y.size ());
411 const SX*
const X_raw = X.data ();
412 SY*
const Y_raw = Y.data ();
414 if (alpha != ArithTraits<CoefficientType>::zero ()) {
415 for (IndexType i = 0; i < N; ++i) {
416 Y_raw[i] += alpha * X_raw[i];
425 template<
class CoefficientType,
429 struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
431 static KOKKOS_INLINE_FUNCTION
void
432 run (
const CoefficientType& alpha,
436 using Kokkos::Details::ArithTraits;
437 static_assert (ViewType1::rank == ViewType2::rank,
438 "AXPY: X and Y must have the same rank.");
439 typedef typename std::decay<decltype (X(0,0)) >::type SX;
440 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
442 const IndexType N =
static_cast<IndexType
> (Y.size ());
443 const SX*
const X_raw = X.data ();
444 SY*
const Y_raw = Y.data ();
446 if (alpha != ArithTraits<CoefficientType>::zero ()) {
447 for (IndexType i = 0; i < N; ++i) {
448 Y_raw[i] += alpha * X_raw[i];
458 template<
class ViewType1,
460 class LayoutType1 =
typename ViewType1::array_layout,
461 class LayoutType2 =
typename ViewType2::array_layout,
462 class IndexType = int,
463 const int rank = ViewType1::rank>
465 static KOKKOS_INLINE_FUNCTION
void
466 run (
const ViewType1& x,
const ViewType2& y);
471 template<
class ViewType1,
476 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
478 static KOKKOS_INLINE_FUNCTION
void
479 run (
const ViewType1& x,
const ViewType2& y)
481 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
482 for (IndexType i = 0; i < numRows; ++i) {
490 template<
class ViewType1,
495 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
497 static KOKKOS_INLINE_FUNCTION
void
498 run (
const ViewType1& X,
const ViewType2& Y)
500 const IndexType numRows =
static_cast<IndexType
> (Y.extent (0));
501 const IndexType numCols =
static_cast<IndexType
> (Y.extent (1));
504 for (IndexType i = 0; i < numRows; ++i) {
505 for (IndexType j = 0; j < numCols; ++j) {
515 template<
class ViewType1,
518 struct COPY<ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
520 static KOKKOS_INLINE_FUNCTION
void
521 run (
const ViewType1& X,
const ViewType2& Y)
523 typedef typename std::decay<decltype (X(0,0)) >::type SX;
524 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
526 const IndexType N =
static_cast<IndexType
> (Y.size ());
527 const SX*
const X_raw = X.data ();
528 SY*
const Y_raw = Y.data ();
531 for (IndexType i = 0; i < N; ++i) {
540 template<
class ViewType1,
543 struct COPY<ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
545 static KOKKOS_INLINE_FUNCTION
void
546 run (
const ViewType1& X,
const ViewType2& Y)
548 typedef typename std::decay<decltype (X(0,0)) >::type SX;
549 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
551 const IndexType N =
static_cast<IndexType
> (Y.size ());
552 const SX*
const X_raw = X.data ();
553 SY*
const Y_raw = Y.data ();
556 for (IndexType i = 0; i < N; ++i) {
563 template<
class VecType1,
567 class IndexType = int,
568 class VecLayoutType1 =
typename VecType1::array_layout,
569 class BlkLayoutType =
typename BlkType::array_layout,
570 class VecLayoutType2 =
typename VecType2::array_layout>
572 static KOKKOS_INLINE_FUNCTION
void
573 run (
const CoeffType& alpha,
578 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
579 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
580 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
581 typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
583 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
584 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
586 for (IndexType i = 0; i < numRows; ++i) {
587 y_elt_type y_i = y(i);
588 for (IndexType j = 0; j < numCols; ++j) {
589 y_i += alpha * A(i,j) * x(j);
596 template<
class VecType1,
601 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
602 Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight>
604 static KOKKOS_INLINE_FUNCTION
void
605 run (
const CoeffType& alpha,
610 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
611 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
612 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
613 typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
614 typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
616 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
617 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
618 const A_elt_type*
const A_raw = A.data ();
620 for (IndexType i = 0; i < numRows; ++i) {
621 y_elt_type y_i = y(i);
622 const A_elt_type*
const A_i = A_raw + i*numCols;
623 for (IndexType j = 0; j < numCols; ++j) {
624 y_i += alpha * A_i[j] * x(j);
631 template<
class VecType1,
636 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
637 Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft>
639 static KOKKOS_INLINE_FUNCTION
void
640 run (
const CoeffType& alpha,
645 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
646 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
647 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
648 typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
650 const A_elt_type*
const A_raw = A.data ();
651 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
652 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
653 for (IndexType j = 0; j < numCols; ++j) {
654 const A_elt_type*
const A_j = A_raw + j*numRows;
655 for (IndexType i = 0; i < numRows; ++i) {
656 y(i) += alpha * A_j[i] * x(i);
666 template<
class ViewType,
667 class CoefficientType,
668 class LayoutType =
typename ViewType::array_layout,
669 class IndexType = int,
670 const int rank = ViewType::rank>
671 KOKKOS_INLINE_FUNCTION
void
672 SCAL (
const CoefficientType& alpha,
const ViewType& x)
678 template<
class ViewType,
680 class LayoutType =
typename ViewType::array_layout,
681 class IndexType = int,
682 const int rank = ViewType::rank>
683 KOKKOS_INLINE_FUNCTION
void
684 FILL (
const ViewType& x,
const InputType& val)
694 template<
class CoefficientType,
697 class LayoutType1 =
typename ViewType1::array_layout,
698 class LayoutType2 =
typename ViewType2::array_layout,
699 class IndexType = int,
700 const int rank = ViewType1::rank>
701 KOKKOS_INLINE_FUNCTION
void
702 AXPY (
const CoefficientType& alpha,
706 static_assert (static_cast<int> (ViewType1::rank) ==
707 static_cast<int> (ViewType2::rank),
708 "AXPY: x and y must have the same rank.");
709 Impl::AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2,
710 IndexType, rank>::run (alpha, x, y);
721 template<
class ViewType1,
723 class LayoutType1 =
typename ViewType1::array_layout,
724 class LayoutType2 =
typename ViewType2::array_layout,
725 class IndexType = int,
726 const int rank = ViewType1::rank>
727 KOKKOS_INLINE_FUNCTION
void
728 COPY (
const ViewType1& x,
const ViewType2& y)
730 static_assert (static_cast<int> (ViewType1::rank) ==
731 static_cast<int> (ViewType2::rank),
732 "COPY: x and y must have the same rank.");
733 Impl::COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType,
748 template<
class VecType1,
752 class IndexType =
int>
753 KOKKOS_INLINE_FUNCTION
void
759 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType,
760 IndexType>::run (alpha, A, x, y);
770 template<
class ViewType1,
773 class CoefficientType,
774 class IndexType =
int>
775 KOKKOS_INLINE_FUNCTION
void
778 const CoefficientType& alpha,
781 const CoefficientType& beta,
785 static_assert (ViewType1::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
786 static_assert (ViewType2::rank == 2,
"GEMM: B must have rank 2 (be a matrix).");
787 static_assert (ViewType3::rank == 2,
"GEMM: C must have rank 2 (be a matrix).");
789 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
790 typedef Kokkos::Details::ArithTraits<Scalar> STS;
791 const Scalar
ZERO = STS::zero();
792 const Scalar ONE = STS::one();
796 if(transA[0] ==
'N' || transA[0] ==
'n') {
797 m =
static_cast<IndexType
> (A.extent (0));
798 n =
static_cast<IndexType
> (A.extent (1));
801 m =
static_cast<IndexType
> (A.extent (1));
802 n =
static_cast<IndexType
> (A.extent (0));
804 k =
static_cast<IndexType
> (C.extent (1));
807 if(alpha == ZERO && beta == ONE)
813 for(IndexType i=0; i<m; i++) {
814 for(IndexType j=0; j<k; j++) {
820 for(IndexType i=0; i<m; i++) {
821 for(IndexType j=0; j<k; j++) {
822 C(i,j) = beta*C(i,j);
829 if(transB[0] ==
'n' || transB[0] ==
'N') {
830 if(transA[0] ==
'n' || transA[0] ==
'N') {
832 for(IndexType j=0; j<n; j++) {
834 for(IndexType i=0; i<m; i++) {
838 else if(beta != ONE) {
839 for(IndexType i=0; i<m; i++) {
840 C(i,j) = beta*C(i,j);
843 for(IndexType l=0; l<k; l++) {
844 Scalar temp = alpha*B(l,j);
845 for(IndexType i=0; i<m; i++) {
846 C(i,j) = C(i,j) + temp*A(i,l);
853 for(IndexType j=0; j<n; j++) {
854 for(IndexType i=0; i<m; i++) {
856 for(IndexType l=0; l<k; l++) {
857 temp = temp + A(l,i)*B(l,j);
863 C(i,j) = alpha*temp + beta*C(i,j);
870 if(transA[0] ==
'n' || transA[0] ==
'N') {
872 for(IndexType j=0; j<n; j++) {
874 for(IndexType i=0; i<m; i++) {
878 else if(beta != ONE) {
879 for(IndexType i=0; i<m; i++) {
880 C(i,j) = beta*C(i,j);
883 for(IndexType l=0; l<k; l++) {
884 Scalar temp = alpha*B(j,l);
885 for(IndexType i=0; i<m; i++) {
886 C(i,j) = C(i,j) + temp*A(i,l);
893 for(IndexType j=0; j<n; j++) {
894 for(IndexType i=0; i<m; i++) {
896 for(IndexType l=0; l<k; l++) {
897 temp = temp + A(l,i)*B(j,l);
903 C(i,j) = alpha*temp + beta*C(i,j);
912 template<
class LittleBlockType,
913 class LittleVectorType>
914 KOKKOS_INLINE_FUNCTION
void
915 GETF2 (
const LittleBlockType& A,
const LittleVectorType& ipiv,
int& info)
918 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
919 static_assert (std::is_integral<IndexType>::value,
920 "GETF2: The type of each entry of ipiv must be an integer type.");
921 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
922 static_assert (! std::is_const<Scalar>::value,
923 "GETF2: A must not be a const View (or LittleBlock).");
924 static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
925 "GETF2: ipiv must not be a const View (or LittleBlock).");
926 static_assert (LittleBlockType::rank == 2,
"GETF2: A must have rank 2 (be a matrix).");
927 typedef Kokkos::Details::ArithTraits<Scalar> STS;
928 const Scalar
ZERO = STS::zero();
930 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
931 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
932 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
935 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
936 if (pivDim < minPivDim) {
944 for(IndexType j=0; j < pivDim; j++)
948 for(IndexType i=j+1; i<numRows; i++)
950 if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
961 for(IndexType i=0; i < numCols; i++)
963 Scalar temp = A(jp,i);
970 for(IndexType i=j+1; i<numRows; i++) {
971 A(i,j) = A(i,j) / A(j,j);
979 for(IndexType r=j+1; r < numRows; r++)
981 for(IndexType c=j+1; c < numCols; c++) {
982 A(r,c) = A(r,c) - A(r,j) * A(j,c);
993 template<
class LittleBlockType,
994 class LittleIntVectorType,
995 class LittleScalarVectorType,
996 const int rank = LittleScalarVectorType::rank>
998 static KOKKOS_INLINE_FUNCTION
void
999 run (
const char mode[],
1000 const LittleBlockType& A,
1001 const LittleIntVectorType& ipiv,
1002 const LittleScalarVectorType& B,
1007 template<
class LittleBlockType,
1008 class LittleIntVectorType,
1009 class LittleScalarVectorType>
1010 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
1011 static KOKKOS_INLINE_FUNCTION
void
1012 run (
const char mode[],
1013 const LittleBlockType& A,
1014 const LittleIntVectorType& ipiv,
1015 const LittleScalarVectorType& B,
1019 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1022 static_assert (std::is_integral<IndexType>::value &&
1023 std::is_signed<IndexType>::value,
1024 "GETRS: The type of each entry of ipiv must be a signed integer.");
1025 typedef typename std::decay<decltype (A(0,0))>::type Scalar;
1026 static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
1027 "GETRS: B must not be a const View (or LittleBlock).");
1028 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
1029 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
1030 static_assert (LittleScalarVectorType::rank == 1,
"GETRS: For this specialization, B must have rank 1.");
1032 typedef Kokkos::Details::ArithTraits<Scalar> STS;
1033 const Scalar
ZERO = STS::zero();
1034 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
1035 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
1036 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
1041 if (numRows != numCols) {
1047 if (pivDim < numRows) {
1053 if(mode[0] ==
'n' || mode[0] ==
'N') {
1055 for(IndexType i=0; i<numRows; i++) {
1056 if(ipiv(i) != i+1) {
1058 B(i) = B(ipiv(i)-1);
1059 B(ipiv(i)-1) = temp;
1064 for(IndexType r=1; r < numRows; r++) {
1065 for(IndexType c=0; c < r; c++) {
1066 B(r) = B(r) - A(r,c)*B(c);
1071 for(IndexType r=numRows-1; r >= 0; r--) {
1073 if(A(r,r) == ZERO) {
1078 for(IndexType c=r+1; c < numCols; c++) {
1079 B(r) = B(r) - A(r,c)*B(c);
1081 B(r) = B(r) / A(r,r);
1085 else if(mode[0] ==
't' || mode[0] ==
'T') {
1090 else if (mode[0] ==
'c' || mode[0] ==
'C') {
1103 template<
class LittleBlockType,
1104 class LittleIntVectorType,
1105 class LittleScalarVectorType>
1106 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1107 static KOKKOS_INLINE_FUNCTION
void
1108 run (
const char mode[],
1109 const LittleBlockType& A,
1110 const LittleIntVectorType& ipiv,
1111 const LittleScalarVectorType& B,
1115 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1116 static_assert (std::is_integral<IndexType>::value,
1117 "GETRS: The type of each entry of ipiv must be an integer type.");
1118 static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1119 "GETRS: B must not be a const View (or LittleBlock).");
1120 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
1121 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
1122 static_assert (LittleScalarVectorType::rank == 2,
"GETRS: For this specialization, B must have rank 2.");
1127 const IndexType numRhs = B.extent (1);
1130 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1131 auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1145 template<
class LittleBlockType,
1146 class LittleIntVectorType,
1147 class LittleScalarVectorType>
1148 KOKKOS_INLINE_FUNCTION
void
1150 const LittleBlockType& A,
1151 const LittleIntVectorType& ipiv,
1152 const LittleScalarVectorType& B,
1155 Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1156 LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1174 template<
class LittleBlockType,
1175 class LittleIntVectorType,
1176 class LittleScalarVectorType>
1177 KOKKOS_INLINE_FUNCTION
void
1179 const LittleIntVectorType& ipiv,
1180 const LittleScalarVectorType& work,
1184 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1187 static_assert (std::is_integral<IndexType>::value &&
1188 std::is_signed<IndexType>::value,
1189 "GETRI: The type of each entry of ipiv must be a signed integer.");
1190 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1191 static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1192 "GETRI: A must not be a const View (or LittleBlock).");
1193 static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1194 "GETRI: work must not be a const View (or LittleBlock).");
1195 static_assert (LittleBlockType::rank == 2,
"GETRI: A must have rank 2 (be a matrix).");
1196 typedef Kokkos::Details::ArithTraits<Scalar> STS;
1197 const Scalar
ZERO = STS::zero();
1198 const Scalar ONE = STS::one();
1200 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
1201 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
1202 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
1203 const IndexType workDim =
static_cast<IndexType
> (work.extent (0));
1208 if (numRows != numCols) {
1214 if (pivDim < numRows) {
1220 if (workDim < numRows) {
1226 for(IndexType j=0; j < numRows; j++) {
1227 if(A(j,j) ==
ZERO) {
1232 A(j,j) = ONE / A(j,j);
1235 for(IndexType r=0; r < j; r++) {
1236 A(r,j) = A(r,r)*A(r,j);
1237 for(IndexType c=r+1; c < j; c++) {
1238 A(r,j) = A(r,j) + A(r,c)*A(c,j);
1241 for(IndexType r=0; r < j; r++) {
1242 A(r,j) = -A(j,j)*A(r,j);
1247 for(IndexType j = numCols-2; j >= 0; j--) {
1249 for(IndexType r=j+1; r < numRows; r++) {
1254 for(IndexType r=0; r < numRows; r++) {
1255 for(IndexType i=j+1; i < numRows; i++) {
1256 A(r,j) = A(r,j) - work(i)*A(r,i);
1262 for(IndexType j=numRows-1; j >= 0; j--) {
1263 IndexType jp = ipiv(j)-1;
1265 for(IndexType r=0; r < numRows; r++) {
1266 Scalar temp = A(r,j);
1279 template<
class LittleBlockType,
1280 class LittleVectorTypeX,
1281 class LittleVectorTypeY,
1282 class CoefficientType,
1283 class IndexType =
int>
1284 KOKKOS_INLINE_FUNCTION
void
1285 GEMV (
const char trans,
1286 const CoefficientType& alpha,
1287 const LittleBlockType& A,
1288 const LittleVectorTypeX& x,
1289 const CoefficientType& beta,
1290 const LittleVectorTypeY& y)
1296 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1297 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
1298 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
1302 for (IndexType i = 0; i < numRows; ++i) {
1307 for (IndexType i = 0; i < numRows; ++i) {
1308 y_value_type y_i = 0.0;
1309 for (IndexType j = 0; j < numCols; ++j) {
1310 y_i += A(i,j) * x(j);
1319 for (IndexType i = 0; i < numRows; ++i) {
1324 for (IndexType i = 0; i < numRows; ++i) {
1330 for (IndexType i = 0; i < numRows; ++i) {
1331 y_value_type y_i = beta * y(i);
1332 for (IndexType j = 0; j < numCols; ++j) {
1333 y_i += alpha * A(i,j) * x(j);
1346 #endif // TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
Implementation of Tpetra::Experimental::FILL function.
Computes the solution to Ax=b.
Implementation of Tpetra::Experimental::SCAL function.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
Implementation of Tpetra::Experimental::COPY function.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::Experimental::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)