Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_BlockView.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_BLOCKVIEW_HPP
11 #define TPETRA_BLOCKVIEW_HPP
12 
20 
21 #include "TpetraCore_config.h"
22 #include "Kokkos_ArithTraits.hpp"
23 #include "Kokkos_Complex.hpp"
24 
25 namespace Tpetra {
26 
31 
32 namespace Impl {
33 
40 template<class ViewType1,
41  class ViewType2,
42  const int rank1 = ViewType1::rank>
43 struct AbsMax {
44  static KOKKOS_INLINE_FUNCTION void
45  run (const ViewType2& Y, const ViewType1& X);
46 };
47 
52 template<class ViewType1,
53  class ViewType2>
54 struct AbsMax<ViewType1, ViewType2, 2> {
57  static KOKKOS_INLINE_FUNCTION void
58  run (const ViewType2& Y, const ViewType1& X)
59  {
60  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
61  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
62  typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
63  static_assert(! std::is_const<STY>::value,
64  "AbsMax: The type of each entry of Y must be nonconst.");
65  typedef typename std::decay<decltype (X(0,0)) >::type STX;
66  static_assert( std::is_same<STX, STY>::value,
67  "AbsMax: The type of each entry of X and Y must be the same.");
68  typedef Kokkos::ArithTraits<STY> KAT;
69 
70  const int numCols = Y.extent (1);
71  const int numRows = Y.extent (0);
72  for (int j = 0; j < numCols; ++j) {
73  for (int i = 0; i < numRows; ++i) {
74  STY& Y_ij = Y(i,j); // use ref here to avoid 2nd op() call on Y
75  const STX X_ij = X(i,j);
76  // NOTE: no std::max (not a CUDA __device__ function); must
77  // cast back up to complex.
78  const auto Y_ij_abs = KAT::abs (Y_ij);
79  const auto X_ij_abs = KAT::abs (X_ij);
80  Y_ij = (Y_ij_abs >= X_ij_abs) ?
81  static_cast<STY> (Y_ij_abs) :
82  static_cast<STY> (X_ij_abs);
83  }
84  }
85  }
86 };
87 
92 template<class ViewType1,
93  class ViewType2>
94 struct AbsMax<ViewType1, ViewType2, 1> {
97  static KOKKOS_INLINE_FUNCTION void
98  run (const ViewType2& Y, const ViewType1& X)
99  {
100  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
101  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
102 
103  typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
104  static_assert(! std::is_const<STY>::value,
105  "AbsMax: The type of each entry of Y must be nonconst.");
106  typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
107  static_assert( std::is_same<STX, STY>::value,
108  "AbsMax: The type of each entry of X and Y must be the same.");
109  typedef Kokkos::ArithTraits<STY> KAT;
110 
111  const int numRows = Y.extent (0);
112  for (int i = 0; i < numRows; ++i) {
113  STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
114  const STX X_i = X(i);
115  // NOTE: no std::max (not a CUDA __device__ function); must
116  // cast back up to complex.
117  const auto Y_i_abs = KAT::abs (Y_i);
118  const auto X_i_abs = KAT::abs (X_i);
119  Y_i = (Y_i_abs >= X_i_abs) ?
120  static_cast<STY> (Y_i_abs) :
121  static_cast<STY> (X_i_abs);
122  }
123  }
124 };
125 
132 template<class ViewType1, class ViewType2, const int rank = ViewType1::rank>
133 KOKKOS_INLINE_FUNCTION void
134 absMax (const ViewType2& Y, const ViewType1& X)
135 {
136  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
137  "absMax: ViewType1 and ViewType2 must have the same rank.");
139 }
140 
145 template<class ViewType,
146  class CoefficientType,
147  class IndexType = int,
148  const bool is_contiguous = false,
149  const int rank = ViewType::rank>
150 struct SCAL {
151  static KOKKOS_INLINE_FUNCTION void
152  run (const CoefficientType& alpha, const ViewType& x);
153 };
154 
157 template<class ViewType,
158  class CoefficientType,
159  class IndexType>
160 struct SCAL<ViewType, CoefficientType, IndexType, false, 1> {
162  static KOKKOS_INLINE_FUNCTION void
163  run (const CoefficientType& alpha, const ViewType& x)
164  {
165  const IndexType numRows = static_cast<IndexType> (x.extent (0));
166 
168 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
169 #pragma unroll
170 #endif
171  for (IndexType i = 0; i < numRows; ++i)
172  x(i) = alpha * x(i);
173  }
174 };
177 template<class ViewType,
178  class CoefficientType,
179  class IndexType>
180 struct SCAL<ViewType, CoefficientType, IndexType, false, 2> {
182  static KOKKOS_INLINE_FUNCTION void
183  run (const CoefficientType& alpha, const ViewType& A)
184  {
185  const IndexType numRows = static_cast<IndexType> (A.extent (0));
186  const IndexType numCols = static_cast<IndexType> (A.extent (1));
187 
188  for (IndexType j = 0; j < numCols; ++j)
189  for (IndexType i = 0; i < numRows; ++i)
190  A(i,j) = alpha * A(i,j);
191  }
192 };
193 template<class ViewType,
194  class CoefficientType,
195  class IndexType,
196  const int rank>
197 struct SCAL<ViewType, CoefficientType, IndexType, true, rank> {
199  static KOKKOS_INLINE_FUNCTION void
200  run (const CoefficientType& alpha, const ViewType& x)
201  {
202  using x_value_type = typename std::decay<decltype (*x.data()) >::type;
203  const IndexType span = static_cast<IndexType> (x.span());
204  x_value_type *__restrict x_ptr(x.data());
205 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
206 #pragma unroll
207 #endif
208  for (IndexType i = 0; i < span; ++i)
209  x_ptr[i] = alpha * x_ptr[i];
210  }
211 };
212 
217 template<class ViewType,
218  class InputType,
219  class IndexType = int,
220  const bool is_contiguous = false,
221  const int rank = ViewType::rank>
222 struct FILL {
223  static KOKKOS_INLINE_FUNCTION void
224  run (const ViewType& x, const InputType& val);
225 };
226 
229 template<class ViewType,
230  class InputType,
231  class IndexType>
232 struct FILL<ViewType, InputType, IndexType, false, 1> {
233  static KOKKOS_INLINE_FUNCTION void
234  run (const ViewType& x, const InputType& val)
235  {
236  const IndexType numRows = static_cast<IndexType> (x.extent (0));
237  for (IndexType i = 0; i < numRows; ++i)
238  x(i) = val;
239  }
240 };
243 template<class ViewType,
244  class InputType,
245  class IndexType>
246 struct FILL<ViewType, InputType, IndexType, false, 2> {
247  static KOKKOS_INLINE_FUNCTION void
248  run (const ViewType& X, const InputType& val)
249  {
250  const IndexType numRows = static_cast<IndexType> (X.extent (0));
251  const IndexType numCols = static_cast<IndexType> (X.extent (1));
252  for (IndexType j = 0; j < numCols; ++j)
253  for (IndexType i = 0; i < numRows; ++i)
254  X(i,j) = val;
255  }
256 };
257 template<class ViewType,
258  class InputType,
259  class IndexType,
260  const int rank>
261 struct FILL<ViewType, InputType, IndexType, true, rank> {
262  static KOKKOS_INLINE_FUNCTION void
263  run (const ViewType& x, const InputType& val)
264  {
265  const IndexType span = static_cast<IndexType> (x.span());
266  auto x_ptr = x.data();
267 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
268 #pragma unroll
269 #endif
270  for (IndexType i = 0; i < span; ++i)
271  x_ptr[i] = val;
272  }
273 };
274 
275 
280 template<class CoefficientType,
281  class ViewType1,
282  class ViewType2,
283  class IndexType = int,
284  const bool is_contiguous = false,
285  const int rank = ViewType1::rank>
286 struct AXPY {
287  static KOKKOS_INLINE_FUNCTION void
288  run (const CoefficientType& alpha,
289  const ViewType1& x,
290  const ViewType2& y);
291 };
292 
295 template<class CoefficientType,
296  class ViewType1,
297  class ViewType2,
298  class IndexType>
299 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 1> {
301  static KOKKOS_INLINE_FUNCTION void
302  run (const CoefficientType& alpha,
303  const ViewType1& x,
304  const ViewType2& y)
305  {
306  using Kokkos::ArithTraits;
307  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
308  "AXPY: x and y must have the same rank.");
309 
310  const IndexType numRows = static_cast<IndexType> (y.extent (0));
311  if (alpha != ArithTraits<CoefficientType>::zero ()) {
313  for (IndexType i = 0; i < numRows; ++i)
314  y(i) += alpha * x(i);
315  }
316  }
317 };
318 
321 template<class CoefficientType,
322  class ViewType1,
323  class ViewType2,
324  class IndexType>
325 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 2> {
327  static KOKKOS_INLINE_FUNCTION void
328  run (const CoefficientType& alpha,
329  const ViewType1& X,
330  const ViewType2& Y)
331  {
332  using Kokkos::ArithTraits;
333  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
334  "AXPY: X and Y must have the same rank.");
335  const IndexType numRows = static_cast<IndexType> (Y.extent (0));
336  const IndexType numCols = static_cast<IndexType> (Y.extent (1));
337 
338  if (alpha != ArithTraits<CoefficientType>::zero ()) {
339  for (IndexType j = 0; j < numCols; ++j)
340  for (IndexType i = 0; i < numRows; ++i)
341  Y(i,j) += alpha * X(i,j);
342  }
343  }
344 };
345 
346 template<class CoefficientType,
347  class ViewType1,
348  class ViewType2,
349  class IndexType,
350  const int rank>
351 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, true, rank> {
353  static KOKKOS_INLINE_FUNCTION void
354  run (const CoefficientType& alpha,
355  const ViewType1& x,
356  const ViewType2& y)
357  {
358  using Kokkos::ArithTraits;
359  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
360  "AXPY: x and y must have the same rank.");
361 
362  if (alpha != ArithTraits<CoefficientType>::zero ()) {
363  using x_value_type = typename std::decay<decltype (*x.data()) >::type;
364  using y_value_type = typename std::decay<decltype (*y.data()) >::type;
365  const IndexType span = static_cast<IndexType> (y.span());
366  const x_value_type *__restrict x_ptr(x.data());
367  y_value_type *__restrict y_ptr(y.data());
368 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
369 #pragma unroll
370 #endif
371  for (IndexType i = 0; i < span; ++i)
372  y_ptr[i] += alpha * x_ptr[i];
373  }
374  }
375 };
376 
381 template<class ViewType1,
382  class ViewType2,
383  class IndexType = int,
384  const bool is_contiguous = false,
385  const int rank = ViewType1::rank>
386 struct COPY {
387  static KOKKOS_INLINE_FUNCTION void
388  run (const ViewType1& x, const ViewType2& y);
389 };
390 
393 template<class ViewType1,
394  class ViewType2,
395  class IndexType>
396 struct COPY<ViewType1, ViewType2, IndexType, false, 1> {
398  static KOKKOS_INLINE_FUNCTION void
399  run (const ViewType1& x, const ViewType2& y)
400  {
401  const IndexType numRows = static_cast<IndexType> (x.extent (0));
403  for (IndexType i = 0; i < numRows; ++i)
404  y(i) = x(i);
405  }
406 };
407 
410 template<class ViewType1,
411  class ViewType2,
412  class IndexType>
413 struct COPY<ViewType1, ViewType2, IndexType, false, 2> {
415  static KOKKOS_INLINE_FUNCTION void
416  run (const ViewType1& X, const ViewType2& Y)
417  {
418  const IndexType numRows = static_cast<IndexType> (Y.extent (0));
419  const IndexType numCols = static_cast<IndexType> (Y.extent (1));
421  for (IndexType j = 0; j < numCols; ++j)
422  for (IndexType i = 0; i < numRows; ++i)
423  Y(i,j) = X(i,j);
424  }
425 };
426 
427 template<class ViewType1,
428  class ViewType2,
429  class IndexType,
430  const int rank>
431 struct COPY<ViewType1, ViewType2, IndexType, true, rank> {
433  static KOKKOS_INLINE_FUNCTION void
434  run (const ViewType1& x, const ViewType2& y)
435  {
436  const IndexType span = static_cast<IndexType> (x.span());
437  using x_value_type = typename std::decay<decltype (*x.data()) >::type;
438  using y_value_type = typename std::decay<decltype (*y.data()) >::type;
439  const x_value_type *__restrict x_ptr(x.data());
440  y_value_type *__restrict y_ptr(y.data());
441 
442 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
443 #pragma unroll
444 #endif
445  for (IndexType i = 0; i < span; ++i)
446  y_ptr[i] = x_ptr[i];
447  }
448 };
449 
450 template<class VecType1,
451  class BlkType,
452  class VecType2,
453  class CoeffType,
454  class IndexType = int,
455  bool is_contiguous = false,
456  class BlkLayoutType = typename BlkType::array_layout>
457 struct GEMV {
458  static KOKKOS_INLINE_FUNCTION void
459  run (const CoeffType& alpha,
460  const BlkType& A,
461  const VecType1& x,
462  const VecType2& y)
463  {
464  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
465  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
466  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
467 
468  const IndexType numRows = static_cast<IndexType> (A.extent (0));
469  const IndexType numCols = static_cast<IndexType> (A.extent (1));
470 
472  for (IndexType i = 0; i < numRows; ++i)
473  for (IndexType j = 0; j < numCols; ++j)
474  y(i) += alpha * A(i,j) * x(j);
475  }
476 };
477 
478 template<class VecType1,
479  class BlkType,
480  class VecType2,
481  class CoeffType,
482  class IndexType>
483 struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutLeft> {
484  static KOKKOS_INLINE_FUNCTION void
485  run (const CoeffType& alpha,
486  const BlkType& A,
487  const VecType1& x,
488  const VecType2& y)
489  {
490  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
491  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
492  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
493 
494  using A_value_type = typename std::decay<decltype (A(0,0)) >::type;
495  using x_value_type = typename std::decay<decltype (x(0)) >::type;
496  using y_value_type = typename std::decay<decltype (y(0)) >::type;
497 
498  const IndexType numRows = static_cast<IndexType> (A.extent (0));
499  const IndexType numCols = static_cast<IndexType> (A.extent (1));
500 
501  const A_value_type *__restrict A_ptr(A.data()); const IndexType as1(A.stride(1));
502  const x_value_type *__restrict x_ptr(x.data());
503  y_value_type *__restrict y_ptr(y.data());
504 
505 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
506 #pragma unroll
507 #endif
508  for (IndexType j=0;j<numCols;++j) {
509  const x_value_type x_at_j = alpha*x_ptr[j];
510  const A_value_type *__restrict A_at_j = A_ptr + j*as1;
511 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
512 #pragma unroll
513 #endif
514  for (IndexType i=0;i<numRows;++i)
515  y_ptr[i] += A_at_j[i] * x_at_j;
516  }
517  }
518 };
519 
520 template<class VecType1,
521  class BlkType,
522  class VecType2,
523  class CoeffType,
524  class IndexType>
525 struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutRight> {
526  static KOKKOS_INLINE_FUNCTION void
527  run (const CoeffType& alpha,
528  const BlkType& A,
529  const VecType1& x,
530  const VecType2& y)
531  {
532  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
533  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
534  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
535 
536  using A_value_type = typename std::decay<decltype (A(0,0)) >::type;
537  using x_value_type = typename std::decay<decltype (x(0)) >::type;
538  using y_value_type = typename std::decay<decltype (y(0)) >::type;
539 
540  const IndexType numRows = static_cast<IndexType> (A.extent (0));
541  const IndexType numCols = static_cast<IndexType> (A.extent (1));
542 
543  const A_value_type *__restrict A_ptr(A.data()); const IndexType as0(A.stride(0));
544  const x_value_type *__restrict x_ptr(x.data());
545  y_value_type *__restrict y_ptr(y.data());
546 
547 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
548 #pragma unroll
549 #endif
550  for (IndexType i=0;i<numRows;++i) {
551  y_value_type y_at_i(0);
552  const auto A_at_i = A_ptr + i*as0;
553 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
554 #pragma unroll
555 #endif
556  for (IndexType j=0;j<numCols;++j)
557  y_at_i += A_at_i[j] * x_ptr[j];
558  y_ptr[i] += alpha*y_at_i;
559  }
560  }
561 };
562 
563 } // namespace Impl
564 
567 template<class ViewType,
568  class CoefficientType,
569  class IndexType = int,
570  const int rank = ViewType::rank>
571 KOKKOS_INLINE_FUNCTION void
572 SCAL (const CoefficientType& alpha, const ViewType& x)
573 {
574  using LayoutType = typename ViewType::array_layout;
575  constexpr bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
576  std::is_same<LayoutType,Kokkos::LayoutRight>::value);
578 }
579 
581 template<class ViewType,
582  class InputType,
583  class IndexType = int,
584  const int rank = ViewType::rank>
585 KOKKOS_INLINE_FUNCTION void
586 FILL (const ViewType& x, const InputType& val)
587 {
588  using LayoutType = typename ViewType::array_layout;
589  constexpr bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
590  std::is_same<LayoutType,Kokkos::LayoutRight>::value);
592 }
593 
599 template<class CoefficientType,
600  class ViewType1,
601  class ViewType2,
602  class IndexType = int,
603  const int rank = ViewType1::rank>
604 KOKKOS_INLINE_FUNCTION void
605 AXPY (const CoefficientType& alpha,
606  const ViewType1& x,
607  const ViewType2& y)
608 {
609  static_assert (static_cast<int> (ViewType1::rank) ==
610  static_cast<int> (ViewType2::rank),
611  "AXPY: x and y must have the same rank.");
612  using LayoutType1 = typename ViewType1::array_layout;
613  using LayoutType2 = typename ViewType2::array_layout;
614  constexpr bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
615  constexpr bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
616  std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
617  constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
619 }
620 
629 template<class ViewType1,
630  class ViewType2,
631  class IndexType = int,
632  const int rank = ViewType1::rank>
633 KOKKOS_INLINE_FUNCTION void
634 COPY (const ViewType1& x, const ViewType2& y)
635 {
636  static_assert (static_cast<int> (ViewType1::rank) ==
637  static_cast<int> (ViewType2::rank),
638  "COPY: x and y must have the same rank.");
639  using LayoutType1 = typename ViewType1::array_layout;
640  using LayoutType2 = typename ViewType2::array_layout;
641  constexpr bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
642  constexpr bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
643  std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
644  constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
646 }
647 
659 template<class VecType1,
660  class BlkType,
661  class VecType2,
662  class CoeffType,
663  class IndexType = int>
664 KOKKOS_INLINE_FUNCTION void
665 GEMV (const CoeffType& alpha,
666  const BlkType& A,
667  const VecType1& x,
668  const VecType2& y)
669 {
670  constexpr bool is_A_contiguous = (std::is_same<typename BlkType::array_layout, Kokkos::LayoutLeft>::value ||
671  std::is_same<typename BlkType::array_layout, Kokkos::LayoutRight>::value);
672  constexpr bool is_x_contiguous = (std::is_same<typename VecType1::array_layout, Kokkos::LayoutLeft>::value ||
673  std::is_same<typename VecType1::array_layout, Kokkos::LayoutRight>::value);
674  constexpr bool is_y_contiguous = (std::is_same<typename VecType2::array_layout, Kokkos::LayoutLeft>::value ||
675  std::is_same<typename VecType2::array_layout, Kokkos::LayoutRight>::value);
676  constexpr bool is_contiguous = is_A_contiguous && is_x_contiguous && is_y_contiguous;
677  Impl::GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, is_contiguous>::run (alpha, A, x, y);
678 }
679 
687 template<class ViewType1,
688  class ViewType2,
689  class ViewType3,
690  class CoefficientType,
691  class IndexType = int>
692 KOKKOS_INLINE_FUNCTION void
693 GEMM (const char transA[],
694  const char transB[],
695  const CoefficientType& alpha,
696  const ViewType1& A,
697  const ViewType2& B,
698  const CoefficientType& beta,
699  const ViewType3& C)
700 {
701  // Assert that A, B, and C are in fact matrices
702  static_assert (ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
703  static_assert (ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
704  static_assert (ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
705 
706  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
707  typedef Kokkos::ArithTraits<Scalar> STS;
708  const Scalar ZERO = STS::zero();
709  const Scalar ONE = STS::one();
710 
711  // Get the dimensions
712  IndexType m, n, k;
713  if(transA[0] == 'N' || transA[0] == 'n') {
714  m = static_cast<IndexType> (A.extent (0));
715  n = static_cast<IndexType> (A.extent (1));
716  }
717  else {
718  m = static_cast<IndexType> (A.extent (1));
719  n = static_cast<IndexType> (A.extent (0));
720  }
721  k = static_cast<IndexType> (C.extent (1));
722 
723  // quick return if possible
724  if(alpha == ZERO && beta == ONE)
725  return;
726 
727  // And if alpha equals zero...
728  if(alpha == ZERO) {
729  if(beta == ZERO) {
730  for(IndexType i=0; i<m; i++) {
731  for(IndexType j=0; j<k; j++) {
732  C(i,j) = ZERO;
733  }
734  }
735  }
736  else {
737  for(IndexType i=0; i<m; i++) {
738  for(IndexType j=0; j<k; j++) {
739  C(i,j) = beta*C(i,j);
740  }
741  }
742  }
743  }
744 
745  // Start the operations
746  if(transB[0] == 'n' || transB[0] == 'N') {
747  if(transA[0] == 'n' || transA[0] == 'N') {
748  // Form C = alpha*A*B + beta*C
749  for(IndexType j=0; j<n; j++) {
750  if(beta == ZERO) {
751  for(IndexType i=0; i<m; i++) {
752  C(i,j) = ZERO;
753  }
754  }
755  else if(beta != ONE) {
756  for(IndexType i=0; i<m; i++) {
757  C(i,j) = beta*C(i,j);
758  }
759  }
760  for(IndexType l=0; l<k; l++) {
761  Scalar temp = alpha*B(l,j);
762  for(IndexType i=0; i<m; i++) {
763  C(i,j) = C(i,j) + temp*A(i,l);
764  }
765  }
766  }
767  }
768  else {
769  // Form C = alpha*A**T*B + beta*C
770  for(IndexType j=0; j<n; j++) {
771  for(IndexType i=0; i<m; i++) {
772  Scalar temp = ZERO;
773  for(IndexType l=0; l<k; l++) {
774  temp = temp + A(l,i)*B(l,j);
775  }
776  if(beta == ZERO) {
777  C(i,j) = alpha*temp;
778  }
779  else {
780  C(i,j) = alpha*temp + beta*C(i,j);
781  }
782  }
783  }
784  }
785  }
786  else {
787  if(transA[0] == 'n' || transA[0] == 'N') {
788  // Form C = alpha*A*B**T + beta*C
789  for(IndexType j=0; j<n; j++) {
790  if(beta == ZERO) {
791  for(IndexType i=0; i<m; i++) {
792  C(i,j) = ZERO;
793  }
794  }
795  else if(beta != ONE) {
796  for(IndexType i=0; i<m; i++) {
797  C(i,j) = beta*C(i,j);
798  }
799  }
800  for(IndexType l=0; l<k; l++) {
801  Scalar temp = alpha*B(j,l);
802  for(IndexType i=0; i<m; i++) {
803  C(i,j) = C(i,j) + temp*A(i,l);
804  }
805  }
806  }
807  }
808  else {
809  // Form C = alpha*A**T*B**T + beta*C
810  for(IndexType j=0; j<n; j++) {
811  for(IndexType i=0; i<m; i++) {
812  Scalar temp = ZERO;
813  for(IndexType l=0; l<k; l++) {
814  temp = temp + A(l,i)*B(j,l);
815  }
816  if(beta == ZERO) {
817  C(i,j) = alpha*temp;
818  }
819  else {
820  C(i,j) = alpha*temp + beta*C(i,j);
821  }
822  }
823  }
824  }
825  }
826 }
827 
829 template<class LittleBlockType,
830  class LittleVectorType>
831 KOKKOS_INLINE_FUNCTION void
832 GETF2 (const LittleBlockType& A, const LittleVectorType& ipiv, int& info)
833 {
834  // The type of an entry of ipiv is the index type.
835  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
836  static_assert (std::is_integral<IndexType>::value,
837  "GETF2: The type of each entry of ipiv must be an integer type.");
838  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
839  static_assert (! std::is_const<Scalar>::value,
840  "GETF2: A must not be a const View (or LittleBlock).");
841  static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
842  "GETF2: ipiv must not be a const View (or LittleBlock).");
843  static_assert (LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
844  typedef Kokkos::ArithTraits<Scalar> STS;
845  const Scalar ZERO = STS::zero();
846 
847  const IndexType numRows = static_cast<IndexType> (A.extent (0));
848  const IndexType numCols = static_cast<IndexType> (A.extent (1));
849  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
850 
851  // std::min is not a CUDA device function
852  const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
853  if (pivDim < minPivDim) {
854  info = -2;
855  return;
856  }
857 
858  // Initialize info
859  info = 0;
860 
861  for(IndexType j=0; j < pivDim; j++)
862  {
863  // Find pivot and test for singularity
864  IndexType jp = j;
865  for(IndexType i=j+1; i<numRows; i++)
866  {
867  if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
868  jp = i;
869  }
870  }
871  ipiv(j) = jp+1;
872 
873  if(A(jp,j) != ZERO)
874  {
875  // Apply the interchange to columns 1:N
876  if(jp != j)
877  {
878  for(IndexType i=0; i < numCols; i++)
879  {
880  Scalar temp = A(jp,i);
881  A(jp,i) = A(j,i);
882  A(j,i) = temp;
883  }
884  }
885 
886  // Compute elements J+1:M of J-th column
887  for(IndexType i=j+1; i<numRows; i++) {
888  A(i,j) = A(i,j) / A(j,j);
889  }
890  }
891  else if(info == 0) {
892  info = j;
893  }
894 
895  // Update trailing submatrix
896  for(IndexType r=j+1; r < numRows; r++)
897  {
898  for(IndexType c=j+1; c < numCols; c++) {
899  A(r,c) = A(r,c) - A(r,j) * A(j,c);
900  }
901  }
902  }
903 }
904 
905 namespace Impl {
906 
910 template<class LittleBlockType,
911  class LittleIntVectorType,
912  class LittleScalarVectorType,
913  const int rank = LittleScalarVectorType::rank>
914 struct GETRS {
915  static KOKKOS_INLINE_FUNCTION void
916  run (const char mode[],
917  const LittleBlockType& A,
918  const LittleIntVectorType& ipiv,
919  const LittleScalarVectorType& B,
920  int& info);
921 };
922 
924 template<class LittleBlockType,
925  class LittleIntVectorType,
926  class LittleScalarVectorType>
927 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
928  static KOKKOS_INLINE_FUNCTION void
929  run (const char mode[],
930  const LittleBlockType& A,
931  const LittleIntVectorType& ipiv,
932  const LittleScalarVectorType& B,
933  int& info)
934  {
935  // The type of an entry of ipiv is the index type.
936  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
937  // IndexType must be signed, because this code does a countdown loop
938  // to zero. Unsigned integers are always >= 0, even on underflow.
939  static_assert (std::is_integral<IndexType>::value &&
940  std::is_signed<IndexType>::value,
941  "GETRS: The type of each entry of ipiv must be a signed integer.");
942  typedef typename std::decay<decltype (A(0,0))>::type Scalar;
943  static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
944  "GETRS: B must not be a const View (or LittleBlock).");
945  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
946  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
947  static_assert (LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
948 
949  typedef Kokkos::ArithTraits<Scalar> STS;
950  const Scalar ZERO = STS::zero();
951  const IndexType numRows = static_cast<IndexType> (A.extent (0));
952  const IndexType numCols = static_cast<IndexType> (A.extent (1));
953  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
954 
955  info = 0;
956 
957  // Ensure that the matrix is square
958  if (numRows != numCols) {
959  info = -2;
960  return;
961  }
962 
963  // Ensure that the pivot array is sufficiently large
964  if (pivDim < numRows) {
965  info = -3;
966  return;
967  }
968 
969  // No transpose case
970  if(mode[0] == 'n' || mode[0] == 'N') {
971  // Apply row interchanges to the RHS
972  for(IndexType i=0; i<numRows; i++) {
973  if(ipiv(i) != i+1) {
974  Scalar temp = B(i);
975  B(i) = B(ipiv(i)-1);
976  B(ipiv(i)-1) = temp;
977  }
978  }
979 
980  // Solve Lx=b, overwriting b with x
981  for(IndexType r=1; r < numRows; r++) {
982  for(IndexType c=0; c < r; c++) {
983  B(r) = B(r) - A(r,c)*B(c);
984  }
985  }
986 
987  // Solve Ux=b, overwriting b with x
988  for(IndexType r=numRows-1; r >= 0; r--) {
989  // Check whether U is singular
990  if(A(r,r) == ZERO) {
991  info = r+1;
992  return;
993  }
994 
995  for(IndexType c=r+1; c < numCols; c++) {
996  B(r) = B(r) - A(r,c)*B(c);
997  }
998  B(r) = B(r) / A(r,r);
999  }
1000  }
1001  // Transpose case
1002  else if(mode[0] == 't' || mode[0] == 'T') {
1003  info = -1; // NOT YET IMPLEMENTED
1004  return;
1005  }
1006  // Conjugate transpose case
1007  else if (mode[0] == 'c' || mode[0] == 'C') {
1008  info = -1; // NOT YET IMPLEMENTED
1009  return;
1010  }
1011  else { // invalid mode
1012  info = -1;
1013  return;
1014  }
1015  }
1016 };
1017 
1018 
1020 template<class LittleBlockType,
1021  class LittleIntVectorType,
1022  class LittleScalarVectorType>
1023 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1024  static KOKKOS_INLINE_FUNCTION void
1025  run (const char mode[],
1026  const LittleBlockType& A,
1027  const LittleIntVectorType& ipiv,
1028  const LittleScalarVectorType& B,
1029  int& info)
1030  {
1031  // The type of an entry of ipiv is the index type.
1032  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1033  static_assert (std::is_integral<IndexType>::value,
1034  "GETRS: The type of each entry of ipiv must be an integer type.");
1035  static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1036  "GETRS: B must not be a const View (or LittleBlock).");
1037  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1038  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1039  static_assert (LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
1040 
1041  // The current implementation iterates over one right-hand side at
1042  // a time. It might be faster to do this differently, but this
1043  // should work for now.
1044  const IndexType numRhs = B.extent (1);
1045  info = 0;
1046 
1047  for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1048  auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1050  if (info != 0) {
1051  return;
1052  }
1053  }
1054  }
1055 };
1056 
1057 } // namespace Impl
1058 
1062 template<class LittleBlockType,
1063  class LittleIntVectorType,
1064  class LittleScalarVectorType>
1065 KOKKOS_INLINE_FUNCTION void
1066 GETRS (const char mode[],
1067  const LittleBlockType& A,
1068  const LittleIntVectorType& ipiv,
1069  const LittleScalarVectorType& B,
1070  int& info)
1071 {
1072  Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1073  LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1074 }
1075 
1076 
1091 template<class LittleBlockType,
1092  class LittleIntVectorType,
1093  class LittleScalarVectorType>
1094 KOKKOS_INLINE_FUNCTION void
1095 GETRI (const LittleBlockType& A,
1096  const LittleIntVectorType& ipiv,
1097  const LittleScalarVectorType& work,
1098  int& info)
1099 {
1100  // The type of an entry of ipiv is the index type.
1101  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1102  // IndexType must be signed, because this code does a countdown loop
1103  // to zero. Unsigned integers are always >= 0, even on underflow.
1104  static_assert (std::is_integral<IndexType>::value &&
1105  std::is_signed<IndexType>::value,
1106  "GETRI: The type of each entry of ipiv must be a signed integer.");
1107  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1108  static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1109  "GETRI: A must not be a const View (or LittleBlock).");
1110  static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1111  "GETRI: work must not be a const View (or LittleBlock).");
1112  static_assert (LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1113  typedef Kokkos::ArithTraits<Scalar> STS;
1114  const Scalar ZERO = STS::zero();
1115  const Scalar ONE = STS::one();
1116 
1117  const IndexType numRows = static_cast<IndexType> (A.extent (0));
1118  const IndexType numCols = static_cast<IndexType> (A.extent (1));
1119  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
1120  const IndexType workDim = static_cast<IndexType> (work.extent (0));
1121 
1122  info = 0;
1123 
1124  // Ensure that the matrix is square
1125  if (numRows != numCols) {
1126  info = -1;
1127  return;
1128  }
1129 
1130  // Ensure that the pivot array is sufficiently large
1131  if (pivDim < numRows) {
1132  info = -2;
1133  return;
1134  }
1135 
1136  // Ensure that the work array is sufficiently large
1137  if (workDim < numRows) {
1138  info = -3;
1139  return;
1140  }
1141 
1142  // Form Uinv in place
1143  for(IndexType j=0; j < numRows; j++) {
1144  if(A(j,j) == ZERO) {
1145  info = j+1;
1146  return;
1147  }
1148 
1149  A(j,j) = ONE / A(j,j);
1150 
1151  // Compute elements 1:j-1 of j-th column
1152  for(IndexType r=0; r < j; r++) {
1153  A(r,j) = A(r,r)*A(r,j);
1154  for(IndexType c=r+1; c < j; c++) {
1155  A(r,j) = A(r,j) + A(r,c)*A(c,j);
1156  }
1157  }
1158  for(IndexType r=0; r < j; r++) {
1159  A(r,j) = -A(j,j)*A(r,j);
1160  }
1161  }
1162 
1163  // Compute Ainv by solving A\L = Uinv
1164  for(IndexType j = numCols-2; j >= 0; j--) {
1165  // Copy lower triangular data to work array and replace with 0
1166  for(IndexType r=j+1; r < numRows; r++) {
1167  work(r) = A(r,j);
1168  A(r,j) = 0;
1169  }
1170 
1171  for(IndexType r=0; r < numRows; r++) {
1172  for(IndexType i=j+1; i < numRows; i++) {
1173  A(r,j) = A(r,j) - work(i)*A(r,i);
1174  }
1175  }
1176  }
1177 
1178  // Apply column interchanges
1179  for(IndexType j=numRows-1; j >= 0; j--) {
1180  IndexType jp = ipiv(j)-1;
1181  if(j != jp) {
1182  for(IndexType r=0; r < numRows; r++) {
1183  Scalar temp = A(r,j);
1184  A(r,j) = A(r,jp);
1185  A(r,jp) = temp;
1186  }
1187  }
1188  }
1189 }
1190 
1191 
1192 // mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1193 // an implementation for trans != 'N' (the transpose and conjugate
1194 // transpose cases).
1195 #if 0
1196 template<class LittleBlockType,
1197  class LittleVectorTypeX,
1198  class LittleVectorTypeY,
1199  class CoefficientType,
1200  class IndexType = int>
1201 KOKKOS_INLINE_FUNCTION void
1202 GEMV (const char trans,
1203  const CoefficientType& alpha,
1204  const LittleBlockType& A,
1205  const LittleVectorTypeX& x,
1206  const CoefficientType& beta,
1207  const LittleVectorTypeY& y)
1208 {
1209  // y(0) returns a reference to the 0-th entry of y. Remove that
1210  // reference to get the type of each entry of y. It's OK if y has
1211  // zero entries -- this doesn't actually do y(i), it just returns
1212  // the type of that expression.
1213  typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1214  const IndexType numRows = static_cast<IndexType> (A.extent (0));
1215  const IndexType numCols = static_cast<IndexType> (A.extent (1));
1216 
1217  if (beta == 0.0) {
1218  if (alpha == 0.0) {
1219  for (IndexType i = 0; i < numRows; ++i) {
1220  y(i) = 0.0;
1221  }
1222  }
1223  else {
1224  for (IndexType i = 0; i < numRows; ++i) {
1225  y_value_type y_i = 0.0;
1226  for (IndexType j = 0; j < numCols; ++j) {
1227  y_i += A(i,j) * x(j);
1228  }
1229  y(i) = y_i;
1230  }
1231  }
1232  }
1233  else { // beta != 0
1234  if (alpha == 0.0) {
1235  if (beta == 0.0) {
1236  for (IndexType i = 0; i < numRows; ++i) {
1237  y(i) = 0.0;
1238  }
1239  }
1240  else {
1241  for (IndexType i = 0; i < numRows; ++i) {
1242  y(i) *= beta;
1243  }
1244  }
1245  }
1246  else {
1247  for (IndexType i = 0; i < numRows; ++i) {
1248  y_value_type y_i = beta * y(i);
1249  for (IndexType j = 0; j < numCols; ++j) {
1250  y_i += alpha * A(i,j) * x(j);
1251  }
1252  y(i) = y_i;
1253  }
1254  }
1255  }
1256 }
1257 
1258 #endif // 0
1259 
1260 } // namespace Tpetra
1261 
1262 #endif // TPETRA_BLOCKVIEW_HPP
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra::SCAL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
Implementation of Tpetra::COPY function.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra::AXPY function.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Computes the solution to Ax=b.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
Implementation of Tpetra::FILL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.