Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_BlockView.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_BLOCKVIEW_HPP
43 #define TPETRA_BLOCKVIEW_HPP
44 
52 
53 #include "TpetraCore_config.h"
54 #include "Kokkos_ArithTraits.hpp"
55 #include "Kokkos_Complex.hpp"
56 
57 namespace Tpetra {
58 
63 
64 namespace Impl {
65 
72 template<class ViewType1,
73  class ViewType2,
74  const int rank1 = ViewType1::rank>
75 struct AbsMax {
76  static KOKKOS_INLINE_FUNCTION void
77  run (const ViewType2& Y, const ViewType1& X);
78 };
79 
84 template<class ViewType1,
85  class ViewType2>
86 struct AbsMax<ViewType1, ViewType2, 2> {
89  static KOKKOS_INLINE_FUNCTION void
90  run (const ViewType2& Y, const ViewType1& X)
91  {
92  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
93  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
94  typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
95  static_assert(! std::is_const<STY>::value,
96  "AbsMax: The type of each entry of Y must be nonconst.");
97  typedef typename std::decay<decltype (X(0,0)) >::type STX;
98  static_assert( std::is_same<STX, STY>::value,
99  "AbsMax: The type of each entry of X and Y must be the same.");
100  typedef Kokkos::Details::ArithTraits<STY> KAT;
101 
102  const int numCols = Y.extent (1);
103  const int numRows = Y.extent (0);
104  for (int j = 0; j < numCols; ++j) {
105  for (int i = 0; i < numRows; ++i) {
106  STY& Y_ij = Y(i,j); // use ref here to avoid 2nd op() call on Y
107  const STX X_ij = X(i,j);
108  // NOTE: no std::max (not a CUDA __device__ function); must
109  // cast back up to complex.
110  const auto Y_ij_abs = KAT::abs (Y_ij);
111  const auto X_ij_abs = KAT::abs (X_ij);
112  Y_ij = (Y_ij_abs >= X_ij_abs) ?
113  static_cast<STY> (Y_ij_abs) :
114  static_cast<STY> (X_ij_abs);
115  }
116  }
117  }
118 };
119 
124 template<class ViewType1,
125  class ViewType2>
126 struct AbsMax<ViewType1, ViewType2, 1> {
129  static KOKKOS_INLINE_FUNCTION void
130  run (const ViewType2& Y, const ViewType1& X)
131  {
132  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
133  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
134 
135  typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
136  static_assert(! std::is_const<STY>::value,
137  "AbsMax: The type of each entry of Y must be nonconst.");
138  typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
139  static_assert( std::is_same<STX, STY>::value,
140  "AbsMax: The type of each entry of X and Y must be the same.");
141  typedef Kokkos::Details::ArithTraits<STY> KAT;
142 
143  const int numRows = Y.extent (0);
144  for (int i = 0; i < numRows; ++i) {
145  STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
146  const STX X_i = X(i);
147  // NOTE: no std::max (not a CUDA __device__ function); must
148  // cast back up to complex.
149  const auto Y_i_abs = KAT::abs (Y_i);
150  const auto X_i_abs = KAT::abs (X_i);
151  Y_i = (Y_i_abs >= X_i_abs) ?
152  static_cast<STY> (Y_i_abs) :
153  static_cast<STY> (X_i_abs);
154  }
155  }
156 };
157 
164 template<class ViewType1, class ViewType2, const int rank = ViewType1::rank>
165 KOKKOS_INLINE_FUNCTION void
166 absMax (const ViewType2& Y, const ViewType1& X)
167 {
168  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
169  "absMax: ViewType1 and ViewType2 must have the same rank.");
171 }
172 
177 template<class ViewType,
178  class CoefficientType,
179  class LayoutType = typename ViewType::array_layout,
180  class IndexType = int,
181  const int rank = ViewType::rank>
182 struct SCAL {
183  static KOKKOS_INLINE_FUNCTION void
184  run (const CoefficientType& alpha, const ViewType& x);
185 };
186 
189 template<class ViewType,
190  class CoefficientType,
191  class LayoutType,
192  class IndexType>
193 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 1> {
195  static KOKKOS_INLINE_FUNCTION void
196  run (const CoefficientType& alpha, const ViewType& x)
197  {
198  const IndexType numRows = static_cast<IndexType> (x.extent (0));
199  // BLAS _SCAL doesn't check whether alpha is 0.
200  for (IndexType i = 0; i < numRows; ++i) {
201  x(i) = alpha * x(i);
202  }
203  }
204 };
205 
208 template<class ViewType,
209  class CoefficientType,
210  class LayoutType,
211  class IndexType>
212 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 2> {
214  static KOKKOS_INLINE_FUNCTION void
215  run (const CoefficientType& alpha, const ViewType& A)
216  {
217  const IndexType numRows = static_cast<IndexType> (A.extent (0));
218  const IndexType numCols = static_cast<IndexType> (A.extent (1));
219 
220  // BLAS _SCAL doesn't check whether alpha is 0.
221  for (IndexType i = 0; i < numRows; ++i) {
222  for (IndexType j = 0; j < numCols; ++j) {
223  A(i,j) = alpha * A(i,j);
224  }
225  }
226  }
227 };
228 
234 template<class ViewType,
235  class CoefficientType,
236  class IndexType>
237 struct SCAL<ViewType, CoefficientType, Kokkos::LayoutRight, IndexType, 2> {
239  static KOKKOS_INLINE_FUNCTION void
240  run (const CoefficientType& alpha, const ViewType& A)
241  {
242  const IndexType N = A.size ();
243  typedef typename std::decay<decltype (A(0,0)) >::type scalar_type;
244  scalar_type* const A_raw = A.data ();
245 
246  for (IndexType i = 0; i < N; ++i) {
247  A_raw[i] = alpha * A_raw[i];
248  }
249  }
250 };
251 
252 
257 template<class ViewType,
258  class InputType,
259  class LayoutType = typename ViewType::array_layout,
260  class IndexType = int,
261  const int rank = ViewType::rank>
262 struct FILL {
263  static KOKKOS_INLINE_FUNCTION void
264  run (const ViewType& x, const InputType& val);
265 };
266 
269 template<class ViewType,
270  class InputType,
271  class LayoutType,
272  class IndexType>
273 struct FILL<ViewType, InputType, LayoutType, IndexType, 1> {
274  static KOKKOS_INLINE_FUNCTION void
275  run (const ViewType& x, const InputType& val)
276  {
277  const IndexType numRows = static_cast<IndexType> (x.extent (0));
278  for (IndexType i = 0; i < numRows; ++i) {
279  x(i) = val;
280  }
281  }
282 };
283 
286 template<class ViewType,
287  class InputType,
288  class LayoutType,
289  class IndexType>
290 struct FILL<ViewType, InputType, LayoutType, IndexType, 2> {
291  static KOKKOS_INLINE_FUNCTION void
292  run (const ViewType& X, const InputType& val)
293  {
294  const IndexType numRows = static_cast<IndexType> (X.extent (0));
295  const IndexType numCols = static_cast<IndexType> (X.extent (1));
296  for (IndexType j = 0; j < numCols; ++j) {
297  for (IndexType i = 0; i < numRows; ++i) {
298  X(i,j) = val;
299  }
300  }
301  }
302 };
303 
308 template<class CoefficientType,
309  class ViewType1,
310  class ViewType2,
311  class LayoutType1 = typename ViewType1::array_layout,
312  class LayoutType2 = typename ViewType2::array_layout,
313  class IndexType = int,
314  const int rank = ViewType1::rank>
315 struct AXPY {
316  static KOKKOS_INLINE_FUNCTION void
317  run (const CoefficientType& alpha,
318  const ViewType1& x,
319  const ViewType2& y);
320 };
321 
324 template<class CoefficientType,
325  class ViewType1,
326  class ViewType2,
327  class LayoutType1,
328  class LayoutType2,
329  class IndexType>
330 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
332  static KOKKOS_INLINE_FUNCTION void
333  run (const CoefficientType& alpha,
334  const ViewType1& x,
335  const ViewType2& y)
336  {
337  using Kokkos::Details::ArithTraits;
338  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
339  "AXPY: x and y must have the same rank.");
340 
341  const IndexType numRows = static_cast<IndexType> (y.extent (0));
342  if (alpha != ArithTraits<CoefficientType>::zero ()) {
343  for (IndexType i = 0; i < numRows; ++i) {
344  y(i) += alpha * x(i);
345  }
346  }
347  }
348 };
349 
352 template<class CoefficientType,
353  class ViewType1,
354  class ViewType2,
355  class LayoutType1,
356  class LayoutType2,
357  class IndexType>
358 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
360  static KOKKOS_INLINE_FUNCTION void
361  run (const CoefficientType& alpha,
362  const ViewType1& X,
363  const ViewType2& Y)
364  {
365  using Kokkos::Details::ArithTraits;
366  static_assert (ViewType1::rank == ViewType2::rank,
367  "AXPY: X and Y must have the same rank.");
368  const IndexType numRows = static_cast<IndexType> (Y.extent (0));
369  const IndexType numCols = static_cast<IndexType> (Y.extent (1));
370 
371  if (alpha != ArithTraits<CoefficientType>::zero ()) {
372  for (IndexType i = 0; i < numRows; ++i) {
373  for (IndexType j = 0; j < numCols; ++j) {
374  Y(i,j) += alpha * X(i,j);
375  }
376  }
377  }
378  }
379 };
380 
384 template<class CoefficientType,
385  class ViewType1,
386  class ViewType2,
387  class IndexType>
388 struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
390  static KOKKOS_INLINE_FUNCTION void
391  run (const CoefficientType& alpha,
392  const ViewType1& X,
393  const ViewType2& Y)
394  {
395  using Kokkos::Details::ArithTraits;
396  static_assert (static_cast<int> (ViewType1::rank) ==
397  static_cast<int> (ViewType2::rank),
398  "AXPY: X and Y must have the same rank.");
399  typedef typename std::decay<decltype (X(0,0)) >::type SX;
400  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
401 
402  const IndexType N = static_cast<IndexType> (Y.size ());
403  const SX* const X_raw = X.data ();
404  SY* const Y_raw = Y.data ();
405 
406  if (alpha != ArithTraits<CoefficientType>::zero ()) {
407  for (IndexType i = 0; i < N; ++i) {
408  Y_raw[i] += alpha * X_raw[i];
409  }
410  }
411  }
412 };
413 
417 template<class CoefficientType,
418  class ViewType1,
419  class ViewType2,
420  class IndexType>
421 struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
423  static KOKKOS_INLINE_FUNCTION void
424  run (const CoefficientType& alpha,
425  const ViewType1& X,
426  const ViewType2& Y)
427  {
428  using Kokkos::Details::ArithTraits;
429  static_assert (ViewType1::rank == ViewType2::rank,
430  "AXPY: X and Y must have the same rank.");
431  typedef typename std::decay<decltype (X(0,0)) >::type SX;
432  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
433 
434  const IndexType N = static_cast<IndexType> (Y.size ());
435  const SX* const X_raw = X.data ();
436  SY* const Y_raw = Y.data ();
437 
438  if (alpha != ArithTraits<CoefficientType>::zero ()) {
439  for (IndexType i = 0; i < N; ++i) {
440  Y_raw[i] += alpha * X_raw[i];
441  }
442  }
443  }
444 };
445 
450 template<class ViewType1,
451  class ViewType2,
452  class LayoutType1 = typename ViewType1::array_layout,
453  class LayoutType2 = typename ViewType2::array_layout,
454  class IndexType = int,
455  const int rank = ViewType1::rank>
456 struct COPY {
457  static KOKKOS_INLINE_FUNCTION void
458  run (const ViewType1& x, const ViewType2& y);
459 };
460 
463 template<class ViewType1,
464  class ViewType2,
465  class LayoutType1,
466  class LayoutType2,
467  class IndexType>
468 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
470  static KOKKOS_INLINE_FUNCTION void
471  run (const ViewType1& x, const ViewType2& y)
472  {
473  const IndexType numRows = static_cast<IndexType> (x.extent (0));
474  for (IndexType i = 0; i < numRows; ++i) {
475  y(i) = x(i);
476  }
477  }
478 };
479 
482 template<class ViewType1,
483  class ViewType2,
484  class LayoutType1,
485  class LayoutType2,
486  class IndexType>
487 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
489  static KOKKOS_INLINE_FUNCTION void
490  run (const ViewType1& X, const ViewType2& Y)
491  {
492  const IndexType numRows = static_cast<IndexType> (Y.extent (0));
493  const IndexType numCols = static_cast<IndexType> (Y.extent (1));
494 
495  // BLAS _SCAL doesn't check whether alpha is 0.
496  for (IndexType i = 0; i < numRows; ++i) {
497  for (IndexType j = 0; j < numCols; ++j) {
498  Y(i,j) = X(i,j);
499  }
500  }
501  }
502 };
503 
507 template<class ViewType1,
508  class ViewType2,
509  class IndexType>
510 struct COPY<ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
512  static KOKKOS_INLINE_FUNCTION void
513  run (const ViewType1& X, const ViewType2& Y)
514  {
515  typedef typename std::decay<decltype (X(0,0)) >::type SX;
516  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
517 
518  const IndexType N = static_cast<IndexType> (Y.size ());
519  const SX* const X_raw = X.data ();
520  SY* const Y_raw = Y.data ();
521 
522  // BLAS _SCAL doesn't check whether alpha is 0.
523  for (IndexType i = 0; i < N; ++i) {
524  Y_raw[i] = X_raw[i];
525  }
526  }
527 };
528 
532 template<class ViewType1,
533  class ViewType2,
534  class IndexType>
535 struct COPY<ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
537  static KOKKOS_INLINE_FUNCTION void
538  run (const ViewType1& X, const ViewType2& Y)
539  {
540  typedef typename std::decay<decltype (X(0,0)) >::type SX;
541  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
542 
543  const IndexType N = static_cast<IndexType> (Y.size ());
544  const SX* const X_raw = X.data ();
545  SY* const Y_raw = Y.data ();
546 
547  // BLAS _SCAL doesn't check whether alpha is 0.
548  for (IndexType i = 0; i < N; ++i) {
549  Y_raw[i] = X_raw[i];
550  }
551  }
552 };
553 
554 
555 template<class VecType1,
556  class BlkType,
557  class VecType2,
558  class CoeffType,
559  class IndexType = int,
560  class VecLayoutType1 = typename VecType1::array_layout,
561  class BlkLayoutType = typename BlkType::array_layout,
562  class VecLayoutType2 = typename VecType2::array_layout>
563 struct GEMV {
564  static KOKKOS_INLINE_FUNCTION void
565  run (const CoeffType& alpha,
566  const BlkType& A,
567  const VecType1& x,
568  const VecType2& y)
569  {
570  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
571  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
572  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
573  typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
574 
575  const IndexType numRows = static_cast<IndexType> (A.extent (0));
576  const IndexType numCols = static_cast<IndexType> (A.extent (1));
577 
578  for (IndexType i = 0; i < numRows; ++i) {
579  y_elt_type y_i = y(i);
580  for (IndexType j = 0; j < numCols; ++j) {
581  y_i += alpha * A(i,j) * x(j);
582  }
583  y(i) = y_i;
584  }
585  }
586 };
587 
588 template<class VecType1,
589  class BlkType,
590  class VecType2,
591  class CoeffType,
592  class IndexType>
593 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
594  Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight>
595 {
596  static KOKKOS_INLINE_FUNCTION void
597  run (const CoeffType& alpha,
598  const BlkType& A,
599  const VecType1& x,
600  const VecType2& y)
601  {
602  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
603  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
604  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
605  typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
606  typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
607 
608  const IndexType numRows = static_cast<IndexType> (A.extent (0));
609  const IndexType numCols = static_cast<IndexType> (A.extent (1));
610  const A_elt_type* const A_raw = A.data ();
611 
612  for (IndexType i = 0; i < numRows; ++i) {
613  y_elt_type y_i = y(i);
614  const A_elt_type* const A_i = A_raw + i*numCols;
615  for (IndexType j = 0; j < numCols; ++j) {
616  y_i += alpha * A_i[j] * x(j);
617  }
618  y(i) = y_i;
619  }
620  }
621 };
622 
623 template<class VecType1,
624  class BlkType,
625  class VecType2,
626  class CoeffType,
627  class IndexType>
628 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
629  Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft>
630 {
631  static KOKKOS_INLINE_FUNCTION void
632  run (const CoeffType& alpha,
633  const BlkType& A,
634  const VecType1& x,
635  const VecType2& y)
636  {
637  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
638  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
639  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
640  typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
641 
642  const A_elt_type* const A_raw = A.data ();
643  const IndexType numRows = static_cast<IndexType> (A.extent (0));
644  const IndexType numCols = static_cast<IndexType> (A.extent (1));
645  for (IndexType j = 0; j < numCols; ++j) {
646  const A_elt_type* const A_j = A_raw + j*numRows;
647  for (IndexType i = 0; i < numRows; ++i) {
648  y(i) += alpha * A_j[i] * x(i);
649  }
650  }
651  }
652 };
653 
654 } // namespace Impl
655 
658 template<class ViewType,
659  class CoefficientType,
660  class LayoutType = typename ViewType::array_layout,
661  class IndexType = int,
662  const int rank = ViewType::rank>
663 KOKKOS_INLINE_FUNCTION void
664 SCAL (const CoefficientType& alpha, const ViewType& x)
665 {
667 }
668 
670 template<class ViewType,
671  class InputType,
672  class LayoutType = typename ViewType::array_layout,
673  class IndexType = int,
674  const int rank = ViewType::rank>
675 KOKKOS_INLINE_FUNCTION void
676 FILL (const ViewType& x, const InputType& val)
677 {
679 }
680 
686 template<class CoefficientType,
687  class ViewType1,
688  class ViewType2,
689  class LayoutType1 = typename ViewType1::array_layout,
690  class LayoutType2 = typename ViewType2::array_layout,
691  class IndexType = int,
692  const int rank = ViewType1::rank>
693 KOKKOS_INLINE_FUNCTION void
694 AXPY (const CoefficientType& alpha,
695  const ViewType1& x,
696  const ViewType2& y)
697 {
698  static_assert (static_cast<int> (ViewType1::rank) ==
699  static_cast<int> (ViewType2::rank),
700  "AXPY: x and y must have the same rank.");
701  Impl::AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2,
702  IndexType, rank>::run (alpha, x, y);
703 }
704 
713 template<class ViewType1,
714  class ViewType2,
715  class LayoutType1 = typename ViewType1::array_layout,
716  class LayoutType2 = typename ViewType2::array_layout,
717  class IndexType = int,
718  const int rank = ViewType1::rank>
719 KOKKOS_INLINE_FUNCTION void
720 COPY (const ViewType1& x, const ViewType2& y)
721 {
722  static_assert (static_cast<int> (ViewType1::rank) ==
723  static_cast<int> (ViewType2::rank),
724  "COPY: x and y must have the same rank.");
725  Impl::COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType,
726  rank>::run (x, y);
727 }
728 
740 template<class VecType1,
741  class BlkType,
742  class VecType2,
743  class CoeffType,
744  class IndexType = int>
745 KOKKOS_INLINE_FUNCTION void
746 GEMV (const CoeffType& alpha,
747  const BlkType& A,
748  const VecType1& x,
749  const VecType2& y)
750 {
751  Impl::GEMV<VecType1, BlkType, VecType2, CoeffType,
752  IndexType>::run (alpha, A, x, y);
753 }
754 
762 template<class ViewType1,
763  class ViewType2,
764  class ViewType3,
765  class CoefficientType,
766  class IndexType = int>
767 KOKKOS_INLINE_FUNCTION void
768 GEMM (const char transA[],
769  const char transB[],
770  const CoefficientType& alpha,
771  const ViewType1& A,
772  const ViewType2& B,
773  const CoefficientType& beta,
774  const ViewType3& C)
775 {
776  // Assert that A, B, and C are in fact matrices
777  static_assert (ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
778  static_assert (ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
779  static_assert (ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
780 
781  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
782  typedef Kokkos::Details::ArithTraits<Scalar> STS;
783  const Scalar ZERO = STS::zero();
784  const Scalar ONE = STS::one();
785 
786  // Get the dimensions
787  IndexType m, n, k;
788  if(transA[0] == 'N' || transA[0] == 'n') {
789  m = static_cast<IndexType> (A.extent (0));
790  n = static_cast<IndexType> (A.extent (1));
791  }
792  else {
793  m = static_cast<IndexType> (A.extent (1));
794  n = static_cast<IndexType> (A.extent (0));
795  }
796  k = static_cast<IndexType> (C.extent (1));
797 
798  // quick return if possible
799  if(alpha == ZERO && beta == ONE)
800  return;
801 
802  // And if alpha equals zero...
803  if(alpha == ZERO) {
804  if(beta == ZERO) {
805  for(IndexType i=0; i<m; i++) {
806  for(IndexType j=0; j<k; j++) {
807  C(i,j) = ZERO;
808  }
809  }
810  }
811  else {
812  for(IndexType i=0; i<m; i++) {
813  for(IndexType j=0; j<k; j++) {
814  C(i,j) = beta*C(i,j);
815  }
816  }
817  }
818  }
819 
820  // Start the operations
821  if(transB[0] == 'n' || transB[0] == 'N') {
822  if(transA[0] == 'n' || transA[0] == 'N') {
823  // Form C = alpha*A*B + beta*C
824  for(IndexType j=0; j<n; j++) {
825  if(beta == ZERO) {
826  for(IndexType i=0; i<m; i++) {
827  C(i,j) = ZERO;
828  }
829  }
830  else if(beta != ONE) {
831  for(IndexType i=0; i<m; i++) {
832  C(i,j) = beta*C(i,j);
833  }
834  }
835  for(IndexType l=0; l<k; l++) {
836  Scalar temp = alpha*B(l,j);
837  for(IndexType i=0; i<m; i++) {
838  C(i,j) = C(i,j) + temp*A(i,l);
839  }
840  }
841  }
842  }
843  else {
844  // Form C = alpha*A**T*B + beta*C
845  for(IndexType j=0; j<n; j++) {
846  for(IndexType i=0; i<m; i++) {
847  Scalar temp = ZERO;
848  for(IndexType l=0; l<k; l++) {
849  temp = temp + A(l,i)*B(l,j);
850  }
851  if(beta == ZERO) {
852  C(i,j) = alpha*temp;
853  }
854  else {
855  C(i,j) = alpha*temp + beta*C(i,j);
856  }
857  }
858  }
859  }
860  }
861  else {
862  if(transA[0] == 'n' || transA[0] == 'N') {
863  // Form C = alpha*A*B**T + beta*C
864  for(IndexType j=0; j<n; j++) {
865  if(beta == ZERO) {
866  for(IndexType i=0; i<m; i++) {
867  C(i,j) = ZERO;
868  }
869  }
870  else if(beta != ONE) {
871  for(IndexType i=0; i<m; i++) {
872  C(i,j) = beta*C(i,j);
873  }
874  }
875  for(IndexType l=0; l<k; l++) {
876  Scalar temp = alpha*B(j,l);
877  for(IndexType i=0; i<m; i++) {
878  C(i,j) = C(i,j) + temp*A(i,l);
879  }
880  }
881  }
882  }
883  else {
884  // Form C = alpha*A**T*B**T + beta*C
885  for(IndexType j=0; j<n; j++) {
886  for(IndexType i=0; i<m; i++) {
887  Scalar temp = ZERO;
888  for(IndexType l=0; l<k; l++) {
889  temp = temp + A(l,i)*B(j,l);
890  }
891  if(beta == ZERO) {
892  C(i,j) = alpha*temp;
893  }
894  else {
895  C(i,j) = alpha*temp + beta*C(i,j);
896  }
897  }
898  }
899  }
900  }
901 }
902 
904 template<class LittleBlockType,
905  class LittleVectorType>
906 KOKKOS_INLINE_FUNCTION void
907 GETF2 (const LittleBlockType& A, const LittleVectorType& ipiv, int& info)
908 {
909  // The type of an entry of ipiv is the index type.
910  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
911  static_assert (std::is_integral<IndexType>::value,
912  "GETF2: The type of each entry of ipiv must be an integer type.");
913  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
914  static_assert (! std::is_const<Scalar>::value,
915  "GETF2: A must not be a const View (or LittleBlock).");
916  static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
917  "GETF2: ipiv must not be a const View (or LittleBlock).");
918  static_assert (LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
919  typedef Kokkos::Details::ArithTraits<Scalar> STS;
920  const Scalar ZERO = STS::zero();
921 
922  const IndexType numRows = static_cast<IndexType> (A.extent (0));
923  const IndexType numCols = static_cast<IndexType> (A.extent (1));
924  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
925 
926  // std::min is not a CUDA device function
927  const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
928  if (pivDim < minPivDim) {
929  info = -2;
930  return;
931  }
932 
933  // Initialize info
934  info = 0;
935 
936  for(IndexType j=0; j < pivDim; j++)
937  {
938  // Find pivot and test for singularity
939  IndexType jp = j;
940  for(IndexType i=j+1; i<numRows; i++)
941  {
942  if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
943  jp = i;
944  }
945  }
946  ipiv(j) = jp+1;
947 
948  if(A(jp,j) != ZERO)
949  {
950  // Apply the interchange to columns 1:N
951  if(jp != j)
952  {
953  for(IndexType i=0; i < numCols; i++)
954  {
955  Scalar temp = A(jp,i);
956  A(jp,i) = A(j,i);
957  A(j,i) = temp;
958  }
959  }
960 
961  // Compute elements J+1:M of J-th column
962  for(IndexType i=j+1; i<numRows; i++) {
963  A(i,j) = A(i,j) / A(j,j);
964  }
965  }
966  else if(info == 0) {
967  info = j;
968  }
969 
970  // Update trailing submatrix
971  for(IndexType r=j+1; r < numRows; r++)
972  {
973  for(IndexType c=j+1; c < numCols; c++) {
974  A(r,c) = A(r,c) - A(r,j) * A(j,c);
975  }
976  }
977  }
978 }
979 
980 namespace Impl {
981 
985 template<class LittleBlockType,
986  class LittleIntVectorType,
987  class LittleScalarVectorType,
988  const int rank = LittleScalarVectorType::rank>
989 struct GETRS {
990  static KOKKOS_INLINE_FUNCTION void
991  run (const char mode[],
992  const LittleBlockType& A,
993  const LittleIntVectorType& ipiv,
994  const LittleScalarVectorType& B,
995  int& info);
996 };
997 
999 template<class LittleBlockType,
1000  class LittleIntVectorType,
1001  class LittleScalarVectorType>
1002 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
1003  static KOKKOS_INLINE_FUNCTION void
1004  run (const char mode[],
1005  const LittleBlockType& A,
1006  const LittleIntVectorType& ipiv,
1007  const LittleScalarVectorType& B,
1008  int& info)
1009  {
1010  // The type of an entry of ipiv is the index type.
1011  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1012  // IndexType must be signed, because this code does a countdown loop
1013  // to zero. Unsigned integers are always >= 0, even on underflow.
1014  static_assert (std::is_integral<IndexType>::value &&
1015  std::is_signed<IndexType>::value,
1016  "GETRS: The type of each entry of ipiv must be a signed integer.");
1017  typedef typename std::decay<decltype (A(0,0))>::type Scalar;
1018  static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
1019  "GETRS: B must not be a const View (or LittleBlock).");
1020  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1021  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1022  static_assert (LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
1023 
1024  typedef Kokkos::Details::ArithTraits<Scalar> STS;
1025  const Scalar ZERO = STS::zero();
1026  const IndexType numRows = static_cast<IndexType> (A.extent (0));
1027  const IndexType numCols = static_cast<IndexType> (A.extent (1));
1028  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
1029 
1030  info = 0;
1031 
1032  // Ensure that the matrix is square
1033  if (numRows != numCols) {
1034  info = -2;
1035  return;
1036  }
1037 
1038  // Ensure that the pivot array is sufficiently large
1039  if (pivDim < numRows) {
1040  info = -3;
1041  return;
1042  }
1043 
1044  // No transpose case
1045  if(mode[0] == 'n' || mode[0] == 'N') {
1046  // Apply row interchanges to the RHS
1047  for(IndexType i=0; i<numRows; i++) {
1048  if(ipiv(i) != i+1) {
1049  Scalar temp = B(i);
1050  B(i) = B(ipiv(i)-1);
1051  B(ipiv(i)-1) = temp;
1052  }
1053  }
1054 
1055  // Solve Lx=b, overwriting b with x
1056  for(IndexType r=1; r < numRows; r++) {
1057  for(IndexType c=0; c < r; c++) {
1058  B(r) = B(r) - A(r,c)*B(c);
1059  }
1060  }
1061 
1062  // Solve Ux=b, overwriting b with x
1063  for(IndexType r=numRows-1; r >= 0; r--) {
1064  // Check whether U is singular
1065  if(A(r,r) == ZERO) {
1066  info = r+1;
1067  return;
1068  }
1069 
1070  for(IndexType c=r+1; c < numCols; c++) {
1071  B(r) = B(r) - A(r,c)*B(c);
1072  }
1073  B(r) = B(r) / A(r,r);
1074  }
1075  }
1076  // Transpose case
1077  else if(mode[0] == 't' || mode[0] == 'T') {
1078  info = -1; // NOT YET IMPLEMENTED
1079  return;
1080  }
1081  // Conjugate transpose case
1082  else if (mode[0] == 'c' || mode[0] == 'C') {
1083  info = -1; // NOT YET IMPLEMENTED
1084  return;
1085  }
1086  else { // invalid mode
1087  info = -1;
1088  return;
1089  }
1090  }
1091 };
1092 
1093 
1095 template<class LittleBlockType,
1096  class LittleIntVectorType,
1097  class LittleScalarVectorType>
1098 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1099  static KOKKOS_INLINE_FUNCTION void
1100  run (const char mode[],
1101  const LittleBlockType& A,
1102  const LittleIntVectorType& ipiv,
1103  const LittleScalarVectorType& B,
1104  int& info)
1105  {
1106  // The type of an entry of ipiv is the index type.
1107  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1108  static_assert (std::is_integral<IndexType>::value,
1109  "GETRS: The type of each entry of ipiv must be an integer type.");
1110  static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1111  "GETRS: B must not be a const View (or LittleBlock).");
1112  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1113  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1114  static_assert (LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
1115 
1116  // The current implementation iterates over one right-hand side at
1117  // a time. It might be faster to do this differently, but this
1118  // should work for now.
1119  const IndexType numRhs = B.extent (1);
1120  info = 0;
1121 
1122  for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1123  auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1125  if (info != 0) {
1126  return;
1127  }
1128  }
1129  }
1130 };
1131 
1132 } // namespace Impl
1133 
1137 template<class LittleBlockType,
1138  class LittleIntVectorType,
1139  class LittleScalarVectorType>
1140 KOKKOS_INLINE_FUNCTION void
1141 GETRS (const char mode[],
1142  const LittleBlockType& A,
1143  const LittleIntVectorType& ipiv,
1144  const LittleScalarVectorType& B,
1145  int& info)
1146 {
1147  Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1148  LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1149 }
1150 
1151 
1166 template<class LittleBlockType,
1167  class LittleIntVectorType,
1168  class LittleScalarVectorType>
1169 KOKKOS_INLINE_FUNCTION void
1170 GETRI (const LittleBlockType& A,
1171  const LittleIntVectorType& ipiv,
1172  const LittleScalarVectorType& work,
1173  int& info)
1174 {
1175  // The type of an entry of ipiv is the index type.
1176  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1177  // IndexType must be signed, because this code does a countdown loop
1178  // to zero. Unsigned integers are always >= 0, even on underflow.
1179  static_assert (std::is_integral<IndexType>::value &&
1180  std::is_signed<IndexType>::value,
1181  "GETRI: The type of each entry of ipiv must be a signed integer.");
1182  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1183  static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1184  "GETRI: A must not be a const View (or LittleBlock).");
1185  static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1186  "GETRI: work must not be a const View (or LittleBlock).");
1187  static_assert (LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1188  typedef Kokkos::Details::ArithTraits<Scalar> STS;
1189  const Scalar ZERO = STS::zero();
1190  const Scalar ONE = STS::one();
1191 
1192  const IndexType numRows = static_cast<IndexType> (A.extent (0));
1193  const IndexType numCols = static_cast<IndexType> (A.extent (1));
1194  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
1195  const IndexType workDim = static_cast<IndexType> (work.extent (0));
1196 
1197  info = 0;
1198 
1199  // Ensure that the matrix is square
1200  if (numRows != numCols) {
1201  info = -1;
1202  return;
1203  }
1204 
1205  // Ensure that the pivot array is sufficiently large
1206  if (pivDim < numRows) {
1207  info = -2;
1208  return;
1209  }
1210 
1211  // Ensure that the work array is sufficiently large
1212  if (workDim < numRows) {
1213  info = -3;
1214  return;
1215  }
1216 
1217  // Form Uinv in place
1218  for(IndexType j=0; j < numRows; j++) {
1219  if(A(j,j) == ZERO) {
1220  info = j+1;
1221  return;
1222  }
1223 
1224  A(j,j) = ONE / A(j,j);
1225 
1226  // Compute elements 1:j-1 of j-th column
1227  for(IndexType r=0; r < j; r++) {
1228  A(r,j) = A(r,r)*A(r,j);
1229  for(IndexType c=r+1; c < j; c++) {
1230  A(r,j) = A(r,j) + A(r,c)*A(c,j);
1231  }
1232  }
1233  for(IndexType r=0; r < j; r++) {
1234  A(r,j) = -A(j,j)*A(r,j);
1235  }
1236  }
1237 
1238  // Compute Ainv by solving A\L = Uinv
1239  for(IndexType j = numCols-2; j >= 0; j--) {
1240  // Copy lower triangular data to work array and replace with 0
1241  for(IndexType r=j+1; r < numRows; r++) {
1242  work(r) = A(r,j);
1243  A(r,j) = 0;
1244  }
1245 
1246  for(IndexType r=0; r < numRows; r++) {
1247  for(IndexType i=j+1; i < numRows; i++) {
1248  A(r,j) = A(r,j) - work(i)*A(r,i);
1249  }
1250  }
1251  }
1252 
1253  // Apply column interchanges
1254  for(IndexType j=numRows-1; j >= 0; j--) {
1255  IndexType jp = ipiv(j)-1;
1256  if(j != jp) {
1257  for(IndexType r=0; r < numRows; r++) {
1258  Scalar temp = A(r,j);
1259  A(r,j) = A(r,jp);
1260  A(r,jp) = temp;
1261  }
1262  }
1263  }
1264 }
1265 
1266 
1267 // mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1268 // an implementation for trans != 'N' (the transpose and conjugate
1269 // transpose cases).
1270 #if 0
1271 template<class LittleBlockType,
1272  class LittleVectorTypeX,
1273  class LittleVectorTypeY,
1274  class CoefficientType,
1275  class IndexType = int>
1276 KOKKOS_INLINE_FUNCTION void
1277 GEMV (const char trans,
1278  const CoefficientType& alpha,
1279  const LittleBlockType& A,
1280  const LittleVectorTypeX& x,
1281  const CoefficientType& beta,
1282  const LittleVectorTypeY& y)
1283 {
1284  // y(0) returns a reference to the 0-th entry of y. Remove that
1285  // reference to get the type of each entry of y. It's OK if y has
1286  // zero entries -- this doesn't actually do y(i), it just returns
1287  // the type of that expression.
1288  typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1289  const IndexType numRows = static_cast<IndexType> (A.extent (0));
1290  const IndexType numCols = static_cast<IndexType> (A.extent (1));
1291 
1292  if (beta == 0.0) {
1293  if (alpha == 0.0) {
1294  for (IndexType i = 0; i < numRows; ++i) {
1295  y(i) = 0.0;
1296  }
1297  }
1298  else {
1299  for (IndexType i = 0; i < numRows; ++i) {
1300  y_value_type y_i = 0.0;
1301  for (IndexType j = 0; j < numCols; ++j) {
1302  y_i += A(i,j) * x(j);
1303  }
1304  y(i) = y_i;
1305  }
1306  }
1307  }
1308  else { // beta != 0
1309  if (alpha == 0.0) {
1310  if (beta == 0.0) {
1311  for (IndexType i = 0; i < numRows; ++i) {
1312  y(i) = 0.0;
1313  }
1314  }
1315  else {
1316  for (IndexType i = 0; i < numRows; ++i) {
1317  y(i) *= beta;
1318  }
1319  }
1320  }
1321  else {
1322  for (IndexType i = 0; i < numRows; ++i) {
1323  y_value_type y_i = beta * y(i);
1324  for (IndexType j = 0; j < numCols; ++j) {
1325  y_i += alpha * A(i,j) * x(j);
1326  }
1327  y(i) = y_i;
1328  }
1329  }
1330  }
1331 }
1332 
1333 #endif // 0
1334 
1335 } // namespace Tpetra
1336 
1337 #endif // TPETRA_BLOCKVIEW_HPP
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra::SCAL function.
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
Implementation of Tpetra::COPY function.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Computes the solution to Ax=b.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
Implementation of Tpetra::FILL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.