Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_BlockView.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_BLOCKVIEW_HPP
43 #define TPETRA_BLOCKVIEW_HPP
44 
52 
53 #include "TpetraCore_config.h"
54 #include "Kokkos_ArithTraits.hpp"
55 #include "Kokkos_Complex.hpp"
56 
57 namespace Tpetra {
58 
63 
64 namespace Impl {
65 
72 template<class ViewType1,
73  class ViewType2,
74  const int rank1 = ViewType1::rank>
75 struct AbsMax {
76  static KOKKOS_INLINE_FUNCTION void
77  run (const ViewType2& Y, const ViewType1& X);
78 };
79 
84 template<class ViewType1,
85  class ViewType2>
86 struct AbsMax<ViewType1, ViewType2, 2> {
89  static KOKKOS_INLINE_FUNCTION void
90  run (const ViewType2& Y, const ViewType1& X)
91  {
92  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
93  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
94  typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
95  static_assert(! std::is_const<STY>::value,
96  "AbsMax: The type of each entry of Y must be nonconst.");
97  typedef typename std::decay<decltype (X(0,0)) >::type STX;
98  static_assert( std::is_same<STX, STY>::value,
99  "AbsMax: The type of each entry of X and Y must be the same.");
100  typedef Kokkos::ArithTraits<STY> KAT;
101 
102  const int numCols = Y.extent (1);
103  const int numRows = Y.extent (0);
104  for (int j = 0; j < numCols; ++j) {
105  for (int i = 0; i < numRows; ++i) {
106  STY& Y_ij = Y(i,j); // use ref here to avoid 2nd op() call on Y
107  const STX X_ij = X(i,j);
108  // NOTE: no std::max (not a CUDA __device__ function); must
109  // cast back up to complex.
110  const auto Y_ij_abs = KAT::abs (Y_ij);
111  const auto X_ij_abs = KAT::abs (X_ij);
112  Y_ij = (Y_ij_abs >= X_ij_abs) ?
113  static_cast<STY> (Y_ij_abs) :
114  static_cast<STY> (X_ij_abs);
115  }
116  }
117  }
118 };
119 
124 template<class ViewType1,
125  class ViewType2>
126 struct AbsMax<ViewType1, ViewType2, 1> {
129  static KOKKOS_INLINE_FUNCTION void
130  run (const ViewType2& Y, const ViewType1& X)
131  {
132  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
133  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
134 
135  typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
136  static_assert(! std::is_const<STY>::value,
137  "AbsMax: The type of each entry of Y must be nonconst.");
138  typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
139  static_assert( std::is_same<STX, STY>::value,
140  "AbsMax: The type of each entry of X and Y must be the same.");
141  typedef Kokkos::ArithTraits<STY> KAT;
142 
143  const int numRows = Y.extent (0);
144  for (int i = 0; i < numRows; ++i) {
145  STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
146  const STX X_i = X(i);
147  // NOTE: no std::max (not a CUDA __device__ function); must
148  // cast back up to complex.
149  const auto Y_i_abs = KAT::abs (Y_i);
150  const auto X_i_abs = KAT::abs (X_i);
151  Y_i = (Y_i_abs >= X_i_abs) ?
152  static_cast<STY> (Y_i_abs) :
153  static_cast<STY> (X_i_abs);
154  }
155  }
156 };
157 
164 template<class ViewType1, class ViewType2, const int rank = ViewType1::rank>
165 KOKKOS_INLINE_FUNCTION void
166 absMax (const ViewType2& Y, const ViewType1& X)
167 {
168  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
169  "absMax: ViewType1 and ViewType2 must have the same rank.");
171 }
172 
177 template<class ViewType,
178  class CoefficientType,
179  class IndexType = int,
180  const bool is_contiguous = false,
181  const int rank = ViewType::rank>
182 struct SCAL {
183  static KOKKOS_INLINE_FUNCTION void
184  run (const CoefficientType& alpha, const ViewType& x);
185 };
186 
189 template<class ViewType,
190  class CoefficientType,
191  class IndexType>
192 struct SCAL<ViewType, CoefficientType, IndexType, false, 1> {
194  static KOKKOS_INLINE_FUNCTION void
195  run (const CoefficientType& alpha, const ViewType& x)
196  {
197  const IndexType numRows = static_cast<IndexType> (x.extent (0));
198 
200 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
201 #pragma unroll
202 #endif
203  for (IndexType i = 0; i < numRows; ++i)
204  x(i) = alpha * x(i);
205  }
206 };
209 template<class ViewType,
210  class CoefficientType,
211  class IndexType>
212 struct SCAL<ViewType, CoefficientType, IndexType, false, 2> {
214  static KOKKOS_INLINE_FUNCTION void
215  run (const CoefficientType& alpha, const ViewType& A)
216  {
217  const IndexType numRows = static_cast<IndexType> (A.extent (0));
218  const IndexType numCols = static_cast<IndexType> (A.extent (1));
219 
220  for (IndexType j = 0; j < numCols; ++j)
221  for (IndexType i = 0; i < numRows; ++i)
222  A(i,j) = alpha * A(i,j);
223  }
224 };
225 template<class ViewType,
226  class CoefficientType,
227  class IndexType,
228  const int rank>
229 struct SCAL<ViewType, CoefficientType, IndexType, true, rank> {
231  static KOKKOS_INLINE_FUNCTION void
232  run (const CoefficientType& alpha, const ViewType& x)
233  {
234  using x_value_type = typename std::decay<decltype (*x.data()) >::type;
235  const IndexType span = static_cast<IndexType> (x.span());
236  x_value_type *__restrict x_ptr(x.data());
237 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
238 #pragma unroll
239 #endif
240  for (IndexType i = 0; i < span; ++i)
241  x_ptr[i] = alpha * x_ptr[i];
242  }
243 };
244 
249 template<class ViewType,
250  class InputType,
251  class IndexType = int,
252  const bool is_contiguous = false,
253  const int rank = ViewType::rank>
254 struct FILL {
255  static KOKKOS_INLINE_FUNCTION void
256  run (const ViewType& x, const InputType& val);
257 };
258 
261 template<class ViewType,
262  class InputType,
263  class IndexType>
264 struct FILL<ViewType, InputType, IndexType, false, 1> {
265  static KOKKOS_INLINE_FUNCTION void
266  run (const ViewType& x, const InputType& val)
267  {
268  const IndexType numRows = static_cast<IndexType> (x.extent (0));
269  for (IndexType i = 0; i < numRows; ++i)
270  x(i) = val;
271  }
272 };
275 template<class ViewType,
276  class InputType,
277  class IndexType>
278 struct FILL<ViewType, InputType, IndexType, false, 2> {
279  static KOKKOS_INLINE_FUNCTION void
280  run (const ViewType& X, const InputType& val)
281  {
282  const IndexType numRows = static_cast<IndexType> (X.extent (0));
283  const IndexType numCols = static_cast<IndexType> (X.extent (1));
284  for (IndexType j = 0; j < numCols; ++j)
285  for (IndexType i = 0; i < numRows; ++i)
286  X(i,j) = val;
287  }
288 };
289 template<class ViewType,
290  class InputType,
291  class IndexType,
292  const int rank>
293 struct FILL<ViewType, InputType, IndexType, true, rank> {
294  static KOKKOS_INLINE_FUNCTION void
295  run (const ViewType& x, const InputType& val)
296  {
297  const IndexType span = static_cast<IndexType> (x.span());
298  auto x_ptr = x.data();
299 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
300 #pragma unroll
301 #endif
302  for (IndexType i = 0; i < span; ++i)
303  x_ptr[i] = val;
304  }
305 };
306 
307 
312 template<class CoefficientType,
313  class ViewType1,
314  class ViewType2,
315  class IndexType = int,
316  const bool is_contiguous = false,
317  const int rank = ViewType1::rank>
318 struct AXPY {
319  static KOKKOS_INLINE_FUNCTION void
320  run (const CoefficientType& alpha,
321  const ViewType1& x,
322  const ViewType2& y);
323 };
324 
327 template<class CoefficientType,
328  class ViewType1,
329  class ViewType2,
330  class IndexType>
331 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 1> {
333  static KOKKOS_INLINE_FUNCTION void
334  run (const CoefficientType& alpha,
335  const ViewType1& x,
336  const ViewType2& y)
337  {
338  using Kokkos::ArithTraits;
339  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
340  "AXPY: x and y must have the same rank.");
341 
342  const IndexType numRows = static_cast<IndexType> (y.extent (0));
343  if (alpha != ArithTraits<CoefficientType>::zero ()) {
345  for (IndexType i = 0; i < numRows; ++i)
346  y(i) += alpha * x(i);
347  }
348  }
349 };
350 
353 template<class CoefficientType,
354  class ViewType1,
355  class ViewType2,
356  class IndexType>
357 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 2> {
359  static KOKKOS_INLINE_FUNCTION void
360  run (const CoefficientType& alpha,
361  const ViewType1& X,
362  const ViewType2& Y)
363  {
364  using Kokkos::ArithTraits;
365  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
366  "AXPY: X and Y must have the same rank.");
367  const IndexType numRows = static_cast<IndexType> (Y.extent (0));
368  const IndexType numCols = static_cast<IndexType> (Y.extent (1));
369 
370  if (alpha != ArithTraits<CoefficientType>::zero ()) {
371  for (IndexType j = 0; j < numCols; ++j)
372  for (IndexType i = 0; i < numRows; ++i)
373  Y(i,j) += alpha * X(i,j);
374  }
375  }
376 };
377 
378 template<class CoefficientType,
379  class ViewType1,
380  class ViewType2,
381  class IndexType,
382  const int rank>
383 struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, true, rank> {
385  static KOKKOS_INLINE_FUNCTION void
386  run (const CoefficientType& alpha,
387  const ViewType1& x,
388  const ViewType2& y)
389  {
390  using Kokkos::ArithTraits;
391  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
392  "AXPY: x and y must have the same rank.");
393 
394  if (alpha != ArithTraits<CoefficientType>::zero ()) {
395  using x_value_type = typename std::decay<decltype (*x.data()) >::type;
396  using y_value_type = typename std::decay<decltype (*y.data()) >::type;
397  const IndexType span = static_cast<IndexType> (y.span());
398  const x_value_type *__restrict x_ptr(x.data());
399  y_value_type *__restrict y_ptr(y.data());
400 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
401 #pragma unroll
402 #endif
403  for (IndexType i = 0; i < span; ++i)
404  y_ptr[i] += alpha * x_ptr[i];
405  }
406  }
407 };
408 
413 template<class ViewType1,
414  class ViewType2,
415  class IndexType = int,
416  const bool is_contiguous = false,
417  const int rank = ViewType1::rank>
418 struct COPY {
419  static KOKKOS_INLINE_FUNCTION void
420  run (const ViewType1& x, const ViewType2& y);
421 };
422 
425 template<class ViewType1,
426  class ViewType2,
427  class IndexType>
428 struct COPY<ViewType1, ViewType2, IndexType, false, 1> {
430  static KOKKOS_INLINE_FUNCTION void
431  run (const ViewType1& x, const ViewType2& y)
432  {
433  const IndexType numRows = static_cast<IndexType> (x.extent (0));
435  for (IndexType i = 0; i < numRows; ++i)
436  y(i) = x(i);
437  }
438 };
439 
442 template<class ViewType1,
443  class ViewType2,
444  class IndexType>
445 struct COPY<ViewType1, ViewType2, IndexType, false, 2> {
447  static KOKKOS_INLINE_FUNCTION void
448  run (const ViewType1& X, const ViewType2& Y)
449  {
450  const IndexType numRows = static_cast<IndexType> (Y.extent (0));
451  const IndexType numCols = static_cast<IndexType> (Y.extent (1));
453  for (IndexType j = 0; j < numCols; ++j)
454  for (IndexType i = 0; i < numRows; ++i)
455  Y(i,j) = X(i,j);
456  }
457 };
458 
459 template<class ViewType1,
460  class ViewType2,
461  class IndexType,
462  const int rank>
463 struct COPY<ViewType1, ViewType2, IndexType, true, rank> {
465  static KOKKOS_INLINE_FUNCTION void
466  run (const ViewType1& x, const ViewType2& y)
467  {
468  const IndexType span = static_cast<IndexType> (x.span());
469  using x_value_type = typename std::decay<decltype (*x.data()) >::type;
470  using y_value_type = typename std::decay<decltype (*y.data()) >::type;
471  const x_value_type *__restrict x_ptr(x.data());
472  y_value_type *__restrict y_ptr(y.data());
473 
474 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
475 #pragma unroll
476 #endif
477  for (IndexType i = 0; i < span; ++i)
478  y_ptr[i] = x_ptr[i];
479  }
480 };
481 
482 template<class VecType1,
483  class BlkType,
484  class VecType2,
485  class CoeffType,
486  class IndexType = int,
487  bool is_contiguous = false,
488  class BlkLayoutType = typename BlkType::array_layout>
489 struct GEMV {
490  static KOKKOS_INLINE_FUNCTION void
491  run (const CoeffType& alpha,
492  const BlkType& A,
493  const VecType1& x,
494  const VecType2& y)
495  {
496  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
497  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
498  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
499 
500  const IndexType numRows = static_cast<IndexType> (A.extent (0));
501  const IndexType numCols = static_cast<IndexType> (A.extent (1));
502 
504  for (IndexType i = 0; i < numRows; ++i)
505  for (IndexType j = 0; j < numCols; ++j)
506  y(i) += alpha * A(i,j) * x(j);
507  }
508 };
509 
510 template<class VecType1,
511  class BlkType,
512  class VecType2,
513  class CoeffType,
514  class IndexType>
515 struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutLeft> {
516  static KOKKOS_INLINE_FUNCTION void
517  run (const CoeffType& alpha,
518  const BlkType& A,
519  const VecType1& x,
520  const VecType2& y)
521  {
522  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
523  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
524  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
525 
526  using A_value_type = typename std::decay<decltype (A(0,0)) >::type;
527  using x_value_type = typename std::decay<decltype (x(0)) >::type;
528  using y_value_type = typename std::decay<decltype (y(0)) >::type;
529 
530  const IndexType numRows = static_cast<IndexType> (A.extent (0));
531  const IndexType numCols = static_cast<IndexType> (A.extent (1));
532 
533  const A_value_type *__restrict A_ptr(A.data()); const IndexType as1(A.stride(1));
534  const x_value_type *__restrict x_ptr(x.data());
535  y_value_type *__restrict y_ptr(y.data());
536 
537 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
538 #pragma unroll
539 #endif
540  for (IndexType j=0;j<numCols;++j) {
541  const x_value_type x_at_j = alpha*x_ptr[j];
542  const A_value_type *__restrict A_at_j = A_ptr + j*as1;
543 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
544 #pragma unroll
545 #endif
546  for (IndexType i=0;i<numRows;++i)
547  y_ptr[i] += A_at_j[i] * x_at_j;
548  }
549  }
550 };
551 
552 template<class VecType1,
553  class BlkType,
554  class VecType2,
555  class CoeffType,
556  class IndexType>
557 struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutRight> {
558  static KOKKOS_INLINE_FUNCTION void
559  run (const CoeffType& alpha,
560  const BlkType& A,
561  const VecType1& x,
562  const VecType2& y)
563  {
564  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
565  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
566  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
567 
568  using A_value_type = typename std::decay<decltype (A(0,0)) >::type;
569  using x_value_type = typename std::decay<decltype (x(0)) >::type;
570  using y_value_type = typename std::decay<decltype (y(0)) >::type;
571 
572  const IndexType numRows = static_cast<IndexType> (A.extent (0));
573  const IndexType numCols = static_cast<IndexType> (A.extent (1));
574 
575  const A_value_type *__restrict A_ptr(A.data()); const IndexType as0(A.stride(0));
576  const x_value_type *__restrict x_ptr(x.data());
577  y_value_type *__restrict y_ptr(y.data());
578 
579 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
580 #pragma unroll
581 #endif
582  for (IndexType i=0;i<numRows;++i) {
583  y_value_type y_at_i(0);
584  const auto A_at_i = A_ptr + i*as0;
585 #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
586 #pragma unroll
587 #endif
588  for (IndexType j=0;j<numCols;++j)
589  y_at_i += A_at_i[j] * x_ptr[j];
590  y_ptr[i] += alpha*y_at_i;
591  }
592  }
593 };
594 
595 } // namespace Impl
596 
599 template<class ViewType,
600  class CoefficientType,
601  class IndexType = int,
602  const int rank = ViewType::rank>
603 KOKKOS_INLINE_FUNCTION void
604 SCAL (const CoefficientType& alpha, const ViewType& x)
605 {
606  using LayoutType = typename ViewType::array_layout;
607  constexpr bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
608  std::is_same<LayoutType,Kokkos::LayoutRight>::value);
610 }
611 
613 template<class ViewType,
614  class InputType,
615  class IndexType = int,
616  const int rank = ViewType::rank>
617 KOKKOS_INLINE_FUNCTION void
618 FILL (const ViewType& x, const InputType& val)
619 {
620  using LayoutType = typename ViewType::array_layout;
621  constexpr bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
622  std::is_same<LayoutType,Kokkos::LayoutRight>::value);
624 }
625 
631 template<class CoefficientType,
632  class ViewType1,
633  class ViewType2,
634  class IndexType = int,
635  const int rank = ViewType1::rank>
636 KOKKOS_INLINE_FUNCTION void
637 AXPY (const CoefficientType& alpha,
638  const ViewType1& x,
639  const ViewType2& y)
640 {
641  static_assert (static_cast<int> (ViewType1::rank) ==
642  static_cast<int> (ViewType2::rank),
643  "AXPY: x and y must have the same rank.");
644  using LayoutType1 = typename ViewType1::array_layout;
645  using LayoutType2 = typename ViewType2::array_layout;
646  constexpr bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
647  constexpr bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
648  std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
649  constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
651 }
652 
661 template<class ViewType1,
662  class ViewType2,
663  class IndexType = int,
664  const int rank = ViewType1::rank>
665 KOKKOS_INLINE_FUNCTION void
666 COPY (const ViewType1& x, const ViewType2& y)
667 {
668  static_assert (static_cast<int> (ViewType1::rank) ==
669  static_cast<int> (ViewType2::rank),
670  "COPY: x and y must have the same rank.");
671  using LayoutType1 = typename ViewType1::array_layout;
672  using LayoutType2 = typename ViewType2::array_layout;
673  constexpr bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
674  constexpr bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
675  std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
676  constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
678 }
679 
691 template<class VecType1,
692  class BlkType,
693  class VecType2,
694  class CoeffType,
695  class IndexType = int>
696 KOKKOS_INLINE_FUNCTION void
697 GEMV (const CoeffType& alpha,
698  const BlkType& A,
699  const VecType1& x,
700  const VecType2& y)
701 {
702  constexpr bool is_A_contiguous = (std::is_same<typename BlkType::array_layout, Kokkos::LayoutLeft>::value ||
703  std::is_same<typename BlkType::array_layout, Kokkos::LayoutRight>::value);
704  constexpr bool is_x_contiguous = (std::is_same<typename VecType1::array_layout, Kokkos::LayoutLeft>::value ||
705  std::is_same<typename VecType1::array_layout, Kokkos::LayoutRight>::value);
706  constexpr bool is_y_contiguous = (std::is_same<typename VecType2::array_layout, Kokkos::LayoutLeft>::value ||
707  std::is_same<typename VecType2::array_layout, Kokkos::LayoutRight>::value);
708  constexpr bool is_contiguous = is_A_contiguous && is_x_contiguous && is_y_contiguous;
709  Impl::GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, is_contiguous>::run (alpha, A, x, y);
710 }
711 
719 template<class ViewType1,
720  class ViewType2,
721  class ViewType3,
722  class CoefficientType,
723  class IndexType = int>
724 KOKKOS_INLINE_FUNCTION void
725 GEMM (const char transA[],
726  const char transB[],
727  const CoefficientType& alpha,
728  const ViewType1& A,
729  const ViewType2& B,
730  const CoefficientType& beta,
731  const ViewType3& C)
732 {
733  // Assert that A, B, and C are in fact matrices
734  static_assert (ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
735  static_assert (ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
736  static_assert (ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
737 
738  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
739  typedef Kokkos::ArithTraits<Scalar> STS;
740  const Scalar ZERO = STS::zero();
741  const Scalar ONE = STS::one();
742 
743  // Get the dimensions
744  IndexType m, n, k;
745  if(transA[0] == 'N' || transA[0] == 'n') {
746  m = static_cast<IndexType> (A.extent (0));
747  n = static_cast<IndexType> (A.extent (1));
748  }
749  else {
750  m = static_cast<IndexType> (A.extent (1));
751  n = static_cast<IndexType> (A.extent (0));
752  }
753  k = static_cast<IndexType> (C.extent (1));
754 
755  // quick return if possible
756  if(alpha == ZERO && beta == ONE)
757  return;
758 
759  // And if alpha equals zero...
760  if(alpha == ZERO) {
761  if(beta == ZERO) {
762  for(IndexType i=0; i<m; i++) {
763  for(IndexType j=0; j<k; j++) {
764  C(i,j) = ZERO;
765  }
766  }
767  }
768  else {
769  for(IndexType i=0; i<m; i++) {
770  for(IndexType j=0; j<k; j++) {
771  C(i,j) = beta*C(i,j);
772  }
773  }
774  }
775  }
776 
777  // Start the operations
778  if(transB[0] == 'n' || transB[0] == 'N') {
779  if(transA[0] == 'n' || transA[0] == 'N') {
780  // Form C = alpha*A*B + beta*C
781  for(IndexType j=0; j<n; j++) {
782  if(beta == ZERO) {
783  for(IndexType i=0; i<m; i++) {
784  C(i,j) = ZERO;
785  }
786  }
787  else if(beta != ONE) {
788  for(IndexType i=0; i<m; i++) {
789  C(i,j) = beta*C(i,j);
790  }
791  }
792  for(IndexType l=0; l<k; l++) {
793  Scalar temp = alpha*B(l,j);
794  for(IndexType i=0; i<m; i++) {
795  C(i,j) = C(i,j) + temp*A(i,l);
796  }
797  }
798  }
799  }
800  else {
801  // Form C = alpha*A**T*B + beta*C
802  for(IndexType j=0; j<n; j++) {
803  for(IndexType i=0; i<m; i++) {
804  Scalar temp = ZERO;
805  for(IndexType l=0; l<k; l++) {
806  temp = temp + A(l,i)*B(l,j);
807  }
808  if(beta == ZERO) {
809  C(i,j) = alpha*temp;
810  }
811  else {
812  C(i,j) = alpha*temp + beta*C(i,j);
813  }
814  }
815  }
816  }
817  }
818  else {
819  if(transA[0] == 'n' || transA[0] == 'N') {
820  // Form C = alpha*A*B**T + beta*C
821  for(IndexType j=0; j<n; j++) {
822  if(beta == ZERO) {
823  for(IndexType i=0; i<m; i++) {
824  C(i,j) = ZERO;
825  }
826  }
827  else if(beta != ONE) {
828  for(IndexType i=0; i<m; i++) {
829  C(i,j) = beta*C(i,j);
830  }
831  }
832  for(IndexType l=0; l<k; l++) {
833  Scalar temp = alpha*B(j,l);
834  for(IndexType i=0; i<m; i++) {
835  C(i,j) = C(i,j) + temp*A(i,l);
836  }
837  }
838  }
839  }
840  else {
841  // Form C = alpha*A**T*B**T + beta*C
842  for(IndexType j=0; j<n; j++) {
843  for(IndexType i=0; i<m; i++) {
844  Scalar temp = ZERO;
845  for(IndexType l=0; l<k; l++) {
846  temp = temp + A(l,i)*B(j,l);
847  }
848  if(beta == ZERO) {
849  C(i,j) = alpha*temp;
850  }
851  else {
852  C(i,j) = alpha*temp + beta*C(i,j);
853  }
854  }
855  }
856  }
857  }
858 }
859 
861 template<class LittleBlockType,
862  class LittleVectorType>
863 KOKKOS_INLINE_FUNCTION void
864 GETF2 (const LittleBlockType& A, const LittleVectorType& ipiv, int& info)
865 {
866  // The type of an entry of ipiv is the index type.
867  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
868  static_assert (std::is_integral<IndexType>::value,
869  "GETF2: The type of each entry of ipiv must be an integer type.");
870  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
871  static_assert (! std::is_const<Scalar>::value,
872  "GETF2: A must not be a const View (or LittleBlock).");
873  static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
874  "GETF2: ipiv must not be a const View (or LittleBlock).");
875  static_assert (LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
876  typedef Kokkos::ArithTraits<Scalar> STS;
877  const Scalar ZERO = STS::zero();
878 
879  const IndexType numRows = static_cast<IndexType> (A.extent (0));
880  const IndexType numCols = static_cast<IndexType> (A.extent (1));
881  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
882 
883  // std::min is not a CUDA device function
884  const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
885  if (pivDim < minPivDim) {
886  info = -2;
887  return;
888  }
889 
890  // Initialize info
891  info = 0;
892 
893  for(IndexType j=0; j < pivDim; j++)
894  {
895  // Find pivot and test for singularity
896  IndexType jp = j;
897  for(IndexType i=j+1; i<numRows; i++)
898  {
899  if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
900  jp = i;
901  }
902  }
903  ipiv(j) = jp+1;
904 
905  if(A(jp,j) != ZERO)
906  {
907  // Apply the interchange to columns 1:N
908  if(jp != j)
909  {
910  for(IndexType i=0; i < numCols; i++)
911  {
912  Scalar temp = A(jp,i);
913  A(jp,i) = A(j,i);
914  A(j,i) = temp;
915  }
916  }
917 
918  // Compute elements J+1:M of J-th column
919  for(IndexType i=j+1; i<numRows; i++) {
920  A(i,j) = A(i,j) / A(j,j);
921  }
922  }
923  else if(info == 0) {
924  info = j;
925  }
926 
927  // Update trailing submatrix
928  for(IndexType r=j+1; r < numRows; r++)
929  {
930  for(IndexType c=j+1; c < numCols; c++) {
931  A(r,c) = A(r,c) - A(r,j) * A(j,c);
932  }
933  }
934  }
935 }
936 
937 namespace Impl {
938 
942 template<class LittleBlockType,
943  class LittleIntVectorType,
944  class LittleScalarVectorType,
945  const int rank = LittleScalarVectorType::rank>
946 struct GETRS {
947  static KOKKOS_INLINE_FUNCTION void
948  run (const char mode[],
949  const LittleBlockType& A,
950  const LittleIntVectorType& ipiv,
951  const LittleScalarVectorType& B,
952  int& info);
953 };
954 
956 template<class LittleBlockType,
957  class LittleIntVectorType,
958  class LittleScalarVectorType>
959 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
960  static KOKKOS_INLINE_FUNCTION void
961  run (const char mode[],
962  const LittleBlockType& A,
963  const LittleIntVectorType& ipiv,
964  const LittleScalarVectorType& B,
965  int& info)
966  {
967  // The type of an entry of ipiv is the index type.
968  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
969  // IndexType must be signed, because this code does a countdown loop
970  // to zero. Unsigned integers are always >= 0, even on underflow.
971  static_assert (std::is_integral<IndexType>::value &&
972  std::is_signed<IndexType>::value,
973  "GETRS: The type of each entry of ipiv must be a signed integer.");
974  typedef typename std::decay<decltype (A(0,0))>::type Scalar;
975  static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
976  "GETRS: B must not be a const View (or LittleBlock).");
977  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
978  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
979  static_assert (LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
980 
981  typedef Kokkos::ArithTraits<Scalar> STS;
982  const Scalar ZERO = STS::zero();
983  const IndexType numRows = static_cast<IndexType> (A.extent (0));
984  const IndexType numCols = static_cast<IndexType> (A.extent (1));
985  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
986 
987  info = 0;
988 
989  // Ensure that the matrix is square
990  if (numRows != numCols) {
991  info = -2;
992  return;
993  }
994 
995  // Ensure that the pivot array is sufficiently large
996  if (pivDim < numRows) {
997  info = -3;
998  return;
999  }
1000 
1001  // No transpose case
1002  if(mode[0] == 'n' || mode[0] == 'N') {
1003  // Apply row interchanges to the RHS
1004  for(IndexType i=0; i<numRows; i++) {
1005  if(ipiv(i) != i+1) {
1006  Scalar temp = B(i);
1007  B(i) = B(ipiv(i)-1);
1008  B(ipiv(i)-1) = temp;
1009  }
1010  }
1011 
1012  // Solve Lx=b, overwriting b with x
1013  for(IndexType r=1; r < numRows; r++) {
1014  for(IndexType c=0; c < r; c++) {
1015  B(r) = B(r) - A(r,c)*B(c);
1016  }
1017  }
1018 
1019  // Solve Ux=b, overwriting b with x
1020  for(IndexType r=numRows-1; r >= 0; r--) {
1021  // Check whether U is singular
1022  if(A(r,r) == ZERO) {
1023  info = r+1;
1024  return;
1025  }
1026 
1027  for(IndexType c=r+1; c < numCols; c++) {
1028  B(r) = B(r) - A(r,c)*B(c);
1029  }
1030  B(r) = B(r) / A(r,r);
1031  }
1032  }
1033  // Transpose case
1034  else if(mode[0] == 't' || mode[0] == 'T') {
1035  info = -1; // NOT YET IMPLEMENTED
1036  return;
1037  }
1038  // Conjugate transpose case
1039  else if (mode[0] == 'c' || mode[0] == 'C') {
1040  info = -1; // NOT YET IMPLEMENTED
1041  return;
1042  }
1043  else { // invalid mode
1044  info = -1;
1045  return;
1046  }
1047  }
1048 };
1049 
1050 
1052 template<class LittleBlockType,
1053  class LittleIntVectorType,
1054  class LittleScalarVectorType>
1055 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1056  static KOKKOS_INLINE_FUNCTION void
1057  run (const char mode[],
1058  const LittleBlockType& A,
1059  const LittleIntVectorType& ipiv,
1060  const LittleScalarVectorType& B,
1061  int& info)
1062  {
1063  // The type of an entry of ipiv is the index type.
1064  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1065  static_assert (std::is_integral<IndexType>::value,
1066  "GETRS: The type of each entry of ipiv must be an integer type.");
1067  static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1068  "GETRS: B must not be a const View (or LittleBlock).");
1069  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1070  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1071  static_assert (LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
1072 
1073  // The current implementation iterates over one right-hand side at
1074  // a time. It might be faster to do this differently, but this
1075  // should work for now.
1076  const IndexType numRhs = B.extent (1);
1077  info = 0;
1078 
1079  for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1080  auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1082  if (info != 0) {
1083  return;
1084  }
1085  }
1086  }
1087 };
1088 
1089 } // namespace Impl
1090 
1094 template<class LittleBlockType,
1095  class LittleIntVectorType,
1096  class LittleScalarVectorType>
1097 KOKKOS_INLINE_FUNCTION void
1098 GETRS (const char mode[],
1099  const LittleBlockType& A,
1100  const LittleIntVectorType& ipiv,
1101  const LittleScalarVectorType& B,
1102  int& info)
1103 {
1104  Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1105  LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1106 }
1107 
1108 
1123 template<class LittleBlockType,
1124  class LittleIntVectorType,
1125  class LittleScalarVectorType>
1126 KOKKOS_INLINE_FUNCTION void
1127 GETRI (const LittleBlockType& A,
1128  const LittleIntVectorType& ipiv,
1129  const LittleScalarVectorType& work,
1130  int& info)
1131 {
1132  // The type of an entry of ipiv is the index type.
1133  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1134  // IndexType must be signed, because this code does a countdown loop
1135  // to zero. Unsigned integers are always >= 0, even on underflow.
1136  static_assert (std::is_integral<IndexType>::value &&
1137  std::is_signed<IndexType>::value,
1138  "GETRI: The type of each entry of ipiv must be a signed integer.");
1139  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1140  static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1141  "GETRI: A must not be a const View (or LittleBlock).");
1142  static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1143  "GETRI: work must not be a const View (or LittleBlock).");
1144  static_assert (LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1145  typedef Kokkos::ArithTraits<Scalar> STS;
1146  const Scalar ZERO = STS::zero();
1147  const Scalar ONE = STS::one();
1148 
1149  const IndexType numRows = static_cast<IndexType> (A.extent (0));
1150  const IndexType numCols = static_cast<IndexType> (A.extent (1));
1151  const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
1152  const IndexType workDim = static_cast<IndexType> (work.extent (0));
1153 
1154  info = 0;
1155 
1156  // Ensure that the matrix is square
1157  if (numRows != numCols) {
1158  info = -1;
1159  return;
1160  }
1161 
1162  // Ensure that the pivot array is sufficiently large
1163  if (pivDim < numRows) {
1164  info = -2;
1165  return;
1166  }
1167 
1168  // Ensure that the work array is sufficiently large
1169  if (workDim < numRows) {
1170  info = -3;
1171  return;
1172  }
1173 
1174  // Form Uinv in place
1175  for(IndexType j=0; j < numRows; j++) {
1176  if(A(j,j) == ZERO) {
1177  info = j+1;
1178  return;
1179  }
1180 
1181  A(j,j) = ONE / A(j,j);
1182 
1183  // Compute elements 1:j-1 of j-th column
1184  for(IndexType r=0; r < j; r++) {
1185  A(r,j) = A(r,r)*A(r,j);
1186  for(IndexType c=r+1; c < j; c++) {
1187  A(r,j) = A(r,j) + A(r,c)*A(c,j);
1188  }
1189  }
1190  for(IndexType r=0; r < j; r++) {
1191  A(r,j) = -A(j,j)*A(r,j);
1192  }
1193  }
1194 
1195  // Compute Ainv by solving A\L = Uinv
1196  for(IndexType j = numCols-2; j >= 0; j--) {
1197  // Copy lower triangular data to work array and replace with 0
1198  for(IndexType r=j+1; r < numRows; r++) {
1199  work(r) = A(r,j);
1200  A(r,j) = 0;
1201  }
1202 
1203  for(IndexType r=0; r < numRows; r++) {
1204  for(IndexType i=j+1; i < numRows; i++) {
1205  A(r,j) = A(r,j) - work(i)*A(r,i);
1206  }
1207  }
1208  }
1209 
1210  // Apply column interchanges
1211  for(IndexType j=numRows-1; j >= 0; j--) {
1212  IndexType jp = ipiv(j)-1;
1213  if(j != jp) {
1214  for(IndexType r=0; r < numRows; r++) {
1215  Scalar temp = A(r,j);
1216  A(r,j) = A(r,jp);
1217  A(r,jp) = temp;
1218  }
1219  }
1220  }
1221 }
1222 
1223 
1224 // mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1225 // an implementation for trans != 'N' (the transpose and conjugate
1226 // transpose cases).
1227 #if 0
1228 template<class LittleBlockType,
1229  class LittleVectorTypeX,
1230  class LittleVectorTypeY,
1231  class CoefficientType,
1232  class IndexType = int>
1233 KOKKOS_INLINE_FUNCTION void
1234 GEMV (const char trans,
1235  const CoefficientType& alpha,
1236  const LittleBlockType& A,
1237  const LittleVectorTypeX& x,
1238  const CoefficientType& beta,
1239  const LittleVectorTypeY& y)
1240 {
1241  // y(0) returns a reference to the 0-th entry of y. Remove that
1242  // reference to get the type of each entry of y. It's OK if y has
1243  // zero entries -- this doesn't actually do y(i), it just returns
1244  // the type of that expression.
1245  typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1246  const IndexType numRows = static_cast<IndexType> (A.extent (0));
1247  const IndexType numCols = static_cast<IndexType> (A.extent (1));
1248 
1249  if (beta == 0.0) {
1250  if (alpha == 0.0) {
1251  for (IndexType i = 0; i < numRows; ++i) {
1252  y(i) = 0.0;
1253  }
1254  }
1255  else {
1256  for (IndexType i = 0; i < numRows; ++i) {
1257  y_value_type y_i = 0.0;
1258  for (IndexType j = 0; j < numCols; ++j) {
1259  y_i += A(i,j) * x(j);
1260  }
1261  y(i) = y_i;
1262  }
1263  }
1264  }
1265  else { // beta != 0
1266  if (alpha == 0.0) {
1267  if (beta == 0.0) {
1268  for (IndexType i = 0; i < numRows; ++i) {
1269  y(i) = 0.0;
1270  }
1271  }
1272  else {
1273  for (IndexType i = 0; i < numRows; ++i) {
1274  y(i) *= beta;
1275  }
1276  }
1277  }
1278  else {
1279  for (IndexType i = 0; i < numRows; ++i) {
1280  y_value_type y_i = beta * y(i);
1281  for (IndexType j = 0; j < numCols; ++j) {
1282  y_i += alpha * A(i,j) * x(j);
1283  }
1284  y(i) = y_i;
1285  }
1286  }
1287  }
1288 }
1289 
1290 #endif // 0
1291 
1292 } // namespace Tpetra
1293 
1294 #endif // TPETRA_BLOCKVIEW_HPP
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra::SCAL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
Implementation of Tpetra::COPY function.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra::AXPY function.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Computes the solution to Ax=b.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
Implementation of Tpetra::FILL function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.