Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_Sequential_SparseKernels.hpp
Go to the documentation of this file.
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos: Node API and Parallel Node Kernels
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
44 #ifndef KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP
45 #define KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP
46 
64 
65 #include <KokkosLinAlg_config.h>
66 #include <Kokkos_ArithTraits.hpp>
67 
68 namespace Kokkos {
69 namespace Sequential {
70 
97 template<class LocalOrdinal,
98  class OffsetType,
99  class MatrixScalar,
100  class DomainScalar,
101  class RangeScalar>
102 void
103 gaussSeidel (const LocalOrdinal numRows,
104  const LocalOrdinal numCols,
105  const OffsetType* const ptr,
106  const LocalOrdinal* const ind,
107  const MatrixScalar* const val,
108  const DomainScalar* const B,
109  const OffsetType b_stride,
110  RangeScalar* const X,
111  const OffsetType x_stride,
112  const MatrixScalar* const D,
113  const MatrixScalar omega,
114  const Kokkos::ESweepDirection direction)
115 {
117  typedef LocalOrdinal LO;
118  const OffsetType theNumRows = static_cast<OffsetType> (numRows);
119  const OffsetType theNumCols = static_cast<OffsetType> (numCols);
120 
121  if (numRows == 0 || numCols == 0) {
122  return; // Nothing to do.
123  }
124  else if (numRows > 0 && ptr[numRows] == 0) {
125  // All the off-diagonal entries of A are zero, and all the
126  // diagonal entries are (implicitly) 1. Therefore compute: X :=
127  // (1 - omega) X + omega B. There's no need to care about the
128  // direction, since there are no cross-row data dependencies in
129  // this case.
130  const MatrixScalar oneMinusOmega =
131  ArithTraits<MatrixScalar>::one () - omega;
132  for (OffsetType j = 0; j < theNumCols; ++j) {
133  RangeScalar* const x_j = X + j*x_stride;
134  const DomainScalar* const b_j = B + j*b_stride;
135  for (OffsetType i = 0; i < theNumRows; ++i) {
136  x_j[i] = oneMinusOmega * x_j[i] + omega * b_j[i];
137  }
138  }
139  return;
140  }
141 
142  if (numCols == 1) {
143  if (direction == Kokkos::Forward) {
144  for (LO i = 0; i < numRows; ++i) {
145  RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
146  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
147  const LO j = ind[k];
148  const MatrixScalar A_ij = val[k];
149  x_temp += A_ij * X[j];
150  }
151  X[i] += omega * D[i] * (B[i] - x_temp);
152  }
153  } else if (direction == Kokkos::Backward) {
154  // Split the loop so that it is correct even if LO is unsigned.
155  // It's a bad idea for LO to be unsigned, but we want this to
156  // work nevertheless.
157  for (LO i = numRows - 1; i != 0; --i) {
158  RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
159  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
160  const LO j = ind[k];
161  const MatrixScalar A_ij = val[k];
162  x_temp += A_ij * X[j];
163  }
164  X[i] += omega * D[i] * (B[i] - x_temp);
165  }
166  { // last loop iteration
167  const LO i = 0;
168  RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
169  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
170  const LO j = ind[k];
171  const MatrixScalar A_ij = val[k];
172  x_temp += A_ij * X[j];
173  }
174  X[i] += omega * D[i] * (B[i] - x_temp);
175  }
176  }
177  }
178  else { // numCols > 1
179  // mfh 20 Dec 2012: If Gauss-Seidel for multivectors with
180  // multiple columns becomes important, we can add unrolled
181  // implementations. The implementation below is not unrolled.
182  // It may also be reasonable to parallelize over right-hand
183  // sides, if there are enough of them, especially if the matrix
184  // fits in cache.
185  Teuchos::Array<RangeScalar> temp (numCols);
186  RangeScalar* const x_temp = temp.getRawPtr ();
187 
188  if (direction == Kokkos::Forward) {
189  for (LO i = 0; i < numRows; ++i) {
190  for (OffsetType c = 0; c < theNumCols; ++c) {
191  x_temp[c] = ArithTraits<RangeScalar>::zero ();
192  }
193  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
194  const LO j = ind[k];
195  const MatrixScalar A_ij = val[k];
196  for (OffsetType c = 0; c < theNumCols; ++c) {
197  x_temp[c] += A_ij * X[j + x_stride*c];
198  }
199  }
200  for (OffsetType c = 0; c < theNumCols; ++c) {
201  X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
202  }
203  }
204  } else if (direction == Kokkos::Backward) { // backward mode
205  // Split the loop so that it is correct even if LO is unsigned.
206  // It's a bad idea for LO to be unsigned, but we want this to
207  // work nevertheless.
208  for (LO i = numRows - 1; i != 0; --i) {
209  for (OffsetType c = 0; c < theNumCols; ++c) {
210  x_temp[c] = ArithTraits<RangeScalar>::zero ();
211  }
212  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
213  const LO j = ind[k];
214  const MatrixScalar A_ij = val[k];
215  for (OffsetType c = 0; c < theNumCols; ++c) {
216  x_temp[c] += A_ij * X[j + x_stride*c];
217  }
218  }
219  for (OffsetType c = 0; c < theNumCols; ++c) {
220  X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
221  }
222  }
223  { // last loop iteration
224  const LO i = 0;
225  for (OffsetType c = 0; c < theNumCols; ++c) {
226  x_temp[c] = ArithTraits<RangeScalar>::zero ();
227  }
228  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
229  const LO j = ind[k];
230  const MatrixScalar A_ij = val[k];
231  for (OffsetType c = 0; c < theNumCols; ++c) {
232  x_temp[c] += A_ij * X[j + x_stride*c];
233  }
234  }
235  for (OffsetType c = 0; c < theNumCols; ++c) {
236  X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
237  }
238  }
239  }
240  }
241 }
242 
243 
277 template<class LocalOrdinal,
278  class OffsetType,
279  class MatrixScalar,
280  class DomainScalar,
281  class RangeScalar>
282 void
283 reorderedGaussSeidel (const LocalOrdinal numRows,
284  const LocalOrdinal numCols,
285  const OffsetType* const ptr,
286  const LocalOrdinal* const ind,
287  const MatrixScalar* const val,
288  const DomainScalar* const B,
289  const OffsetType b_stride,
290  RangeScalar* const X,
291  const OffsetType x_stride,
292  const MatrixScalar* const D,
293  const LocalOrdinal* const rowInd,
294  const LocalOrdinal numRowInds, // length of rowInd
295  const MatrixScalar omega,
296  const Kokkos::ESweepDirection direction)
297 {
299  typedef LocalOrdinal LO;
300  const OffsetType theNumRows = static_cast<OffsetType> (numRows);
301  const OffsetType theNumCols = static_cast<OffsetType> (numCols);
302 
303  if (numRows == 0 || numCols == 0) {
304  return; // Nothing to do.
305  }
306  else if (numRows > 0 && ptr[numRows] == 0) {
307  // All the off-diagonal entries of A are zero, and all the
308  // diagonal entries are (implicitly) 1. Therefore compute: X :=
309  // (1 - omega) X + omega B. There's no need to care about the
310  // direction or row ordering, since there are no cross-row data
311  // dependencies in this case.
312  const MatrixScalar oneMinusOmega =
313  ArithTraits<MatrixScalar>::one () - omega;
314  for (OffsetType j = 0; j < theNumCols; ++j) {
315  RangeScalar* const x_j = X + j*x_stride;
316  const DomainScalar* const b_j = B + j*b_stride;
317  for (OffsetType i = 0; i < theNumRows; ++i) {
318  x_j[i] = oneMinusOmega * x_j[i] + omega * b_j[i];
319  }
320  }
321  return;
322  }
323 
324  if (numCols == 1) {
325  if (direction == Kokkos::Forward) {
326  for (LO ii = 0; ii < numRowInds; ++ii) {
327  LO i = rowInd[ii];
328  RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
329  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
330  const LO j = ind[k];
331  const MatrixScalar A_ij = val[k];
332  x_temp += A_ij * X[j];
333  }
334  X[i] += omega * D[i] * (B[i] - x_temp);
335  }
336  } else if (direction == Kokkos::Backward) {
337  // Split the loop so that it is correct even if LO is unsigned.
338  // It's a bad idea for LO to be unsigned, but we want this to
339  // work nevertheless.
340  for (LO ii = numRowInds - 1; ii != 0; --ii) {
341  LO i = rowInd[ii];
342  RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
343  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
344  const LO j = ind[k];
345  const MatrixScalar A_ij = val[k];
346  x_temp += A_ij * X[j];
347  }
348  X[i] += omega * D[i] * (B[i] - x_temp);
349  }
350  { // last loop iteration
351  const LO ii = 0;
352  LO i = rowInd[ii];
353  RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
354  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
355  const LO j = ind[k];
356  const MatrixScalar A_ij = val[k];
357  x_temp += A_ij * X[j];
358  }
359  X[i] += omega * D[i] * (B[i] - x_temp);
360  }
361  }
362  }
363  else { // numCols > 1
364  // mfh 20 Dec 2012: If Gauss-Seidel for multivectors with
365  // multiple columns becomes important, we can add unrolled
366  // implementations. The implementation below is not unrolled.
367  // It may also be reasonable to parallelize over right-hand
368  // sides, if there are enough of them, especially if the matrix
369  // fits in cache.
370  Teuchos::Array<RangeScalar> temp (numCols);
371  RangeScalar* const x_temp = temp.getRawPtr ();
372 
373  if (direction == Kokkos::Forward) {
374  for (LO ii = 0; ii < numRowInds; ++ii) {
375  LO i = rowInd[ii];
376  for (OffsetType c = 0; c < theNumCols; ++c) {
378  }
379  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
380  const LO j = ind[k];
381  const MatrixScalar A_ij = val[k];
382  for (OffsetType c = 0; c < theNumCols; ++c) {
383  x_temp[c] += A_ij * X[j + x_stride*c];
384  }
385  }
386  for (OffsetType c = 0; c < theNumCols; ++c) {
387  X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
388  }
389  }
390  } else if (direction == Kokkos::Backward) { // backward mode
391  // Split the loop so that it is correct even if LO is unsigned.
392  // It's a bad idea for LO to be unsigned, but we want this to
393  // work nevertheless.
394  for (LO ii = numRowInds - 1; ii != 0; --ii) {
395  LO i = rowInd[ii];
396  for (OffsetType c = 0; c < theNumCols; ++c) {
398  }
399  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
400  const LO j = ind[k];
401  const MatrixScalar A_ij = val[k];
402  for (OffsetType c = 0; c < theNumCols; ++c) {
403  x_temp[c] += A_ij * X[j + x_stride*c];
404  }
405  }
406  for (OffsetType c = 0; c < theNumCols; ++c) {
407  X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
408  }
409  }
410  { // last loop iteration
411  const LO ii = 0;
412  LO i = rowInd[ii];
413  for (OffsetType c = 0; c < theNumCols; ++c) {
415  }
416  for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
417  const LO j = ind[k];
418  const MatrixScalar A_ij = val[k];
419  for (OffsetType c = 0; c < theNumCols; ++c) {
420  x_temp[c] += A_ij * X[j + x_stride*c];
421  }
422  }
423  for (OffsetType c = 0; c < theNumCols; ++c) {
424  X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
425  }
426  }
427  }
428  }
429 }
430 
431 
432 template<class CrsMatrixType,
433  class DomainMultiVectorType,
434  class RangeMultiVectorType>
435 void
436 lowerTriSolveCsrUnitDiag (RangeMultiVectorType X,
437  const CrsMatrixType& A,
438  DomainMultiVectorType Y)
439 {
440  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
441  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
442  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
443 
444  const local_ordinal_type numRows = A.numRows ();
445  //const local_ordinal_type numCols = A.numCols ();
446  const local_ordinal_type numVecs = X.dimension_1 ();
447  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
448  typename CrsMatrixType::index_type ind = A.graph.entries;
449  typename CrsMatrixType::values_type val = A.values;
450 
451  for (local_ordinal_type r = 0; r < numRows; ++r) {
452  for (local_ordinal_type j = 0; j < numVecs; ++j) {
453  X(r, j) = Y(r, j);
454  }
455  const offset_type beg = ptr(r);
456  const offset_type end = ptr(r+1);
457  for (offset_type k = beg; k < end; ++k) {
458  const matrix_scalar_type A_rc = val(k);
459  const local_ordinal_type c = ind(k);
460  for (local_ordinal_type j = 0; j < numVecs; ++j) {
461  X(r, j) -= A_rc * X(c, j);
462  }
463  } // for each entry A_rc in the current row r
464  } // for each row r
465 }
466 
467 
468 template<class CrsMatrixType,
469  class DomainMultiVectorType,
470  class RangeMultiVectorType>
471 void
472 lowerTriSolveCsr (RangeMultiVectorType X,
473  const CrsMatrixType& A,
474  DomainMultiVectorType Y)
475 {
476  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
477  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
478  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
480 
481  const local_ordinal_type numRows = A.numRows ();
482  //const local_ordinal_type numCols = A.numCols ();
483  const local_ordinal_type numVecs = X.dimension_1 ();
484  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
485  typename CrsMatrixType::index_type ind = A.graph.entries;
486  typename CrsMatrixType::values_type val = A.values;
487 
488  for (local_ordinal_type r = 0; r < numRows; ++r) {
489  for (local_ordinal_type j = 0; j < numVecs; ++j) {
490  X(r, j) = Y(r, j);
491  }
492 
493  matrix_scalar_type A_rr = STS::zero ();
494  const offset_type beg = ptr(r);
495  const offset_type end = ptr(r+1);
496 
497  for (offset_type k = beg; k < end; ++k) {
498  const matrix_scalar_type A_rc = val(k);
499  const local_ordinal_type c = ind(k);
500  // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
501  // has equal local row and column indices. That may not
502  // necessarily hold, depending on the row and column Maps. The
503  // way to fix this would be for Tpetra::CrsMatrix to remember
504  // the local column index of the diagonal entry (if there is
505  // one) in each row, and pass that along to this function.
506  if (r == c) {
507  A_rr += A_rc;
508  } else {
509  for (local_ordinal_type j = 0; j < numVecs; ++j) {
510  X(r, j) -= A_rc * X(c, j);
511  }
512  }
513  } // for each entry A_rc in the current row r
514  for (local_ordinal_type j = 0; j < numVecs; ++j) {
515  X(r, j) /= A_rr;
516  }
517  } // for each row r
518 }
519 
520 
521 template<class CrsMatrixType,
522  class DomainMultiVectorType,
523  class RangeMultiVectorType>
524 void
525 upperTriSolveCsrUnitDiag (RangeMultiVectorType X,
526  const CrsMatrixType& A,
527  DomainMultiVectorType Y)
528 {
529  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
530  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
531  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
532 
533  const local_ordinal_type numRows = A.numRows ();
534  //const local_ordinal_type numCols = A.numCols ();
535  const local_ordinal_type numVecs = X.dimension_1 ();
536  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
537  typename CrsMatrixType::index_type ind = A.graph.entries;
538  typename CrsMatrixType::values_type val = A.values;
539 
540  // If local_ordinal_type is unsigned and numRows is 0, the loop
541  // below will have entirely the wrong number of iterations.
542  if (numRows == 0) {
543  return;
544  }
545 
546  // Don't use r >= 0 as the test, because that fails if
547  // local_ordinal_type is unsigned. We do r == 0 (last
548  // iteration) below.
549  for (local_ordinal_type r = numRows - 1; r != 0; --r) {
550  for (local_ordinal_type j = 0; j < numVecs; ++j) {
551  X(r, j) = Y(r, j);
552  }
553  const offset_type beg = ptr(r);
554  const offset_type end = ptr(r+1);
555  for (offset_type k = beg; k < end; ++k) {
556  const matrix_scalar_type A_rc = val(k);
557  const local_ordinal_type c = ind(k);
558  for (local_ordinal_type j = 0; j < numVecs; ++j) {
559  X(r, j) -= A_rc * X(c, j);
560  }
561  } // for each entry A_rc in the current row r
562  } // for each row r
563 
564  // Last iteration: r = 0.
565  {
566  const local_ordinal_type r = 0;
567  for (local_ordinal_type j = 0; j < numVecs; ++j) {
568  X(r, j) = Y(r, j);
569  }
570  const offset_type beg = ptr(r);
571  const offset_type end = ptr(r+1);
572  for (offset_type k = beg; k < end; ++k) {
573  const matrix_scalar_type A_rc = val(k);
574  const local_ordinal_type c = ind(k);
575  for (local_ordinal_type j = 0; j < numVecs; ++j) {
576  X(r, j) -= A_rc * X(c, j);
577  }
578  } // for each entry A_rc in the current row r
579  } // last iteration: r = 0
580 }
581 
582 
583 template<class CrsMatrixType,
584  class DomainMultiVectorType,
585  class RangeMultiVectorType>
586 void
587 upperTriSolveCsr (RangeMultiVectorType X,
588  const CrsMatrixType& A,
589  DomainMultiVectorType Y)
590 {
591  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
592  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
593  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
594 
595  const local_ordinal_type numRows = A.numRows ();
596  //const local_ordinal_type numCols = A.numCols ();
597  const local_ordinal_type numVecs = X.dimension_1 ();
598  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
599  typename CrsMatrixType::index_type ind = A.graph.entries;
600  typename CrsMatrixType::values_type val = A.values;
601 
602  // If local_ordinal_type is unsigned and numRows is 0, the loop
603  // below will have entirely the wrong number of iterations.
604  if (numRows == 0) {
605  return;
606  }
607 
608  // Don't use r >= 0 as the test, because that fails if
609  // local_ordinal_type is unsigned. We do r == 0 (last
610  // iteration) below.
611  for (local_ordinal_type r = numRows - 1; r != 0; --r) {
612  for (local_ordinal_type j = 0; j < numVecs; ++j) {
613  X(r, j) = Y(r, j);
614  }
615  const offset_type beg = ptr(r);
616  const offset_type end = ptr(r+1);
617  // We assume the diagonal entry is first in the row.
618  const matrix_scalar_type A_rr = val(beg);
619  for (offset_type k = beg + static_cast<offset_type> (1); k < end; ++k) {
620  const matrix_scalar_type A_rc = val(k);
621  const local_ordinal_type c = ind(k);
622  for (local_ordinal_type j = 0; j < numVecs; ++j) {
623  X(r, j) -= A_rc * X(c, j);
624  }
625  } // for each entry A_rc in the current row r
626  for (local_ordinal_type j = 0; j < numVecs; ++j) {
627  X(r, j) /= A_rr;
628  }
629  } // for each row r
630 
631  // Last iteration: r = 0.
632  {
633  const local_ordinal_type r = 0;
634  for (local_ordinal_type j = 0; j < numVecs; ++j) {
635  X(r, j) = Y(r, j);
636  }
637  const offset_type beg = ptr(r);
638  const offset_type end = ptr(r+1);
639  // We assume the diagonal entry is first in the row.
640  const matrix_scalar_type A_rr = val(beg);
641  for (size_t k = beg + 1; k < end; ++k) {
642  const matrix_scalar_type A_rc = val(k);
643  const local_ordinal_type c = ind(k);
644  for (local_ordinal_type j = 0; j < numVecs; ++j) {
645  X(r, j) -= A_rc * X(c, j);
646  }
647  } // for each entry A_rc in the current row r
648  for (local_ordinal_type j = 0; j < numVecs; ++j) {
649  X(r, j) /= A_rr;
650  }
651  } // last iteration: r = 0
652 }
653 
654 
655 template<class CrsMatrixType,
656  class DomainMultiVectorType,
657  class RangeMultiVectorType>
658 void
659 upperTriSolveCscUnitDiag (RangeMultiVectorType X,
660  const CrsMatrixType& A,
661  DomainMultiVectorType Y)
662 {
663  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
664  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
665  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
666 
667  const local_ordinal_type numRows = A.numRows ();
668  const local_ordinal_type numCols = A.numCols ();
669  const local_ordinal_type numVecs = X.dimension_1 ();
670  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
671  typename CrsMatrixType::index_type ind = A.graph.entries;
672  typename CrsMatrixType::values_type val = A.values;
673 
674  for (local_ordinal_type j = 0; j < numVecs; ++j) {
675  for (local_ordinal_type i = 0; i < numRows; ++i) {
676  X(i, j) = Y(i, j);
677  }
678  }
679 
680  // If local_ordinal_type is unsigned and numCols is 0, the loop
681  // below will have entirely the wrong number of iterations.
682  if (numCols == 0) {
683  return;
684  }
685 
686  // Don't use c >= 0 as the test, because that fails if
687  // local_ordinal_type is unsigned. We do c == 0 (last
688  // iteration) below.
689  for (local_ordinal_type c = numCols - 1; c != 0; --c) {
690  const offset_type beg = ptr(c);
691  const offset_type end = ptr(c+1);
692  for (offset_type k = beg; k < end; ++k) {
693  const matrix_scalar_type A_rc = val(k);
694  const local_ordinal_type r = ind(k);
695  for (local_ordinal_type j = 0; j < numVecs; ++j) {
696  X(r, j) -= A_rc * X(c, j);
697  }
698  } // for each entry A_rc in the current column c
699  } // for each column c
700 
701  // Last iteration: c = 0.
702  {
703  const local_ordinal_type c = 0;
704  const offset_type beg = ptr(c);
705  const offset_type end = ptr(c+1);
706  for (offset_type k = beg; k < end; ++k) {
707  const matrix_scalar_type A_rc = val(k);
708  const local_ordinal_type r = ind(k);
709  for (local_ordinal_type j = 0; j < numVecs; ++j) {
710  X(r, j) -= A_rc * X(c, j);
711  }
712  } // for each entry A_rc in the current column c
713  }
714 }
715 
716 
717 template<class CrsMatrixType,
718  class DomainMultiVectorType,
719  class RangeMultiVectorType>
720 void
721 upperTriSolveCsc (RangeMultiVectorType X,
722  const CrsMatrixType& A,
723  DomainMultiVectorType Y)
724 {
725  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
726  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
727  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
729 
730  const local_ordinal_type numRows = A.numRows ();
731  const local_ordinal_type numCols = A.numCols ();
732  const local_ordinal_type numVecs = X.dimension_1 ();
733  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
734  typename CrsMatrixType::index_type ind = A.graph.entries;
735  typename CrsMatrixType::values_type val = A.values;
736 
737  for (local_ordinal_type j = 0; j < numVecs; ++j) {
738  for (local_ordinal_type i = 0; i < numRows; ++i) {
739  X(i, j) = Y(i, j);
740  }
741  }
742 
743  // If local_ordinal_type is unsigned and numCols is 0, the loop
744  // below will have entirely the wrong number of iterations.
745  if (numCols == 0) {
746  return;
747  }
748 
749  // Don't use c >= 0 as the test, because that fails if
750  // local_ordinal_type is unsigned. We do c == 0 (last
751  // iteration) below.
752  for (local_ordinal_type c = numCols - 1; c != 0; --c) {
753  matrix_scalar_type A_cc = STS::zero ();
754  const offset_type beg = ptr(c);
755  const offset_type end = ptr(c+1);
756  for (offset_type k = beg; k < end; ++k) {
757  const local_ordinal_type r = ind(k);
758  const matrix_scalar_type A_rc = val(k);
759  // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
760  // has equal local row and column indices. That may not
761  // necessarily hold, depending on the row and column Maps. See
762  // note above.
763  if (r == c) {
764  A_cc += A_rc;
765  } else {
766  for (local_ordinal_type j = 0; j < numVecs; ++j) {
767  X(r, j) -= A_rc * X(c, j);
768  }
769  }
770  } // for each entry A_rc in the current column c
771  for (local_ordinal_type j = 0; j < numVecs; ++j) {
772  X(c, j) /= A_cc;
773  }
774  } // for each column c
775 
776  // Last iteration: c = 0.
777  {
778  const local_ordinal_type c = 0;
779  matrix_scalar_type A_cc = STS::zero ();
780  const offset_type beg = ptr(c);
781  const offset_type end = ptr(c+1);
782  for (offset_type k = beg; k < end; ++k) {
783  const local_ordinal_type r = ind(k);
784  const matrix_scalar_type A_rc = val(k);
785  // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
786  // has equal local row and column indices. That may not
787  // necessarily hold, depending on the row and column Maps. See
788  // note above.
789  if (r == c) {
790  A_cc += A_rc;
791  } else {
792  for (local_ordinal_type j = 0; j < numVecs; ++j) {
793  X(r, j) -= A_rc * X(c, j);
794  }
795  }
796  } // for each entry A_rc in the current column c
797  for (local_ordinal_type j = 0; j < numVecs; ++j) {
798  X(c, j) /= A_cc;
799  }
800  }
801 }
802 
803 
804 template<class CrsMatrixType,
805  class DomainMultiVectorType,
806  class RangeMultiVectorType>
807 void
808 lowerTriSolveCscUnitDiag (RangeMultiVectorType X,
809  const CrsMatrixType& A,
810  DomainMultiVectorType Y)
811 {
812  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
813  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
814  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
815 
816  const local_ordinal_type numRows = A.numRows ();
817  const local_ordinal_type numCols = A.numCols ();
818  const local_ordinal_type numVecs = X.dimension_1 ();
819  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
820  typename CrsMatrixType::index_type ind = A.graph.entries;
821  typename CrsMatrixType::values_type val = A.values;
822 
823  for (local_ordinal_type j = 0; j < numVecs; ++j) {
824  for (local_ordinal_type i = 0; i < numRows; ++i) {
825  X(i, j) = Y(i, j);
826  }
827  }
828 
829  for (local_ordinal_type c = 0; c < numCols; ++c) {
830  const offset_type beg = ptr(c);
831  const offset_type end = ptr(c+1);
832  for (offset_type k = beg; k < end; ++k) {
833  const local_ordinal_type r = ind(k);
834  const matrix_scalar_type A_rc = val(k);
835  for (local_ordinal_type j = 0; j < numVecs; ++j) {
836  X(r, j) -= A_rc * X(c, j);
837  }
838  } // for each entry A_rc in the current column c
839  } // for each column c
840 }
841 
842 
843 template<class CrsMatrixType,
844  class DomainMultiVectorType,
845  class RangeMultiVectorType>
846 void
847 upperTriSolveCscUnitDiagConj (RangeMultiVectorType X,
848  const CrsMatrixType& A,
849  DomainMultiVectorType Y)
850 {
851  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
852  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
853  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
855 
856  const local_ordinal_type numRows = A.numRows ();
857  const local_ordinal_type numCols = A.numCols ();
858  const local_ordinal_type numVecs = X.dimension_1 ();
859  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
860  typename CrsMatrixType::index_type ind = A.graph.entries;
861  typename CrsMatrixType::values_type val = A.values;
862 
863  for (local_ordinal_type j = 0; j < numVecs; ++j) {
864  for (local_ordinal_type i = 0; i < numRows; ++i) {
865  X(i, j) = Y(i, j);
866  }
867  }
868 
869  // If local_ordinal_type is unsigned and numCols is 0, the loop
870  // below will have entirely the wrong number of iterations.
871  if (numCols == 0) {
872  return;
873  }
874 
875  // Don't use c >= 0 as the test, because that fails if
876  // local_ordinal_type is unsigned. We do c == 0 (last
877  // iteration) below.
878  for (local_ordinal_type c = numCols - 1; c != 0; --c) {
879  const offset_type beg = ptr(c);
880  const offset_type end = ptr(c+1);
881  for (offset_type k = beg; k < end; ++k) {
882  const local_ordinal_type r = ind(k);
883  const matrix_scalar_type A_rc = STS::conj (val(k));
884  for (local_ordinal_type j = 0; j < numVecs; ++j) {
885  X(r, j) -= A_rc * X(c, j);
886  }
887  } // for each entry A_rc in the current column c
888  } // for each column c
889 
890  // Last iteration: c = 0.
891  {
892  const local_ordinal_type c = 0;
893  const offset_type beg = ptr(c);
894  const offset_type end = ptr(c+1);
895  for (offset_type k = beg; k < end; ++k) {
896  const local_ordinal_type r = ind(k);
897  const matrix_scalar_type A_rc = STS::conj (val(k));
898  for (local_ordinal_type j = 0; j < numVecs; ++j) {
899  X(r, j) -= A_rc * X(c, j);
900  }
901  } // for each entry A_rc in the current column c
902  }
903 }
904 
905 
906 template<class CrsMatrixType,
907  class DomainMultiVectorType,
908  class RangeMultiVectorType>
909 void
910 upperTriSolveCscConj (RangeMultiVectorType X,
911  const CrsMatrixType& A,
912  DomainMultiVectorType Y)
913 {
914  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
915  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
916  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
918 
919  const local_ordinal_type numRows = A.numRows ();
920  const local_ordinal_type numCols = A.numCols ();
921  const local_ordinal_type numVecs = X.dimension_1 ();
922  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
923  typename CrsMatrixType::index_type ind = A.graph.entries;
924  typename CrsMatrixType::values_type val = A.values;
925 
926  for (local_ordinal_type j = 0; j < numVecs; ++j) {
927  for (local_ordinal_type i = 0; i < numRows; ++i) {
928  X(i, j) = Y(i, j);
929  }
930  }
931 
932  // If local_ordinal_type is unsigned and numCols is 0, the loop
933  // below will have entirely the wrong number of iterations.
934  if (numCols == 0) {
935  return;
936  }
937 
938  // Don't use c >= 0 as the test, because that fails if
939  // local_ordinal_type is unsigned. We do c == 0 (last
940  // iteration) below.
941  for (local_ordinal_type c = numCols - 1; c != 0; --c) {
942  matrix_scalar_type A_cc = STS::zero ();
943  const offset_type beg = ptr(c);
944  const offset_type end = ptr(c+1);
945  for (offset_type k = beg; k < end; ++k) {
946  const local_ordinal_type r = ind(k);
947  const matrix_scalar_type A_rc = STS::conj (val(k));
948  // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
949  // has equal local row and column indices. That may not
950  // necessarily hold, depending on the row and column Maps. See
951  // note above.
952  if (r == c) {
953  A_cc += A_rc;
954  } else {
955  for (local_ordinal_type j = 0; j < numVecs; ++j) {
956  X(r, j) -= A_rc * X(c, j);
957  }
958  }
959  } // for each entry A_rc in the current column c
960  for (local_ordinal_type j = 0; j < numVecs; ++j) {
961  X(c, j) /= A_cc;
962  }
963  } // for each column c
964 
965  // Last iteration: c = 0.
966  {
967  const local_ordinal_type c = 0;
968  matrix_scalar_type A_cc = STS::zero ();
969  const offset_type beg = ptr(c);
970  const offset_type end = ptr(c+1);
971  for (offset_type k = beg; k < end; ++k) {
972  const local_ordinal_type r = ind(k);
973  const matrix_scalar_type A_rc = STS::conj (val(k));
974  // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
975  // has equal local row and column indices. That may not
976  // necessarily hold, depending on the row and column Maps. See
977  // note above.
978  if (r == c) {
979  A_cc += A_rc;
980  } else {
981  for (local_ordinal_type j = 0; j < numVecs; ++j) {
982  X(r, j) -= A_rc * X(c, j);
983  }
984  }
985  } // for each entry A_rc in the current column c
986  for (local_ordinal_type j = 0; j < numVecs; ++j) {
987  X(c, j) /= A_cc;
988  }
989  }
990 }
991 
992 
993 template<class CrsMatrixType,
994  class DomainMultiVectorType,
995  class RangeMultiVectorType>
996 void
997 lowerTriSolveCsc (RangeMultiVectorType X,
998  const CrsMatrixType& A,
999  DomainMultiVectorType Y)
1000 {
1001  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
1002  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
1003  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
1005 
1006  const local_ordinal_type numRows = A.numRows ();
1007  const local_ordinal_type numCols = A.numCols ();
1008  const local_ordinal_type numVecs = X.dimension_1 ();
1009  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
1010  typename CrsMatrixType::index_type ind = A.graph.entries;
1011  typename CrsMatrixType::values_type val = A.values;
1012 
1013  for (local_ordinal_type j = 0; j < numVecs; ++j) {
1014  for (local_ordinal_type i = 0; i < numRows; ++i) {
1015  X(i, j) = Y(i, j);
1016  }
1017  }
1018 
1019  for (local_ordinal_type c = 0; c < numCols; ++c) {
1020  matrix_scalar_type A_cc = STS::zero ();
1021  const offset_type beg = ptr(c);
1022  const offset_type end = ptr(c+1);
1023  for (offset_type k = beg; k < end; ++k) {
1024  const local_ordinal_type r = ind(k);
1025  const matrix_scalar_type A_rc = val(k);
1026  // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
1027  // has equal local row and column indices. That may not
1028  // necessarily hold, depending on the row and column Maps. See
1029  // note above.
1030  if (r == c) {
1031  A_cc += A_rc;
1032  } else {
1033  for (local_ordinal_type j = 0; j < numVecs; ++j) {
1034  X(r, j) -= A_rc * X(c, j);
1035  }
1036  }
1037  } // for each entry A_rc in the current column c
1038  for (local_ordinal_type j = 0; j < numVecs; ++j) {
1039  X(c, j) /= A_cc;
1040  }
1041  } // for each column c
1042 }
1043 
1044 
1045 template<class CrsMatrixType,
1046  class DomainMultiVectorType,
1047  class RangeMultiVectorType>
1048 void
1049 lowerTriSolveCscUnitDiagConj (RangeMultiVectorType X,
1050  const CrsMatrixType& A,
1051  DomainMultiVectorType Y)
1052 {
1053  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
1054  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
1055  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
1057 
1058  const local_ordinal_type numRows = A.numRows ();
1059  const local_ordinal_type numCols = A.numCols ();
1060  const local_ordinal_type numVecs = X.dimension_1 ();
1061  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
1062  typename CrsMatrixType::index_type ind = A.graph.entries;
1063  typename CrsMatrixType::values_type val = A.values;
1064 
1065  for (local_ordinal_type j = 0; j < numVecs; ++j) {
1066  for (local_ordinal_type i = 0; i < numRows; ++i) {
1067  X(i, j) = Y(i, j);
1068  }
1069  }
1070 
1071  for (local_ordinal_type c = 0; c < numCols; ++c) {
1072  const offset_type beg = ptr(c);
1073  const offset_type end = ptr(c+1);
1074  for (offset_type k = beg; k < end; ++k) {
1075  const local_ordinal_type r = ind(k);
1076  const matrix_scalar_type A_rc = STS::conj (val(k));
1077  for (local_ordinal_type j = 0; j < numVecs; ++j) {
1078  X(r, j) -= A_rc * X(c, j);
1079  }
1080  } // for each entry A_rc in the current column c
1081  } // for each column c
1082 }
1083 
1084 
1085 template<class CrsMatrixType,
1086  class DomainMultiVectorType,
1087  class RangeMultiVectorType>
1088 void
1089 lowerTriSolveCscConj (RangeMultiVectorType X,
1090  const CrsMatrixType& A,
1091  DomainMultiVectorType Y)
1092 {
1093  typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
1094  typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
1095  typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
1097 
1098  const local_ordinal_type numRows = A.numRows ();
1099  const local_ordinal_type numCols = A.numCols ();
1100  const local_ordinal_type numVecs = X.dimension_1 ();
1101  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
1102  typename CrsMatrixType::index_type ind = A.graph.entries;
1103  typename CrsMatrixType::values_type val = A.values;
1104 
1105  for (local_ordinal_type j = 0; j < numVecs; ++j) {
1106  for (local_ordinal_type i = 0; i < numRows; ++i) {
1107  X(i, j) = Y(i, j);
1108  }
1109  }
1110 
1111  for (local_ordinal_type c = 0; c < numCols; ++c) {
1112  matrix_scalar_type A_cc = STS::zero ();
1113  const offset_type beg = ptr(c);
1114  const offset_type end = ptr(c+1);
1115  for (offset_type k = beg; k < end; ++k) {
1116  const local_ordinal_type r = ind(k);
1117  const matrix_scalar_type A_rc = STS::conj (val(k));
1118  // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
1119  // has equal local row and column indices. That may not
1120  // necessarily hold, depending on the row and column Maps. See
1121  // note above.
1122  if (r == c) {
1123  A_cc += A_rc;
1124  } else {
1125  for (local_ordinal_type j = 0; j < numVecs; ++j) {
1126  X(r, j) -= A_rc * X(c, j);
1127  }
1128  }
1129  } // for each entry A_rc in the current column c
1130  for (local_ordinal_type j = 0; j < numVecs; ++j) {
1131  X(c, j) /= A_cc;
1132  }
1133  } // for each column c
1134 }
1135 
1136 
1137 template<class CrsMatrixType,
1138  class DomainMultiVectorType,
1139  class RangeMultiVectorType>
1140 void
1141 triSolveKokkos (RangeMultiVectorType X,
1142  const CrsMatrixType& A,
1143  DomainMultiVectorType Y,
1144  const Teuchos::EUplo triUplo,
1145  const Teuchos::EDiag unitDiag,
1146  const Teuchos::ETransp trans)
1147 {
1148  typedef typename CrsMatrixType::index_type::non_const_value_type LO;
1149  const char prefix[] = "Kokkos::Sequential::triSolveKokkos: ";
1150  const LO numRows = A.numRows ();
1151  const LO numCols = A.numCols ();
1152  const LO numVecs = X.dimension_1 ();
1153  typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
1154 
1156  triUplo != Teuchos::LOWER_TRI && triUplo != Teuchos::UPPER_TRI &&
1157  triUplo != Teuchos::UNDEF_TRI,
1158  std::invalid_argument, prefix << "triUplo has an invalid value " << triUplo
1159  << ". Valid values are Teuchos::LOWER_TRI=" << Teuchos::LOWER_TRI <<
1160  ", Teuchos::UPPER_TRI=" << Teuchos::UPPER_TRI << ", and Teuchos::UNDEF_TRI="
1161  << Teuchos::UNDEF_TRI << ".");
1163  triUplo == Teuchos::UNDEF_TRI, std::invalid_argument, prefix <<
1164  "The matrix is neither lower nor upper triangular (triUplo="
1165  "Teuchos::UNDEF_TRI), so you may not call this method.");
1167  unitDiag != Teuchos::UNIT_DIAG && unitDiag != Teuchos::NON_UNIT_DIAG,
1168  std::invalid_argument, prefix << "unitDiag has an invalid value "
1169  << unitDiag << ". Valid values are Teuchos::UNIT_DIAG="
1170  << Teuchos::UNIT_DIAG << " and Teuchos::NON_UNIT_DIAG="
1171  << Teuchos::NON_UNIT_DIAG << ".");
1173  unitDiag != Teuchos::UNIT_DIAG && numRows > 0 && ptr(numRows) == 0,
1174  std::invalid_argument, prefix << "Triangular solve with an empty matrix "
1175  "is only valid if the matrix has an implicit unit diagonal. This matrix "
1176  "does not.");
1178  trans != Teuchos::NO_TRANS && trans != Teuchos::TRANS &&
1179  trans != Teuchos::CONJ_TRANS,
1180  std::invalid_argument, prefix << "trans has an invalid value " << trans
1181  << ". Valid values are Teuchos::NO_TRANS=" << Teuchos::NO_TRANS << ", "
1182  << "Teuchos::TRANS=" << Teuchos::TRANS << ", and Teuchos::CONJ_TRANS="
1183  << Teuchos::CONJ_TRANS << ".");
1184 
1186  numRows != static_cast<LO> (X.dimension_0 ()), std::invalid_argument,
1187  prefix << "numRows = " << numRows << " != X.dimension_0() = " <<
1188  X.dimension_0 () << ".");
1190  numCols != static_cast<LO> (Y.dimension_0 ()), std::invalid_argument,
1191  prefix << "numCols = " << numCols << " != Y.dimension_0() = " <<
1192  Y.dimension_0 () << ".");
1194  numVecs != static_cast<LO> (Y.dimension_1 ()), std::invalid_argument,
1195  prefix << "X.dimension_1 () = " << numVecs << " != Y.dimension_1 () = "
1196  << Y.dimension_1 () << ".");
1197 
1198  if (trans == Teuchos::NO_TRANS) { // no transpose
1199  if (triUplo == Teuchos::LOWER_TRI) { // lower triangular
1200  if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
1201  lowerTriSolveCsrUnitDiag (X, A, Y);
1202  } else { // non unit diagonal
1203  lowerTriSolveCsr (X, A, Y);
1204  }
1205  } else { // upper triangular
1206  if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
1207  upperTriSolveCsrUnitDiag (X, A, Y);
1208  } else { // non unit diagonal
1209  upperTriSolveCsr (X, A, Y);
1210  }
1211  }
1212  }
1213  else if (trans == Teuchos::TRANS) { // transpose
1214  if (triUplo == Teuchos::LOWER_TRI) { // lower triangular
1215  // Transposed lower tri CSR => upper tri CSC.
1216  if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
1217  upperTriSolveCscUnitDiag (X, A, Y);
1218  } else { // non unit diagonal
1219  upperTriSolveCsc (X, A, Y);
1220  }
1221  }
1222  else { // upper triangular
1223  // Transposed upper tri CSR => lower tri CSC.
1224  if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
1225  lowerTriSolveCscUnitDiag (X, A, Y);
1226  } else { // non unit diagonal
1227  lowerTriSolveCsc (X, A, Y);
1228  }
1229  }
1230  }
1231  else if (trans == Teuchos::CONJ_TRANS) { // conj transpose
1232  if (triUplo == Teuchos::LOWER_TRI) { // lower triangular
1233  // Transposed lower tri CSR => upper tri CSC.
1234  if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
1235  upperTriSolveCscUnitDiagConj (X, A, Y);
1236  } else { // non unit diagonal
1237  upperTriSolveCscConj (X, A, Y);
1238  }
1239  }
1240  else { // upper triangular
1241  // Transposed upper tri CSR => lower tri CSC.
1242  if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
1243  lowerTriSolveCscUnitDiagConj (X, A, Y);
1244  } else { // non unit diagonal
1245  lowerTriSolveCscConj (X, A, Y);
1246  }
1247  }
1248  }
1249 }
1250 
1251 
1252 } // namespace Sequential
1253 } // namespace Kokkos
1254 
1255 #endif // KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static KOKKOS_FORCEINLINE_FUNCTION T zero()
The zero value of T; the arithmetic identity.
Traits class for arithmetic on type T.
Declaration and definition of Kokkos::Details::ArithTraits.