44 #ifndef KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP
45 #define KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP
65 #include <KokkosLinAlg_config.h>
69 namespace Sequential {
97 template<
class LocalOrdinal,
103 gaussSeidel (
const LocalOrdinal numRows,
104 const LocalOrdinal numCols,
105 const OffsetType*
const ptr,
106 const LocalOrdinal*
const ind,
107 const MatrixScalar*
const val,
108 const DomainScalar*
const B,
109 const OffsetType b_stride,
110 RangeScalar*
const X,
111 const OffsetType x_stride,
112 const MatrixScalar*
const D,
113 const MatrixScalar omega,
114 const Kokkos::ESweepDirection direction)
117 typedef LocalOrdinal LO;
118 const OffsetType theNumRows =
static_cast<OffsetType
> (numRows);
119 const OffsetType theNumCols =
static_cast<OffsetType
> (numCols);
121 if (numRows == 0 || numCols == 0) {
124 else if (numRows > 0 && ptr[numRows] == 0) {
130 const MatrixScalar oneMinusOmega =
131 ArithTraits<MatrixScalar>::one () - omega;
132 for (OffsetType j = 0; j < theNumCols; ++j) {
133 RangeScalar*
const x_j = X + j*x_stride;
134 const DomainScalar*
const b_j = B + j*b_stride;
135 for (OffsetType i = 0; i < theNumRows; ++i) {
136 x_j[i] = oneMinusOmega * x_j[i] + omega * b_j[i];
143 if (direction == Kokkos::Forward) {
144 for (LO i = 0; i < numRows; ++i) {
145 RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
146 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
148 const MatrixScalar A_ij = val[k];
149 x_temp += A_ij * X[j];
151 X[i] += omega * D[i] * (B[i] - x_temp);
153 }
else if (direction == Kokkos::Backward) {
157 for (LO i = numRows - 1; i != 0; --i) {
158 RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
159 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
161 const MatrixScalar A_ij = val[k];
162 x_temp += A_ij * X[j];
164 X[i] += omega * D[i] * (B[i] - x_temp);
168 RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
169 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
171 const MatrixScalar A_ij = val[k];
172 x_temp += A_ij * X[j];
174 X[i] += omega * D[i] * (B[i] - x_temp);
186 RangeScalar*
const x_temp = temp.getRawPtr ();
188 if (direction == Kokkos::Forward) {
189 for (LO i = 0; i < numRows; ++i) {
190 for (OffsetType c = 0; c < theNumCols; ++c) {
191 x_temp[c] = ArithTraits<RangeScalar>::zero ();
193 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
195 const MatrixScalar A_ij = val[k];
196 for (OffsetType c = 0; c < theNumCols; ++c) {
197 x_temp[c] += A_ij * X[j + x_stride*c];
200 for (OffsetType c = 0; c < theNumCols; ++c) {
201 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
204 }
else if (direction == Kokkos::Backward) {
208 for (LO i = numRows - 1; i != 0; --i) {
209 for (OffsetType c = 0; c < theNumCols; ++c) {
210 x_temp[c] = ArithTraits<RangeScalar>::zero ();
212 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
214 const MatrixScalar A_ij = val[k];
215 for (OffsetType c = 0; c < theNumCols; ++c) {
216 x_temp[c] += A_ij * X[j + x_stride*c];
219 for (OffsetType c = 0; c < theNumCols; ++c) {
220 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
225 for (OffsetType c = 0; c < theNumCols; ++c) {
226 x_temp[c] = ArithTraits<RangeScalar>::zero ();
228 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
230 const MatrixScalar A_ij = val[k];
231 for (OffsetType c = 0; c < theNumCols; ++c) {
232 x_temp[c] += A_ij * X[j + x_stride*c];
235 for (OffsetType c = 0; c < theNumCols; ++c) {
236 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
277 template<
class LocalOrdinal,
283 reorderedGaussSeidel (
const LocalOrdinal numRows,
284 const LocalOrdinal numCols,
285 const OffsetType*
const ptr,
286 const LocalOrdinal*
const ind,
287 const MatrixScalar*
const val,
288 const DomainScalar*
const B,
289 const OffsetType b_stride,
290 RangeScalar*
const X,
291 const OffsetType x_stride,
292 const MatrixScalar*
const D,
293 const LocalOrdinal*
const rowInd,
294 const LocalOrdinal numRowInds,
295 const MatrixScalar omega,
296 const Kokkos::ESweepDirection direction)
299 typedef LocalOrdinal LO;
300 const OffsetType theNumRows =
static_cast<OffsetType
> (numRows);
301 const OffsetType theNumCols =
static_cast<OffsetType
> (numCols);
303 if (numRows == 0 || numCols == 0) {
306 else if (numRows > 0 && ptr[numRows] == 0) {
312 const MatrixScalar oneMinusOmega =
313 ArithTraits<MatrixScalar>::one () - omega;
314 for (OffsetType j = 0; j < theNumCols; ++j) {
315 RangeScalar*
const x_j = X + j*x_stride;
316 const DomainScalar*
const b_j = B + j*b_stride;
317 for (OffsetType i = 0; i < theNumRows; ++i) {
318 x_j[i] = oneMinusOmega * x_j[i] + omega * b_j[i];
325 if (direction == Kokkos::Forward) {
326 for (LO ii = 0; ii < numRowInds; ++ii) {
328 RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
329 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
331 const MatrixScalar A_ij = val[k];
332 x_temp += A_ij * X[j];
334 X[i] += omega * D[i] * (B[i] - x_temp);
336 }
else if (direction == Kokkos::Backward) {
340 for (LO ii = numRowInds - 1; ii != 0; --ii) {
342 RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
343 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
345 const MatrixScalar A_ij = val[k];
346 x_temp += A_ij * X[j];
348 X[i] += omega * D[i] * (B[i] - x_temp);
353 RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
354 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
356 const MatrixScalar A_ij = val[k];
357 x_temp += A_ij * X[j];
359 X[i] += omega * D[i] * (B[i] - x_temp);
371 RangeScalar*
const x_temp = temp.getRawPtr ();
373 if (direction == Kokkos::Forward) {
374 for (LO ii = 0; ii < numRowInds; ++ii) {
376 for (OffsetType c = 0; c < theNumCols; ++c) {
379 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
381 const MatrixScalar A_ij = val[k];
382 for (OffsetType c = 0; c < theNumCols; ++c) {
383 x_temp[c] += A_ij * X[j + x_stride*c];
386 for (OffsetType c = 0; c < theNumCols; ++c) {
387 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
390 }
else if (direction == Kokkos::Backward) {
394 for (LO ii = numRowInds - 1; ii != 0; --ii) {
396 for (OffsetType c = 0; c < theNumCols; ++c) {
399 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
401 const MatrixScalar A_ij = val[k];
402 for (OffsetType c = 0; c < theNumCols; ++c) {
403 x_temp[c] += A_ij * X[j + x_stride*c];
406 for (OffsetType c = 0; c < theNumCols; ++c) {
407 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
413 for (OffsetType c = 0; c < theNumCols; ++c) {
416 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
418 const MatrixScalar A_ij = val[k];
419 for (OffsetType c = 0; c < theNumCols; ++c) {
420 x_temp[c] += A_ij * X[j + x_stride*c];
423 for (OffsetType c = 0; c < theNumCols; ++c) {
424 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
432 template<
class CrsMatrixType,
433 class DomainMultiVectorType,
434 class RangeMultiVectorType>
436 lowerTriSolveCsrUnitDiag (RangeMultiVectorType X,
437 const CrsMatrixType& A,
438 DomainMultiVectorType Y)
440 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
441 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
442 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
444 const local_ordinal_type numRows = A.numRows ();
446 const local_ordinal_type numVecs = X.dimension_1 ();
447 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
448 typename CrsMatrixType::index_type ind = A.graph.entries;
449 typename CrsMatrixType::values_type val = A.values;
451 for (local_ordinal_type r = 0; r < numRows; ++r) {
452 for (local_ordinal_type j = 0; j < numVecs; ++j) {
455 const offset_type beg = ptr(r);
456 const offset_type end = ptr(r+1);
457 for (offset_type k = beg; k < end; ++k) {
458 const matrix_scalar_type A_rc = val(k);
459 const local_ordinal_type c = ind(k);
460 for (local_ordinal_type j = 0; j < numVecs; ++j) {
461 X(r, j) -= A_rc * X(c, j);
468 template<
class CrsMatrixType,
469 class DomainMultiVectorType,
470 class RangeMultiVectorType>
472 lowerTriSolveCsr (RangeMultiVectorType X,
473 const CrsMatrixType& A,
474 DomainMultiVectorType Y)
476 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
477 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
478 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
481 const local_ordinal_type numRows = A.numRows ();
483 const local_ordinal_type numVecs = X.dimension_1 ();
484 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
485 typename CrsMatrixType::index_type ind = A.graph.entries;
486 typename CrsMatrixType::values_type val = A.values;
488 for (local_ordinal_type r = 0; r < numRows; ++r) {
489 for (local_ordinal_type j = 0; j < numVecs; ++j) {
493 matrix_scalar_type A_rr = STS::zero ();
494 const offset_type beg = ptr(r);
495 const offset_type end = ptr(r+1);
497 for (offset_type k = beg; k < end; ++k) {
498 const matrix_scalar_type A_rc = val(k);
499 const local_ordinal_type c = ind(k);
509 for (local_ordinal_type j = 0; j < numVecs; ++j) {
510 X(r, j) -= A_rc * X(c, j);
514 for (local_ordinal_type j = 0; j < numVecs; ++j) {
521 template<
class CrsMatrixType,
522 class DomainMultiVectorType,
523 class RangeMultiVectorType>
525 upperTriSolveCsrUnitDiag (RangeMultiVectorType X,
526 const CrsMatrixType& A,
527 DomainMultiVectorType Y)
529 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
530 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
531 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
533 const local_ordinal_type numRows = A.numRows ();
535 const local_ordinal_type numVecs = X.dimension_1 ();
536 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
537 typename CrsMatrixType::index_type ind = A.graph.entries;
538 typename CrsMatrixType::values_type val = A.values;
549 for (local_ordinal_type r = numRows - 1; r != 0; --r) {
550 for (local_ordinal_type j = 0; j < numVecs; ++j) {
553 const offset_type beg = ptr(r);
554 const offset_type end = ptr(r+1);
555 for (offset_type k = beg; k < end; ++k) {
556 const matrix_scalar_type A_rc = val(k);
557 const local_ordinal_type c = ind(k);
558 for (local_ordinal_type j = 0; j < numVecs; ++j) {
559 X(r, j) -= A_rc * X(c, j);
566 const local_ordinal_type r = 0;
567 for (local_ordinal_type j = 0; j < numVecs; ++j) {
570 const offset_type beg = ptr(r);
571 const offset_type end = ptr(r+1);
572 for (offset_type k = beg; k < end; ++k) {
573 const matrix_scalar_type A_rc = val(k);
574 const local_ordinal_type c = ind(k);
575 for (local_ordinal_type j = 0; j < numVecs; ++j) {
576 X(r, j) -= A_rc * X(c, j);
583 template<
class CrsMatrixType,
584 class DomainMultiVectorType,
585 class RangeMultiVectorType>
587 upperTriSolveCsr (RangeMultiVectorType X,
588 const CrsMatrixType& A,
589 DomainMultiVectorType Y)
591 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
592 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
593 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
595 const local_ordinal_type numRows = A.numRows ();
597 const local_ordinal_type numVecs = X.dimension_1 ();
598 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
599 typename CrsMatrixType::index_type ind = A.graph.entries;
600 typename CrsMatrixType::values_type val = A.values;
611 for (local_ordinal_type r = numRows - 1; r != 0; --r) {
612 for (local_ordinal_type j = 0; j < numVecs; ++j) {
615 const offset_type beg = ptr(r);
616 const offset_type end = ptr(r+1);
618 const matrix_scalar_type A_rr = val(beg);
619 for (offset_type k = beg + static_cast<offset_type> (1); k < end; ++k) {
620 const matrix_scalar_type A_rc = val(k);
621 const local_ordinal_type c = ind(k);
622 for (local_ordinal_type j = 0; j < numVecs; ++j) {
623 X(r, j) -= A_rc * X(c, j);
626 for (local_ordinal_type j = 0; j < numVecs; ++j) {
633 const local_ordinal_type r = 0;
634 for (local_ordinal_type j = 0; j < numVecs; ++j) {
637 const offset_type beg = ptr(r);
638 const offset_type end = ptr(r+1);
640 const matrix_scalar_type A_rr = val(beg);
641 for (
size_t k = beg + 1; k < end; ++k) {
642 const matrix_scalar_type A_rc = val(k);
643 const local_ordinal_type c = ind(k);
644 for (local_ordinal_type j = 0; j < numVecs; ++j) {
645 X(r, j) -= A_rc * X(c, j);
648 for (local_ordinal_type j = 0; j < numVecs; ++j) {
655 template<
class CrsMatrixType,
656 class DomainMultiVectorType,
657 class RangeMultiVectorType>
659 upperTriSolveCscUnitDiag (RangeMultiVectorType X,
660 const CrsMatrixType& A,
661 DomainMultiVectorType Y)
663 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
664 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
665 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
667 const local_ordinal_type numRows = A.numRows ();
668 const local_ordinal_type numCols = A.numCols ();
669 const local_ordinal_type numVecs = X.dimension_1 ();
670 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
671 typename CrsMatrixType::index_type ind = A.graph.entries;
672 typename CrsMatrixType::values_type val = A.values;
674 for (local_ordinal_type j = 0; j < numVecs; ++j) {
675 for (local_ordinal_type i = 0; i < numRows; ++i) {
689 for (local_ordinal_type c = numCols - 1; c != 0; --c) {
690 const offset_type beg = ptr(c);
691 const offset_type end = ptr(c+1);
692 for (offset_type k = beg; k < end; ++k) {
693 const matrix_scalar_type A_rc = val(k);
694 const local_ordinal_type r = ind(k);
695 for (local_ordinal_type j = 0; j < numVecs; ++j) {
696 X(r, j) -= A_rc * X(c, j);
703 const local_ordinal_type c = 0;
704 const offset_type beg = ptr(c);
705 const offset_type end = ptr(c+1);
706 for (offset_type k = beg; k < end; ++k) {
707 const matrix_scalar_type A_rc = val(k);
708 const local_ordinal_type r = ind(k);
709 for (local_ordinal_type j = 0; j < numVecs; ++j) {
710 X(r, j) -= A_rc * X(c, j);
717 template<
class CrsMatrixType,
718 class DomainMultiVectorType,
719 class RangeMultiVectorType>
721 upperTriSolveCsc (RangeMultiVectorType X,
722 const CrsMatrixType& A,
723 DomainMultiVectorType Y)
725 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
726 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
727 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
730 const local_ordinal_type numRows = A.numRows ();
731 const local_ordinal_type numCols = A.numCols ();
732 const local_ordinal_type numVecs = X.dimension_1 ();
733 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
734 typename CrsMatrixType::index_type ind = A.graph.entries;
735 typename CrsMatrixType::values_type val = A.values;
737 for (local_ordinal_type j = 0; j < numVecs; ++j) {
738 for (local_ordinal_type i = 0; i < numRows; ++i) {
752 for (local_ordinal_type c = numCols - 1; c != 0; --c) {
753 matrix_scalar_type A_cc = STS::zero ();
754 const offset_type beg = ptr(c);
755 const offset_type end = ptr(c+1);
756 for (offset_type k = beg; k < end; ++k) {
757 const local_ordinal_type r = ind(k);
758 const matrix_scalar_type A_rc = val(k);
766 for (local_ordinal_type j = 0; j < numVecs; ++j) {
767 X(r, j) -= A_rc * X(c, j);
771 for (local_ordinal_type j = 0; j < numVecs; ++j) {
778 const local_ordinal_type c = 0;
779 matrix_scalar_type A_cc = STS::zero ();
780 const offset_type beg = ptr(c);
781 const offset_type end = ptr(c+1);
782 for (offset_type k = beg; k < end; ++k) {
783 const local_ordinal_type r = ind(k);
784 const matrix_scalar_type A_rc = val(k);
792 for (local_ordinal_type j = 0; j < numVecs; ++j) {
793 X(r, j) -= A_rc * X(c, j);
797 for (local_ordinal_type j = 0; j < numVecs; ++j) {
804 template<
class CrsMatrixType,
805 class DomainMultiVectorType,
806 class RangeMultiVectorType>
808 lowerTriSolveCscUnitDiag (RangeMultiVectorType X,
809 const CrsMatrixType& A,
810 DomainMultiVectorType Y)
812 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
813 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
814 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
816 const local_ordinal_type numRows = A.numRows ();
817 const local_ordinal_type numCols = A.numCols ();
818 const local_ordinal_type numVecs = X.dimension_1 ();
819 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
820 typename CrsMatrixType::index_type ind = A.graph.entries;
821 typename CrsMatrixType::values_type val = A.values;
823 for (local_ordinal_type j = 0; j < numVecs; ++j) {
824 for (local_ordinal_type i = 0; i < numRows; ++i) {
829 for (local_ordinal_type c = 0; c < numCols; ++c) {
830 const offset_type beg = ptr(c);
831 const offset_type end = ptr(c+1);
832 for (offset_type k = beg; k < end; ++k) {
833 const local_ordinal_type r = ind(k);
834 const matrix_scalar_type A_rc = val(k);
835 for (local_ordinal_type j = 0; j < numVecs; ++j) {
836 X(r, j) -= A_rc * X(c, j);
843 template<
class CrsMatrixType,
844 class DomainMultiVectorType,
845 class RangeMultiVectorType>
847 upperTriSolveCscUnitDiagConj (RangeMultiVectorType X,
848 const CrsMatrixType& A,
849 DomainMultiVectorType Y)
851 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
852 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
853 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
856 const local_ordinal_type numRows = A.numRows ();
857 const local_ordinal_type numCols = A.numCols ();
858 const local_ordinal_type numVecs = X.dimension_1 ();
859 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
860 typename CrsMatrixType::index_type ind = A.graph.entries;
861 typename CrsMatrixType::values_type val = A.values;
863 for (local_ordinal_type j = 0; j < numVecs; ++j) {
864 for (local_ordinal_type i = 0; i < numRows; ++i) {
878 for (local_ordinal_type c = numCols - 1; c != 0; --c) {
879 const offset_type beg = ptr(c);
880 const offset_type end = ptr(c+1);
881 for (offset_type k = beg; k < end; ++k) {
882 const local_ordinal_type r = ind(k);
883 const matrix_scalar_type A_rc = STS::conj (val(k));
884 for (local_ordinal_type j = 0; j < numVecs; ++j) {
885 X(r, j) -= A_rc * X(c, j);
892 const local_ordinal_type c = 0;
893 const offset_type beg = ptr(c);
894 const offset_type end = ptr(c+1);
895 for (offset_type k = beg; k < end; ++k) {
896 const local_ordinal_type r = ind(k);
897 const matrix_scalar_type A_rc = STS::conj (val(k));
898 for (local_ordinal_type j = 0; j < numVecs; ++j) {
899 X(r, j) -= A_rc * X(c, j);
906 template<
class CrsMatrixType,
907 class DomainMultiVectorType,
908 class RangeMultiVectorType>
910 upperTriSolveCscConj (RangeMultiVectorType X,
911 const CrsMatrixType& A,
912 DomainMultiVectorType Y)
914 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
915 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
916 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
919 const local_ordinal_type numRows = A.numRows ();
920 const local_ordinal_type numCols = A.numCols ();
921 const local_ordinal_type numVecs = X.dimension_1 ();
922 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
923 typename CrsMatrixType::index_type ind = A.graph.entries;
924 typename CrsMatrixType::values_type val = A.values;
926 for (local_ordinal_type j = 0; j < numVecs; ++j) {
927 for (local_ordinal_type i = 0; i < numRows; ++i) {
941 for (local_ordinal_type c = numCols - 1; c != 0; --c) {
942 matrix_scalar_type A_cc = STS::zero ();
943 const offset_type beg = ptr(c);
944 const offset_type end = ptr(c+1);
945 for (offset_type k = beg; k < end; ++k) {
946 const local_ordinal_type r = ind(k);
947 const matrix_scalar_type A_rc = STS::conj (val(k));
955 for (local_ordinal_type j = 0; j < numVecs; ++j) {
956 X(r, j) -= A_rc * X(c, j);
960 for (local_ordinal_type j = 0; j < numVecs; ++j) {
967 const local_ordinal_type c = 0;
968 matrix_scalar_type A_cc = STS::zero ();
969 const offset_type beg = ptr(c);
970 const offset_type end = ptr(c+1);
971 for (offset_type k = beg; k < end; ++k) {
972 const local_ordinal_type r = ind(k);
973 const matrix_scalar_type A_rc = STS::conj (val(k));
981 for (local_ordinal_type j = 0; j < numVecs; ++j) {
982 X(r, j) -= A_rc * X(c, j);
986 for (local_ordinal_type j = 0; j < numVecs; ++j) {
993 template<
class CrsMatrixType,
994 class DomainMultiVectorType,
995 class RangeMultiVectorType>
997 lowerTriSolveCsc (RangeMultiVectorType X,
998 const CrsMatrixType& A,
999 DomainMultiVectorType Y)
1001 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
1002 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
1003 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
1006 const local_ordinal_type numRows = A.numRows ();
1007 const local_ordinal_type numCols = A.numCols ();
1008 const local_ordinal_type numVecs = X.dimension_1 ();
1009 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
1010 typename CrsMatrixType::index_type ind = A.graph.entries;
1011 typename CrsMatrixType::values_type val = A.values;
1013 for (local_ordinal_type j = 0; j < numVecs; ++j) {
1014 for (local_ordinal_type i = 0; i < numRows; ++i) {
1019 for (local_ordinal_type c = 0; c < numCols; ++c) {
1020 matrix_scalar_type A_cc = STS::zero ();
1021 const offset_type beg = ptr(c);
1022 const offset_type end = ptr(c+1);
1023 for (offset_type k = beg; k < end; ++k) {
1024 const local_ordinal_type r = ind(k);
1025 const matrix_scalar_type A_rc = val(k);
1033 for (local_ordinal_type j = 0; j < numVecs; ++j) {
1034 X(r, j) -= A_rc * X(c, j);
1038 for (local_ordinal_type j = 0; j < numVecs; ++j) {
1045 template<
class CrsMatrixType,
1046 class DomainMultiVectorType,
1047 class RangeMultiVectorType>
1049 lowerTriSolveCscUnitDiagConj (RangeMultiVectorType X,
1050 const CrsMatrixType& A,
1051 DomainMultiVectorType Y)
1053 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
1054 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
1055 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
1058 const local_ordinal_type numRows = A.numRows ();
1059 const local_ordinal_type numCols = A.numCols ();
1060 const local_ordinal_type numVecs = X.dimension_1 ();
1061 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
1062 typename CrsMatrixType::index_type ind = A.graph.entries;
1063 typename CrsMatrixType::values_type val = A.values;
1065 for (local_ordinal_type j = 0; j < numVecs; ++j) {
1066 for (local_ordinal_type i = 0; i < numRows; ++i) {
1071 for (local_ordinal_type c = 0; c < numCols; ++c) {
1072 const offset_type beg = ptr(c);
1073 const offset_type end = ptr(c+1);
1074 for (offset_type k = beg; k < end; ++k) {
1075 const local_ordinal_type r = ind(k);
1076 const matrix_scalar_type A_rc = STS::conj (val(k));
1077 for (local_ordinal_type j = 0; j < numVecs; ++j) {
1078 X(r, j) -= A_rc * X(c, j);
1085 template<
class CrsMatrixType,
1086 class DomainMultiVectorType,
1087 class RangeMultiVectorType>
1089 lowerTriSolveCscConj (RangeMultiVectorType X,
1090 const CrsMatrixType& A,
1091 DomainMultiVectorType Y)
1093 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
1094 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
1095 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
1098 const local_ordinal_type numRows = A.numRows ();
1099 const local_ordinal_type numCols = A.numCols ();
1100 const local_ordinal_type numVecs = X.dimension_1 ();
1101 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
1102 typename CrsMatrixType::index_type ind = A.graph.entries;
1103 typename CrsMatrixType::values_type val = A.values;
1105 for (local_ordinal_type j = 0; j < numVecs; ++j) {
1106 for (local_ordinal_type i = 0; i < numRows; ++i) {
1111 for (local_ordinal_type c = 0; c < numCols; ++c) {
1112 matrix_scalar_type A_cc = STS::zero ();
1113 const offset_type beg = ptr(c);
1114 const offset_type end = ptr(c+1);
1115 for (offset_type k = beg; k < end; ++k) {
1116 const local_ordinal_type r = ind(k);
1117 const matrix_scalar_type A_rc = STS::conj (val(k));
1125 for (local_ordinal_type j = 0; j < numVecs; ++j) {
1126 X(r, j) -= A_rc * X(c, j);
1130 for (local_ordinal_type j = 0; j < numVecs; ++j) {
1137 template<
class CrsMatrixType,
1138 class DomainMultiVectorType,
1139 class RangeMultiVectorType>
1141 triSolveKokkos (RangeMultiVectorType X,
1142 const CrsMatrixType& A,
1143 DomainMultiVectorType Y,
1148 typedef typename CrsMatrixType::index_type::non_const_value_type LO;
1149 const char prefix[] =
"Kokkos::Sequential::triSolveKokkos: ";
1150 const LO numRows = A.numRows ();
1151 const LO numCols = A.numCols ();
1152 const LO numVecs = X.dimension_1 ();
1153 typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
1158 std::invalid_argument, prefix <<
"triUplo has an invalid value " << triUplo
1164 "The matrix is neither lower nor upper triangular (triUplo="
1165 "Teuchos::UNDEF_TRI), so you may not call this method.");
1168 std::invalid_argument, prefix <<
"unitDiag has an invalid value "
1169 << unitDiag <<
". Valid values are Teuchos::UNIT_DIAG="
1174 std::invalid_argument, prefix <<
"Triangular solve with an empty matrix "
1175 "is only valid if the matrix has an implicit unit diagonal. This matrix "
1180 std::invalid_argument, prefix <<
"trans has an invalid value " << trans
1182 <<
"Teuchos::TRANS=" <<
Teuchos::TRANS <<
", and Teuchos::CONJ_TRANS="
1186 numRows != static_cast<LO> (X.dimension_0 ()), std::invalid_argument,
1187 prefix <<
"numRows = " << numRows <<
" != X.dimension_0() = " <<
1188 X.dimension_0 () <<
".");
1190 numCols != static_cast<LO> (Y.dimension_0 ()), std::invalid_argument,
1191 prefix <<
"numCols = " << numCols <<
" != Y.dimension_0() = " <<
1192 Y.dimension_0 () <<
".");
1194 numVecs != static_cast<LO> (Y.dimension_1 ()), std::invalid_argument,
1195 prefix <<
"X.dimension_1 () = " << numVecs <<
" != Y.dimension_1 () = "
1196 << Y.dimension_1 () <<
".");
1201 lowerTriSolveCsrUnitDiag (X, A, Y);
1203 lowerTriSolveCsr (X, A, Y);
1207 upperTriSolveCsrUnitDiag (X, A, Y);
1209 upperTriSolveCsr (X, A, Y);
1217 upperTriSolveCscUnitDiag (X, A, Y);
1219 upperTriSolveCsc (X, A, Y);
1225 lowerTriSolveCscUnitDiag (X, A, Y);
1227 lowerTriSolveCsc (X, A, Y);
1235 upperTriSolveCscUnitDiagConj (X, A, Y);
1237 upperTriSolveCscConj (X, A, Y);
1243 lowerTriSolveCscUnitDiagConj (X, A, Y);
1245 lowerTriSolveCscConj (X, A, Y);
1255 #endif // KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static KOKKOS_FORCEINLINE_FUNCTION T zero()
The zero value of T; the arithmetic identity.
Traits class for arithmetic on type T.
Declaration and definition of Kokkos::Details::ArithTraits.