42 #ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP 
   43 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP 
   53 #include "Tpetra_CrsMatrix.hpp" 
   54 #include "Tpetra_Export.hpp" 
   55 #include "Tpetra_Map.hpp" 
   56 #include "Tpetra_MultiVector.hpp" 
   57 #include "Tpetra_RowMatrix.hpp" 
   58 #include "Kokkos_Core.hpp" 
   59 #include "Teuchos_CommHelpers.hpp" 
   65 template<
class SC, 
class LO, 
class GO, 
class NT>
 
   70   const LO lclNumRows = 
static_cast<LO
> (rowMap.getNodeNumElements ());
 
   72   std::size_t maxNumEnt {0};
 
   73   for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
 
   75     maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
 
   80 template<
class SC, 
class LO, 
class GO, 
class NT>
 
   84                           const std::size_t maxNumEnt,
 
   85                           std::function<
void (
const LO lclRow,
 
   86                                               const Teuchos::ArrayView<LO>& ,
 
   87                                               const Teuchos::ArrayView<SC>& ,
 
   88                                               std::size_t  )> doForEachRow)
 
   90   Teuchos::Array<LO> indBuf (maxNumEnt);
 
   91   Teuchos::Array<SC> valBuf (maxNumEnt);
 
   93   for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
 
   95     Teuchos::ArrayView<LO> ind = indBuf.view (0, numEnt);
 
   96     Teuchos::ArrayView<SC> val = valBuf.view (0, numEnt);
 
   98     doForEachRow (lclRow, ind, val, numEnt);
 
  102 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  105                           std::function<
void (
const LO lclRow,
 
  106                                               const Teuchos::ArrayView<LO>& ,
 
  107                                               const Teuchos::ArrayView<SC>& ,
 
  108                                               std::size_t  )> doForEachRow)
 
  111   const LO lclNumRows = 
static_cast<LO
> (rowMap.getNodeNumElements ());
 
  112   const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix (A);
 
  114   forEachLocalRowMatrixRow<SC, LO, GO, NT> (A, lclNumRows, maxNumEnt, doForEachRow);
 
  120 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  123                                                               typename NT::device_type>& result,
 
  126   using KAT = Kokkos::ArithTraits<SC>;
 
  127   using mag_type = 
typename KAT::mag_type;
 
  129   auto rowNorms_h = Kokkos::create_mirror_view (result.rowNorms);
 
  131   auto rowScaledColNorms_h = Kokkos::create_mirror_view (result.rowScaledColNorms);
 
  133   forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
 
  134     [&] (
const LO lclRow,
 
  135          const Teuchos::ArrayView<LO>& ind,
 
  136          const Teuchos::ArrayView<SC>& val,
 
  137          std::size_t numEnt) {
 
  138       const mag_type rowNorm = rowNorms_h[lclRow];
 
  139       for (std::size_t k = 0; k < numEnt; ++k) {
 
  140         const mag_type matrixAbsVal = KAT::abs (val[k]);
 
  141         const LO lclCol = ind[k];
 
  143         rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
 
  151 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  152 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, 
typename NT::device_type>
 
  155   using KAT = Kokkos::ArithTraits<SC>;
 
  156   using val_type = 
typename KAT::val_type;
 
  157   using mag_type = 
typename KAT::mag_type;
 
  158   using KAM = Kokkos::ArithTraits<mag_type>;
 
  159   using device_type = 
typename NT::device_type;
 
  164   const LO lclNumRows = 
static_cast<LO
> (rowMap.getNodeNumElements ());
 
  165   const LO lclNumCols = 0; 
 
  166   constexpr 
bool assumeSymmetric = 
false; 
 
  167   equib_info_type result (lclNumRows, lclNumCols, assumeSymmetric);
 
  168   auto result_h = result.createMirrorView ();
 
  170   forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
 
  171     [&] (
const LO lclRow,
 
  172          const Teuchos::ArrayView<LO>& ind,
 
  173          const Teuchos::ArrayView<SC>& val,
 
  174          std::size_t numEnt) {
 
  175       mag_type rowNorm {0.0};
 
  176       val_type diagVal {0.0};
 
  177       const GO gblRow = rowMap.getGlobalElement (lclRow);
 
  179       const GO lclDiagColInd = colMap.getLocalElement (gblRow);
 
  181       for (std::size_t k = 0; k < numEnt; ++k) {
 
  182         const val_type matrixVal = val[k];
 
  183         if (KAT::isInf (matrixVal)) {
 
  184           result_h.foundInf = 
true;
 
  186         if (KAT::isNan (matrixVal)) {
 
  187           result_h.foundNan = 
true;
 
  189         const mag_type matrixAbsVal = KAT::abs (matrixVal);
 
  190         rowNorm += matrixAbsVal;
 
  191         const LO lclCol = ind[k];
 
  192         if (lclCol == lclDiagColInd) {
 
  199       if (diagVal == KAT::zero ()) {
 
  200         result_h.foundZeroDiag = 
true;
 
  202       if (rowNorm == KAM::zero ()) {
 
  203         result_h.foundZeroRowNorm = 
true;
 
  209       result_h.rowDiagonalEntries[lclRow] += diagVal;
 
  210       result_h.rowNorms[lclRow] = rowNorm;
 
  213   result.assign (result_h);
 
  219 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  220 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, 
typename NT::device_type>
 
  222                                             const bool assumeSymmetric)
 
  224   using KAT = Kokkos::ArithTraits<SC>;
 
  225   using val_type = 
typename KAT::val_type;
 
  226   using mag_type = 
typename KAT::mag_type;
 
  227   using KAM = Kokkos::ArithTraits<mag_type>;
 
  228   using device_type = 
typename NT::device_type;
 
  232   const LO lclNumRows = 
static_cast<LO
> (rowMap.getNodeNumElements ());
 
  233   const LO lclNumCols = 
static_cast<LO
> (colMap.getNodeNumElements ());
 
  236     (lclNumRows, lclNumCols, assumeSymmetric);
 
  237   auto result_h = result.createMirrorView ();
 
  239   forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
 
  240     [&] (
const LO lclRow,
 
  241          const Teuchos::ArrayView<LO>& ind,
 
  242          const Teuchos::ArrayView<SC>& val,
 
  243          std::size_t numEnt) {
 
  244       mag_type rowNorm {0.0};
 
  245       val_type diagVal {0.0};
 
  246       const GO gblRow = rowMap.getGlobalElement (lclRow);
 
  248       const GO lclDiagColInd = colMap.getLocalElement (gblRow);
 
  250       for (std::size_t k = 0; k < numEnt; ++k) {
 
  251         const val_type matrixVal = val[k];
 
  252         if (KAT::isInf (matrixVal)) {
 
  253           result_h.foundInf = 
true;
 
  255         if (KAT::isNan (matrixVal)) {
 
  256           result_h.foundNan = 
true;
 
  258         const mag_type matrixAbsVal = KAT::abs (matrixVal);
 
  259         rowNorm += matrixAbsVal;
 
  260         const LO lclCol = ind[k];
 
  261         if (lclCol == lclDiagColInd) {
 
  264         if (! assumeSymmetric) {
 
  265           result_h.colNorms[lclCol] += matrixAbsVal;
 
  271       if (diagVal == KAT::zero ()) {
 
  272         result_h.foundZeroDiag = 
true;
 
  274       if (rowNorm == KAM::zero ()) {
 
  275         result_h.foundZeroRowNorm = 
true;
 
  281       result_h.rowDiagonalEntries[lclRow] += diagVal;
 
  282       result_h.rowNorms[lclRow] = rowNorm;
 
  283       if (! assumeSymmetric &&
 
  284           lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
 
  285         result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
 
  293 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  294 class ComputeLocalRowScaledColumnNorms {
 
  297   using val_type = 
typename Kokkos::ArithTraits<SC>::val_type;
 
  298   using mag_type = 
typename Kokkos::ArithTraits<val_type>::mag_type;
 
  301   ComputeLocalRowScaledColumnNorms (
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
 
  302                                     const Kokkos::View<const mag_type*, device_type>& rowNorms,
 
  303                                     const crs_matrix_type& A) :
 
  304     rowScaledColNorms_ (rowScaledColNorms),
 
  305     rowNorms_ (rowNorms),
 
  306     A_lcl_ (A.getLocalMatrix ())
 
  309   KOKKOS_INLINE_FUNCTION 
void operator () (
const LO lclRow)
 const {
 
  310     using KAT = Kokkos::ArithTraits<val_type>;
 
  312     const auto curRow = A_lcl_.rowConst (lclRow);
 
  313     const mag_type rowNorm = rowNorms_[lclRow];
 
  314     const LO numEnt = curRow.length;
 
  315     for (LO k = 0; k < numEnt; ++k) {
 
  316       const mag_type matrixAbsVal = KAT::abs (curRow.value(k));
 
  317       const LO lclCol = curRow.colidx(k);
 
  319       Kokkos::atomic_add (&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
 
  324   run (
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
 
  325        const Kokkos::View<const mag_type*, device_type>& rowNorms,
 
  326        const crs_matrix_type& A)
 
  328     using execution_space = 
typename device_type::execution_space;
 
  329     using range_type = Kokkos::RangePolicy<execution_space, LO>;
 
  330     using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
 
  332     functor_type functor (rowScaledColNorms, rowNorms, A);
 
  333     const LO lclNumRows =
 
  334       static_cast<LO
> (A.getRowMap ()->getNodeNumElements ());
 
  335     Kokkos::parallel_for (
"computeLocalRowScaledColumnNorms",
 
  336                           range_type (0, lclNumRows), functor);
 
  340   Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
 
  341   Kokkos::View<const mag_type*, device_type> rowNorms_;
 
  344   local_matrix_type A_lcl_;
 
  347 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  349 computeLocalRowScaledColumnNorms_CrsMatrix (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
 
  350                                                               typename NT::device_type>& result,
 
  353   using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
 
  354   impl_type::run (result.rowScaledColNorms, result.rowNorms, A);
 
  357 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  359 computeLocalRowScaledColumnNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
 
  360                                                     typename NT::device_type>& result,
 
  364   using val_type = 
typename Kokkos::ArithTraits<SC>::val_type;
 
  365   using mag_type = 
typename Kokkos::ArithTraits<val_type>::mag_type;
 
  366   using device_type = 
typename NT::device_type;
 
  369   TEUCHOS_TEST_FOR_EXCEPTION
 
  370     (colMapPtr.get () == 
nullptr, std::invalid_argument,
 
  371      "computeLocalRowScaledColumnNorms: " 
  372      "Input matrix A must have a nonnull column Map.");
 
  373   const LO lclNumCols = 
static_cast<LO
> (colMapPtr->getNodeNumElements ());
 
  374   if (static_cast<std::size_t> (result.rowScaledColNorms.extent (0)) !=
 
  375       static_cast<std::size_t> (lclNumCols)) {
 
  376     result.rowScaledColNorms =
 
  377       Kokkos::View<mag_type*, device_type> (
"rowScaledColNorms", lclNumCols);
 
  380   const crs_matrix_type* A_crs = 
dynamic_cast<const crs_matrix_type*
> (&A);
 
  381   if (A_crs == 
nullptr) {
 
  385     computeLocalRowScaledColumnNorms_CrsMatrix (result, *A_crs);
 
  391 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  392 class ComputeLocalRowOneNorms {
 
  394   using val_type = 
typename Kokkos::ArithTraits<SC>::val_type;
 
  395   using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
 
  396   using local_matrix_type =
 
  397     typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_type;
 
  398   using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
 
  400   ComputeLocalRowOneNorms (
const equib_info_type& equib,   
 
  401                            const local_matrix_type& A_lcl, 
 
  402                            const local_map_type& rowMap,   
 
  403                            const local_map_type& colMap) : 
 
  416   using value_type = int;
 
  418   KOKKOS_INLINE_FUNCTION 
void init (value_type& dst)
 const 
  423   KOKKOS_INLINE_FUNCTION 
void 
  424   join (
volatile value_type& dst,
 
  425         const volatile value_type& src)
 const 
  430   KOKKOS_INLINE_FUNCTION 
void 
  431   operator () (
const LO lclRow, value_type& dst)
 const 
  433     using KAT = Kokkos::ArithTraits<val_type>;
 
  434     using mag_type = 
typename KAT::mag_type;
 
  435     using KAM = Kokkos::ArithTraits<mag_type>;
 
  437     const GO gblRow = rowMap_.getGlobalElement (lclRow);
 
  439     const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
 
  441     const auto curRow = A_lcl_.rowConst (lclRow);
 
  442     const LO numEnt = curRow.length;
 
  444     mag_type rowNorm {0.0};
 
  445     val_type diagVal {0.0};
 
  447     for (LO k = 0; k < numEnt; ++k) {
 
  448       const val_type matrixVal = curRow.value (k);
 
  449       if (KAT::isInf (matrixVal)) {
 
  452       if (KAT::isNan (matrixVal)) {
 
  455       const mag_type matrixAbsVal = KAT::abs (matrixVal);
 
  456       rowNorm += matrixAbsVal;
 
  457       const LO lclCol = curRow.colidx (k);
 
  458       if (lclCol == lclDiagColInd) {
 
  459         diagVal = curRow.value (k); 
 
  465     if (diagVal == KAT::zero ()) {
 
  468     if (rowNorm == KAM::zero ()) {
 
  471     equib_.rowDiagonalEntries[lclRow] = diagVal;
 
  472     equib_.rowNorms[lclRow] = rowNorm;
 
  476   equib_info_type equib_;
 
  477   local_matrix_type A_lcl_;
 
  478   local_map_type rowMap_;
 
  479   local_map_type colMap_;
 
  484 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  485 class ComputeLocalRowAndColumnOneNorms {
 
  487   using val_type = 
typename Kokkos::ArithTraits<SC>::val_type;
 
  488   using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
 
  489   using local_matrix_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_type;
 
  490   using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
 
  493   ComputeLocalRowAndColumnOneNorms (
const equib_info_type& equib,   
 
  494                                     const local_matrix_type& A_lcl, 
 
  495                                     const local_map_type& rowMap,   
 
  496                                     const local_map_type& colMap) : 
 
  509   using value_type = int;
 
  511   KOKKOS_INLINE_FUNCTION 
void init (value_type& dst)
 const 
  516   KOKKOS_INLINE_FUNCTION 
void 
  517   join (
volatile value_type& dst,
 
  518         const volatile value_type& src)
 const 
  523   KOKKOS_INLINE_FUNCTION 
void 
  524   operator () (
const LO lclRow, value_type& dst)
 const 
  526     using KAT = Kokkos::ArithTraits<val_type>;
 
  527     using mag_type = 
typename KAT::mag_type;
 
  528     using KAM = Kokkos::ArithTraits<mag_type>;
 
  530     const GO gblRow = rowMap_.getGlobalElement (lclRow);
 
  532     const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
 
  534     const auto curRow = A_lcl_.rowConst (lclRow);
 
  535     const LO numEnt = curRow.length;
 
  537     mag_type rowNorm {0.0};
 
  538     val_type diagVal {0.0};
 
  540     for (LO k = 0; k < numEnt; ++k) {
 
  541       const val_type matrixVal = curRow.value (k);
 
  542       if (KAT::isInf (matrixVal)) {
 
  545       if (KAT::isNan (matrixVal)) {
 
  548       const mag_type matrixAbsVal = KAT::abs (matrixVal);
 
  549       rowNorm += matrixAbsVal;
 
  550       const LO lclCol = curRow.colidx (k);
 
  551       if (lclCol == lclDiagColInd) {
 
  552         diagVal = curRow.value (k); 
 
  554       if (! equib_.assumeSymmetric) {
 
  555         Kokkos::atomic_add (&(equib_.colNorms[lclCol]), matrixAbsVal);
 
  561     if (diagVal == KAT::zero ()) {
 
  564     if (rowNorm == KAM::zero ()) {
 
  571     equib_.rowDiagonalEntries[lclRow] = diagVal;
 
  572     equib_.rowNorms[lclRow] = rowNorm;
 
  573     if (! equib_.assumeSymmetric &&
 
  574         lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
 
  577       equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
 
  582   equib_info_type equib_;
 
  583   local_matrix_type A_lcl_;
 
  584   local_map_type rowMap_;
 
  585   local_map_type colMap_;
 
  590 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  591 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, 
typename NT::device_type>
 
  594   using execution_space = 
typename NT::device_type::execution_space;
 
  595   using range_type = Kokkos::RangePolicy<execution_space, LO>;
 
  596   using functor_type = ComputeLocalRowOneNorms<SC, LO, GO, NT>;
 
  597   using val_type = 
typename Kokkos::ArithTraits<SC>::val_type;
 
  598   using device_type = 
typename NT::device_type;
 
  601   const LO lclNumRows = 
static_cast<LO
> (A.
getRowMap ()->getNodeNumElements ());
 
  602   const LO lclNumCols = 0; 
 
  603   constexpr 
bool assumeSymmetric = 
false; 
 
  604   equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
 
  610   Kokkos::parallel_reduce (
"computeLocalRowOneNorms",
 
  611                            range_type (0, lclNumRows), functor,
 
  613   equib.foundInf = (result & 1) != 0;
 
  614   equib.foundNan = (result & 2) != 0;
 
  615   equib.foundZeroDiag = (result & 4) != 0;
 
  616   equib.foundZeroRowNorm = (result & 8) != 0;
 
  622 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  623 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, 
typename NT::device_type>
 
  625                                             const bool assumeSymmetric)
 
  627   using execution_space = 
typename NT::device_type::execution_space;
 
  628   using range_type = Kokkos::RangePolicy<execution_space, LO>;
 
  629   using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
 
  630   using val_type = 
typename Kokkos::ArithTraits<SC>::val_type;
 
  631   using device_type = 
typename NT::device_type;
 
  634   const LO lclNumRows = 
static_cast<LO
> (A.
getRowMap ()->getNodeNumElements ());
 
  635   const LO lclNumCols = 
static_cast<LO
> (A.
getColMap ()->getNodeNumElements ());
 
  636   equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
 
  642   Kokkos::parallel_reduce (
"computeLocalRowAndColumnOneNorms",
 
  643                            range_type (0, lclNumRows), functor,
 
  645   equib.foundInf = (result & 1) != 0;
 
  646   equib.foundNan = (result & 2) != 0;
 
  647   equib.foundZeroDiag = (result & 4) != 0;
 
  648   equib.foundZeroRowNorm = (result & 8) != 0;
 
  656 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  657 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
 
  658                   typename NT::device_type>
 
  662   const crs_matrix_type* A_crs = 
dynamic_cast<const crs_matrix_type*
> (&A);
 
  664   if (A_crs == 
nullptr) {
 
  693 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  694 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, 
typename NT::device_type>
 
  696                                   const bool assumeSymmetric)
 
  699   const crs_matrix_type* A_crs = 
dynamic_cast<const crs_matrix_type*
> (&A);
 
  701   if (A_crs == 
nullptr) {
 
  709 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  713   return X.template getLocalView<typename NT::device_type::memory_space> ();
 
  716 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  718                       const LO whichColumn)
 
  719   -> decltype (Kokkos::subview (getLocalView_2d (X), Kokkos::ALL (), whichColumn))
 
  721   if (X.isConstantStride ()) {
 
  722     return Kokkos::subview (getLocalView_2d (X), Kokkos::ALL (), whichColumn);
 
  725     auto X_whichColumn = X.getVector (whichColumn);
 
  726     return Kokkos::subview (getLocalView_2d (*X_whichColumn), Kokkos::ALL (), 0);
 
  730 template<
class SC, 
class LO, 
class GO, 
class NT, 
class ViewValueType>
 
  733                                  const LO whichColumn,
 
  734                                  const Kokkos::View<ViewValueType*, typename NT::device_type>& view)
 
  736   using dev_memory_space = 
typename NT::device_type::memory_space;
 
  738   X.template modify<dev_memory_space> ();
 
  739   auto X_lcl = getLocalView_1d (X, whichColumn);
 
  743 template<
class SC, 
class LO, 
class GO, 
class NT, 
class ViewValueType>
 
  745 copyMultiVectorColumnInto1DView (
const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
 
  747                                  const LO whichColumn)
 
  749   using dev_memory_space = 
typename NT::device_type::memory_space;
 
  750   X.template sync<dev_memory_space> ();
 
  751   auto X_lcl = getLocalView_1d (X, whichColumn);
 
  755 template<
class OneDViewType, 
class IndexType>
 
  758   static_assert (OneDViewType::Rank == 1,
 
  759                  "OneDViewType must be a rank-1 Kokkos::View.");
 
  760   static_assert (std::is_integral<IndexType>::value,
 
  761                  "IndexType must be a built-in integer type.");
 
  762   FindZero (
const OneDViewType& x) : x_ (x) {}
 
  765   KOKKOS_INLINE_FUNCTION 
void 
  766   operator () (
const IndexType i, 
int& result)
 const {
 
  767     using val_type = 
typename OneDViewType::non_const_value_type;
 
  768     result = (x_(i) == Kokkos::ArithTraits<val_type>::zero ()) ? 1 : result;
 
  774 template<
class OneDViewType>
 
  775 bool findZero (
const OneDViewType& x)
 
  777   using view_type = 
typename OneDViewType::const_type;
 
  778   using execution_space = 
typename view_type::execution_space;
 
  779   using size_type = 
typename view_type::size_type;
 
  780   using functor_type = FindZero<view_type, size_type>;
 
  782   Kokkos::RangePolicy<execution_space, size_type> range (0, x.extent (0));
 
  783   range.set (Kokkos::ChunkSize (500)); 
 
  786   Kokkos::parallel_reduce (
"findZero", range, functor_type (x), foundZero);
 
  787   return foundZero == 1;
 
  790 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  792 globalizeRowOneNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
 
  793                                         typename NT::device_type>& equib,
 
  799   TEUCHOS_TEST_FOR_EXCEPTION
 
  800     (G.get () == 
nullptr, std::invalid_argument,
 
  801      "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph " 
  802      "(that is, getGraph() must return nonnull).");
 
  803   TEUCHOS_TEST_FOR_EXCEPTION
 
  804     (! G->isFillComplete (), std::invalid_argument,
 
  805      "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
 
  807   auto exp = G->getExporter ();
 
  808   if (! exp.is_null ()) {
 
  818     mv_type rowMapMV (G->getRowMap (), 2, 
false);
 
  820     copy1DViewIntoMultiVectorColumn (rowMapMV, 0, equib.rowNorms);
 
  821     copy1DViewIntoMultiVectorColumn (rowMapMV, 1, equib.rowDiagonalEntries);
 
  823       mv_type rangeMapMV (G->getRangeMap (), 2, 
true);
 
  827     copyMultiVectorColumnInto1DView (equib.rowNorms, rowMapMV, 0);
 
  828     copyMultiVectorColumnInto1DView (equib.rowDiagonalEntries, rowMapMV, 1);
 
  833     equib.foundZeroDiag = findZero (equib.rowDiagonalEntries);
 
  834     equib.foundZeroRowNorm = findZero (equib.rowNorms);
 
  837   constexpr 
int allReduceCount = 4;
 
  838   int lclNaughtyMatrix[allReduceCount];
 
  839   lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
 
  840   lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
 
  841   lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
 
  842   lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
 
  844   using Teuchos::outArg;
 
  845   using Teuchos::REDUCE_MAX;
 
  846   using Teuchos::reduceAll;
 
  847   auto comm = G->getComm ();
 
  848   int gblNaughtyMatrix[allReduceCount];
 
  849   reduceAll<int, int> (*comm, REDUCE_MAX, allReduceCount,
 
  850                        lclNaughtyMatrix, gblNaughtyMatrix);
 
  852   equib.foundInf = gblNaughtyMatrix[0] == 1;
 
  853   equib.foundNan = gblNaughtyMatrix[1] == 1;
 
  854   equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
 
  855   equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
 
  858 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  860 globalizeColumnOneNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
 
  861                                            typename NT::device_type>& equib,
 
  863                          const bool assumeSymmetric) 
 
  865   using val_type = 
typename Kokkos::ArithTraits<SC>::val_type;
 
  866   using mag_type = 
typename Kokkos::ArithTraits<val_type>::mag_type;
 
  868   using device_type = 
typename NT::device_type;
 
  871   TEUCHOS_TEST_FOR_EXCEPTION
 
  872     (G.get () == 
nullptr, std::invalid_argument,
 
  873      "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph " 
  874      "(that is, getGraph() must return nonnull).");
 
  875   TEUCHOS_TEST_FOR_EXCEPTION
 
  876     (! G->isFillComplete (), std::invalid_argument,
 
  877      "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
 
  879   auto imp = G->getImporter ();
 
  880   if (assumeSymmetric) {
 
  881     const LO numCols = 2;
 
  885     mv_type rowNorms_domMap (G->getDomainMap (), numCols, 
false);
 
  886     const bool rowMapSameAsDomainMap = G->getRowMap ()->isSameAs (* (G->getDomainMap ()));
 
  887     if (rowMapSameAsDomainMap) {
 
  888       copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 0, equib.rowNorms);
 
  889       copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 1, equib.rowDiagonalEntries);
 
  895       mv_type rowNorms_rowMap (G->getRowMap (), numCols, 
true);
 
  896       copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 0, equib.rowNorms);
 
  897       copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 1, equib.rowDiagonalEntries);
 
  903     std::unique_ptr<mv_type> rowNorms_colMap;
 
  904     if (imp.is_null ()) {
 
  907         std::unique_ptr<mv_type> (
new mv_type (rowNorms_domMap, * (G->getColMap ())));
 
  911         std::unique_ptr<mv_type> (
new mv_type (G->getColMap (), numCols, 
true));
 
  916     const LO lclNumCols =
 
  917       static_cast<LO
> (G->getColMap ()->getNodeNumElements ());
 
  918     if (static_cast<LO> (equib.colNorms.extent (0)) != lclNumCols) {
 
  920         Kokkos::View<mag_type*, device_type> (
"colNorms", lclNumCols);
 
  922     if (static_cast<LO> (equib.colDiagonalEntries.extent (0)) != lclNumCols) {
 
  923       equib.colDiagonalEntries =
 
  924         Kokkos::View<val_type*, device_type> (
"colDiagonalEntries", lclNumCols);
 
  929     copyMultiVectorColumnInto1DView (equib.colNorms, *rowNorms_colMap, 0);
 
  930     copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, *rowNorms_colMap, 1);
 
  933     if (! imp.is_null ()) {
 
  934       const LO numCols = 3;
 
  943       mv_type colMapMV (G->getColMap (), numCols, 
false);
 
  945       copy1DViewIntoMultiVectorColumn (colMapMV, 0, equib.colNorms);
 
  946       copy1DViewIntoMultiVectorColumn (colMapMV, 1, equib.colDiagonalEntries);
 
  947       copy1DViewIntoMultiVectorColumn (colMapMV, 2, equib.rowScaledColNorms);
 
  949         mv_type domainMapMV (G->getDomainMap (), numCols, 
true);
 
  950         domainMapMV.doExport (colMapMV, *imp, 
Tpetra::ADD); 
 
  953       copyMultiVectorColumnInto1DView (equib.colNorms, colMapMV, 0);
 
  954       copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, colMapMV, 1);
 
  955       copyMultiVectorColumnInto1DView (equib.rowScaledColNorms, colMapMV, 2);
 
  962 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  963 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
 
  964                            typename NT::device_type>
 
  967   TEUCHOS_TEST_FOR_EXCEPTION
 
  969      "computeRowOneNorms: Input matrix A must be fillComplete.");
 
  972   Details::globalizeRowOneNorms (result, A);
 
  976 template<
class SC, 
class LO, 
class GO, 
class NT>
 
  977 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
 
  978                            typename NT::device_type>
 
  980                              const bool assumeSymmetric)
 
  982   TEUCHOS_TEST_FOR_EXCEPTION
 
  984      "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
 
  987   Details::globalizeRowOneNorms (result, A);
 
  988   if (! assumeSymmetric) {
 
  992     Details::computeLocalRowScaledColumnNorms (result, A);
 
  994   Details::globalizeColumnOneNorms (result, A, assumeSymmetric);
 
 1006 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC,LO,GO,NT) \ 
 1007   template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \ 
 1008   computeRowOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \ 
 1010   template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \ 
 1011   computeRowAndColumnOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \ 
 1012                                const bool assumeSymmetric); 
 1014 #endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP 
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes the distribution of columns over processes. 
 
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
 
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
 
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsMatrix. 
 
virtual void getLocalRowCopy(LocalOrdinal LocalRow, const Teuchos::ArrayView< LocalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const =0
Get a copy of the given local row's entries. 
 
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsM...
 
One or more distributed dense vectors. 
 
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row. 
 
virtual Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const =0
The RowGraph associated with this matrix. 
 
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms ("row sums") of the input sparse matrix A, in a way suitable for one-sid...
 
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute LOCAL row and column one-norms ("row sums" etc.) of the input sparse matrix A...
 
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::CrsMatrix. 
 
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A...
 
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
 
Struct storing results of Tpetra::computeRowAndColumnOneNorms. 
 
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst. 
 
void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > &result, const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
For a given Tpetra::RowMatrix that is not a Tpetra::CrsMatrix, assume that result.rowNorms has been computed (and globalized), and compute result.rowScaledColNorms. 
 
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::CrsMatrix. 
 
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute LOCAL row one-norms ("row sums" etc.) of the input sparse matrix A. 
 
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
 
Sum new values into existing values. 
 
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
 
typename Node::device_type device_type
The Kokkos device type. 
 
Replace existing values with new values. 
 
void assign(const EquilibrationInfo< ScalarType, SrcDeviceType > &src)
Deep-copy src into *this. 
 
virtual bool isFillComplete() const =0
Whether fillComplete() has been called. 
 
A read-only, row-oriented interface to a sparse matrix. 
 
local_matrix_type getLocalMatrix() const 
The local sparse matrix. 
 
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix. 
 
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes. 
 
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.