41 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP
42 #define TPETRA_MATRIXMATRIX_DEF_HPP
43 #include "Tpetra_ConfigDefs.hpp"
45 #include "Teuchos_VerboseObject.hpp"
46 #include "Teuchos_Array.hpp"
48 #include "Tpetra_CrsMatrix.hpp"
50 #include "Tpetra_RowMatrixTransposer.hpp"
52 #include "Tpetra_Details_radixSort.hpp"
54 #include "Tpetra_ConfigDefs.hpp"
55 #include "Tpetra_Map.hpp"
56 #include "Tpetra_Export.hpp"
60 #include <type_traits>
61 #include "Teuchos_FancyOStream.hpp"
63 #include "TpetraExt_MatrixMatrix_ExtraKernels_def.hpp"
66 #include "KokkosSparse_spgemm.hpp"
67 #include "KokkosSparse_spadd.hpp"
81 #include "TpetraExt_MatrixMatrix_OpenMP.hpp"
82 #include "TpetraExt_MatrixMatrix_Cuda.hpp"
87 namespace MatrixMatrix{
95 template <
class Scalar,
105 bool call_FillComplete_on_result,
106 const std::string& label,
107 const Teuchos::RCP<Teuchos::ParameterList>& params)
113 typedef LocalOrdinal LO;
114 typedef GlobalOrdinal GO;
122 #ifdef HAVE_TPETRA_MMM_TIMINGS
123 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
124 using Teuchos::TimeMonitor;
127 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Setup"))));
130 const std::string prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
135 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
136 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
141 RCP<const crs_matrix_type> Aprime = null;
145 RCP<const crs_matrix_type> Bprime = null;
155 const bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
157 bool use_optimized_ATB =
false;
158 if (transposeA && !transposeB && call_FillComplete_on_result && newFlag)
159 use_optimized_ATB =
true;
161 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later.
162 use_optimized_ATB =
false;
165 using Teuchos::ParameterList;
166 RCP<ParameterList> transposeParams (
new ParameterList);
167 transposeParams->set (
"sort",
false);
169 if (!use_optimized_ATB && transposeA) {
170 transposer_type transposer (rcpFromRef (A));
171 Aprime = transposer.createTranspose (transposeParams);
174 Aprime = rcpFromRef(A);
178 transposer_type transposer (rcpFromRef (B));
179 Bprime = transposer.createTranspose (transposeParams);
182 Bprime = rcpFromRef(B);
192 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
193 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
194 "must match for matrix-matrix product. op(A) is "
195 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
201 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
202 prefix <<
"ERROR, dimensions of result C must "
204 <<
" rows, should have at least " << Aouter << std::endl);
216 int numProcs = A.
getComm()->getSize();
220 crs_matrix_struct_type Aview;
221 crs_matrix_struct_type Bview;
223 RCP<const map_type> targetMap_A = Aprime->getRowMap();
224 RCP<const map_type> targetMap_B = Bprime->getRowMap();
226 #ifdef HAVE_TPETRA_MMM_TIMINGS
228 TimeMonitor MM_importExtract(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All I&X")));
234 if (!use_optimized_ATB) {
235 RCP<const import_type> dummyImporter;
236 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label, params);
242 targetMap_B = Aprime->getColMap();
245 if (!use_optimized_ATB)
246 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label, params);
248 #ifdef HAVE_TPETRA_MMM_TIMINGS
251 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Multiply"))));
255 if (use_optimized_ATB) {
256 MMdetails::mult_AT_B_newmatrix(A, B, C, label,params);
258 }
else if (call_FillComplete_on_result && newFlag) {
259 MMdetails::mult_A_B_newmatrix(Aview, Bview, C, label,params);
261 }
else if (call_FillComplete_on_result) {
262 MMdetails::mult_A_B_reuse(Aview, Bview, C, label,params);
268 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
270 MMdetails::mult_A_B(Aview, Bview, crsmat, label,params);
273 #ifdef HAVE_TPETRA_MMM_TIMINGS
274 TimeMonitor MM4(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All FillComplete")));
283 C.
fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
288 template <
class Scalar,
297 bool call_FillComplete_on_result,
298 const std::string& label,
299 const Teuchos::RCP<Teuchos::ParameterList>& params)
303 typedef LocalOrdinal LO;
304 typedef GlobalOrdinal GO;
311 #ifdef HAVE_TPETRA_MMM_TIMINGS
312 std::string prefix_mmm = std::string(
"TpetraExt ")+ label + std::string(
": ");
313 using Teuchos::TimeMonitor;
314 TimeMonitor MM(*TimeMonitor::getNewTimer(prefix_mmm+std::string(
"Jacobi All Setup")));
317 const std::string prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
322 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
323 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
325 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
326 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
335 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
336 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
337 "must match for matrix-matrix product. op(A) is "
338 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
344 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
345 prefix <<
"ERROR, dimensions of result C must "
347 <<
" rows, should have at least "<< Aouter << std::endl);
359 int numProcs = A.
getComm()->getSize();
363 crs_matrix_struct_type Aview;
364 crs_matrix_struct_type Bview;
366 RCP<const map_type> targetMap_A = Aprime->getRowMap();
367 RCP<const map_type> targetMap_B = Bprime->getRowMap();
369 #ifdef HAVE_TPETRA_MMM_TIMINGS
370 TimeMonitor MM2(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All I&X")));
376 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
377 if(!params.is_null()) {
378 importParams1->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
379 int mm_optimization_core_count=0;
380 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
382 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
383 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
384 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
385 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
386 auto & ip1slist = importParams1->sublist(
"matrixmatrix: kernel params",
false);
387 ip1slist.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
388 ip1slist.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
389 ip1slist.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
393 RCP<const import_type> dummyImporter;
394 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
false, label,importParams1);
399 targetMap_B = Aprime->getColMap();
404 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
405 if(!params.is_null()) {
406 importParams2->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
408 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
410 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
411 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
412 auto & ip2slist = importParams2->sublist(
"matrixmatrix: kernel params",
false);
413 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
414 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
415 ip2slist.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
416 ip2slist.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
417 ip2slist.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
420 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label,importParams2);
422 #ifdef HAVE_TPETRA_MMM_TIMINGS
424 TimeMonitor MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All Multiply")));
428 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
431 bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
433 if (call_FillComplete_on_result && newFlag) {
434 MMdetails::jacobi_A_B_newmatrix(omega, Dinv, Aview, Bview, C, label, params);
436 }
else if (call_FillComplete_on_result) {
437 MMdetails::jacobi_A_B_reuse(omega, Dinv, Aview, Bview, C, label, params);
440 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"jacobi_A_B_general not implemented");
463 template <
class Scalar,
474 using Teuchos::Array;
478 typedef LocalOrdinal LO;
479 typedef GlobalOrdinal GO;
484 const std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
486 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
487 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. "
488 "(Result matrix B is not required to be isFillComplete()).");
489 TEUCHOS_TEST_FOR_EXCEPTION(B.
isFillComplete() , std::runtime_error,
490 prefix <<
"ERROR, input matrix B must not be fill complete!");
491 TEUCHOS_TEST_FOR_EXCEPTION(B.
isStaticGraph() , std::runtime_error,
492 prefix <<
"ERROR, input matrix B must not have static graph!");
494 prefix <<
"ERROR, input matrix B must not be locally indexed!");
496 using Teuchos::ParameterList;
497 RCP<ParameterList> transposeParams (
new ParameterList);
498 transposeParams->set (
"sort",
false);
500 RCP<const crs_matrix_type> Aprime = null;
502 transposer_type transposer (rcpFromRef (A));
503 Aprime = transposer.createTranspose (transposeParams);
506 Aprime = rcpFromRef(A);
514 if (scalarB != Teuchos::ScalarTraits<SC>::one())
519 if (scalarA != Teuchos::ScalarTraits<SC>::zero()) {
520 for (LO i = 0; (size_t)i < numMyRows; ++i) {
521 row = B.
getRowMap()->getGlobalElement(i);
522 Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries);
524 if (scalarA != Teuchos::ScalarTraits<SC>::one())
525 for (
size_t j = 0; j < a_numEntries; ++j)
526 a_vals[j] *= scalarA;
536 namespace ColMapFunctors
538 template<
typename ByteView,
typename GView>
541 UnionEntries(ByteView entryUnion_,
const GView gids_) : entryUnion(entryUnion_), gids(gids_) {}
542 KOKKOS_INLINE_FUNCTION
void operator()(
const size_t i)
const
544 entryUnion(gids(i)) = 1;
550 template<
typename LView,
typename GView>
551 struct ConvertGlobalToLocal
553 ConvertGlobalToLocal(
const LView gtol_,
const GView gids_, LView lids_) : gtol(gtol_), gids(gids_), lids(lids_) {}
554 KOKKOS_INLINE_FUNCTION
void operator()(
const size_t i)
const
556 lids(i) = gtol(gids(i));
566 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
567 Teuchos::RCP<Map<LocalOrdinal, GlobalOrdinal, Node> > AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
568 makeColMapAndConvertGids(GlobalOrdinal ncols,
569 const typename AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::global_col_inds_array& gids,
570 typename AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::col_inds_array& lids,
571 const Teuchos::RCP<
const Teuchos::Comm<int>>& comm)
573 using namespace ColMapFunctors;
576 typedef Kokkos::View<char*, device_type> ByteView;
577 typedef global_col_inds_array GView;
578 typedef col_inds_array LView;
580 auto nentries = gids.extent(0);
582 ByteView entryUnion(
"entry union", ncols);
583 UnionEntries<ByteView, GView> ue(entryUnion, gids);
584 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_unionEntries", range_type(0, nentries), ue);
586 LView gtol(
"global col -> local col", ncols + 1);
587 ::Tpetra::Details::computeOffsetsFromCounts<decltype(gtol), decltype(entryUnion)>(gtol, entryUnion);
589 ConvertGlobalToLocal<LView, GView> cgtl(gtol, gids, lids);
590 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertGlobalToLocal", range_type(0, gids.extent(0)), cgtl);
592 execution_space().fence();
593 GView colmap(
"column map", gtol(ncols));
594 size_t localIter = 0;
595 execution_space().fence();
596 for(
size_t i = 0; i < entryUnion.extent(0); i++)
598 if(entryUnion(i) != 0)
600 colmap(localIter++) = i;
603 execution_space().fence();
605 return rcp(
new map_type(Teuchos::OrdinalTraits<GlobalOrdinal>::invalid(), colmap, 0, comm));
608 template <
class Scalar,
612 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
614 const bool transposeA,
617 const bool transposeB,
621 const Teuchos::RCP<Teuchos::ParameterList>& params)
627 Teuchos::RCP<crs_matrix_type> C = rcp(
new crs_matrix_type(CrowMap, 0));
629 add(alpha,transposeA,A,beta,transposeB,B,*C,domainMap,rangeMap,params);
633 template <
class Scalar,
639 const bool transposeA,
642 const bool transposeB,
647 const Teuchos::RCP<Teuchos::ParameterList>& params)
651 using Teuchos::rcpFromRef;
652 using Teuchos::rcp_implicit_cast;
653 using Teuchos::rcp_dynamic_cast;
654 using Teuchos::TimeMonitor;
656 typedef LocalOrdinal LO;
657 typedef GlobalOrdinal GO;
664 typedef AddDetails::AddKernels<SC,LO,GO,NO> AddKern;
665 const char* prefix_mmm =
"TpetraExt::MatrixMatrix::add: ";
666 constexpr
bool debug =
false;
668 #ifdef HAVE_TPETRA_MMM_TIMINGS
669 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"transpose"))));
673 std::ostringstream os;
674 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
675 <<
"TpetraExt::MatrixMatrix::add" << std::endl;
676 std::cerr << os.str ();
679 TEUCHOS_TEST_FOR_EXCEPTION
681 prefix_mmm <<
"A and B must both be fill complete.");
682 #ifdef HAVE_TPETRA_DEBUG
685 const bool domainMapsSame =
686 (! transposeA && ! transposeB &&
688 (! transposeA && transposeB &&
690 ( transposeA && ! transposeB &&
692 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
693 prefix_mmm <<
"The domain Maps of Op(A) and Op(B) are not the same.");
695 const bool rangeMapsSame =
696 (! transposeA && ! transposeB &&
698 (! transposeA && transposeB &&
700 ( transposeA && ! transposeB &&
702 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
703 prefix_mmm <<
"The range Maps of Op(A) and Op(B) are not the same.");
705 #endif // HAVE_TPETRA_DEBUG
707 using Teuchos::ParameterList;
708 RCP<ParameterList> transposeParams (
new ParameterList);
709 transposeParams->set (
"sort",
false);
713 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
715 transposer_type transposer (Aprime);
716 Aprime = transposer.createTranspose (transposeParams);
719 #ifdef HAVE_TPETRA_DEBUG
720 TEUCHOS_TEST_FOR_EXCEPTION
721 (Aprime.is_null (), std::logic_error,
722 prefix_mmm <<
"Failed to compute Op(A). "
723 "Please report this bug to the Tpetra developers.");
724 #endif // HAVE_TPETRA_DEBUG
727 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
730 std::ostringstream os;
731 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
732 <<
"Form explicit xpose of B" << std::endl;
733 std::cerr << os.str ();
735 transposer_type transposer (Bprime);
736 Bprime = transposer.createTranspose (transposeParams);
738 #ifdef HAVE_TPETRA_DEBUG
739 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
740 prefix_mmm <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
741 TEUCHOS_TEST_FOR_EXCEPTION(
742 !Aprime->isFillComplete () || !Bprime->isFillComplete (), std::invalid_argument,
743 prefix_mmm <<
"Aprime and Bprime must both be fill complete. "
744 "Please report this bug to the Tpetra developers.");
745 #endif // HAVE_TPETRA_DEBUG
746 RCP<const map_type> CDomainMap = domainMap;
747 RCP<const map_type> CRangeMap = rangeMap;
748 if(CDomainMap.is_null())
750 CDomainMap = Bprime->getDomainMap();
752 if(CRangeMap.is_null())
754 CRangeMap = Bprime->getRangeMap();
756 assert(!(CDomainMap.is_null()));
757 assert(!(CRangeMap.is_null()));
758 typedef typename AddKern::values_array values_array;
759 typedef typename AddKern::row_ptrs_array row_ptrs_array;
760 typedef typename AddKern::col_inds_array col_inds_array;
761 bool AGraphSorted = Aprime->getCrsGraph()->isSorted();
762 bool BGraphSorted = Bprime->getCrsGraph()->isSorted();
764 row_ptrs_array rowptrs;
765 col_inds_array colinds;
766 auto acolmap = Aprime->getColMap()->getMyGlobalIndices();
767 auto bcolmap = Bprime->getColMap()->getMyGlobalIndices();
768 #ifdef HAVE_TPETRA_MMM_TIMINGS
770 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"rowmap check/import"))));
772 if(!(Aprime->getRowMap()->isSameAs(*(Bprime->getRowMap()))))
775 auto import = rcp(
new import_type(Aprime->getRowMap(), Bprime->getRowMap()));
781 Aprime = importAndFillCompleteCrsMatrix<crs_matrix_type>(Aprime, *
import, Bprime->getDomainMap(), Bprime->getRangeMap());
783 bool matchingColMaps = Aprime->getColMap()->isSameAs(*(Bprime->getColMap()));
784 bool sorted = AGraphSorted && BGraphSorted;
785 RCP<const map_type> CrowMap;
786 RCP<const map_type> CcolMap;
787 RCP<const import_type> Cimport = Teuchos::null;
788 RCP<export_type> Cexport = Teuchos::null;
790 if(!matchingColMaps && !(CDomainMap->isContiguous()))
793 #ifdef HAVE_TPETRA_MMM_TIMINGS
795 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"fallback to CrsMatrix::add"))));
798 std::ostringstream os;
799 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
800 <<
"Call Bprime->add(...)" << std::endl;
801 std::cerr << os.str ();
803 Teuchos::RCP<crs_matrix_type> C_ = Teuchos::rcp_static_cast<crs_matrix_type>(Bprime->add(alpha, *Aprime, beta, CDomainMap, CRangeMap, params));
805 C.
setAllValues(C_->getLocalMatrix().graph.row_map,C_->getLocalMatrix().graph.entries,C_->getLocalMatrix().values);
806 C.
expertStaticFillComplete(CDomainMap, CRangeMap, C_->getGraph()->getImporter(), C_->getGraph()->getExporter(), params);
809 else if(!matchingColMaps)
811 #ifdef HAVE_TPETRA_MMM_TIMINGS
813 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"mismatched col map full kernel"))));
816 auto Acolmap = Aprime->getColMap()->getMyGlobalIndices();
817 auto Bcolmap = Bprime->getColMap()->getMyGlobalIndices();
818 typename AddKern::global_col_inds_array globalColinds(
"", 0);
820 std::ostringstream os;
821 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
822 <<
"Call AddKern::convertToGlobalAndAdd(...)" << std::endl;
823 std::cerr << os.str ();
825 AddKern::convertToGlobalAndAdd(
826 Aprime->getLocalMatrix(), alpha, Bprime->getLocalMatrix(), beta, Acolmap, Bcolmap,
827 CRangeMap->getMinGlobalIndex(), Aprime->getGlobalNumCols(), vals, rowptrs, globalColinds);
828 colinds = col_inds_array(
"C colinds", globalColinds.extent(0));
830 std::ostringstream os;
831 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
832 <<
"Finished AddKern::convertToGlobalAndAdd(...)" << std::endl;
833 std::cerr << os.str ();
835 #ifdef HAVE_TPETRA_MMM_TIMINGS
837 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"building optimized column map"))));
839 CrowMap = Bprime->getRowMap();
842 CcolMap = AddKern::makeColMapAndConvertGids(Aprime->getGlobalNumCols(), globalColinds, colinds, comm);
847 auto localA = Aprime->getLocalMatrix();
848 auto localB = Bprime->getLocalMatrix();
849 auto Avals = localA.values;
850 auto Bvals = localB.values;
851 auto Arowptrs = localA.graph.row_map;
852 auto Browptrs = localB.graph.row_map;
853 auto Acolinds = localA.graph.entries;
854 auto Bcolinds = localB.graph.entries;
858 #ifdef HAVE_TPETRA_MMM_TIMINGS
860 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"sorted entries full kernel"))));
863 std::ostringstream os;
864 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
865 <<
"Call AddKern::addSorted(...)" << std::endl;
866 std::cerr << os.str ();
868 AddKern::addSorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, vals, rowptrs, colinds);
873 #ifdef HAVE_TPETRA_MMM_TIMINGS
875 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"mm add unsorted entries full kernel"))));
878 std::ostringstream os;
879 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
880 <<
"Call AddKern::addUnsorted(...)" << std::endl;
881 std::cerr << os.str ();
883 AddKern::addUnsorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, Aprime->getGlobalNumCols(), vals, rowptrs, colinds);
885 CrowMap = Bprime->getRowMap();
886 CcolMap = Bprime->getColMap();
889 #ifdef HAVE_TPETRA_MMM_TIMINGS
891 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Tpetra::Crs constructor"))));
896 #ifdef HAVE_TPETRA_MMM_TIMINGS
898 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Tpetra::Crs expertStaticFillComplete"))));
900 if(!CDomainMap->isSameAs(*CcolMap))
903 std::ostringstream os;
904 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
905 <<
"Create Cimport" << std::endl;
906 std::cerr << os.str ();
908 Cimport = rcp(
new import_type(CDomainMap, CcolMap));
910 if(!CrowMap->isSameAs(*CRangeMap))
913 std::ostringstream os;
914 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
915 <<
"Create Cexport" << std::endl;
916 std::cerr << os.str ();
918 Cexport = rcp(
new export_type(CrowMap, CRangeMap));
922 std::ostringstream os;
923 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
924 <<
"Call C->expertStaticFillComplete(...)" << std::endl;
925 std::cerr << os.str ();
931 template <
class Scalar,
944 using Teuchos::Array;
945 using Teuchos::ArrayRCP;
946 using Teuchos::ArrayView;
949 using Teuchos::rcp_dynamic_cast;
950 using Teuchos::rcpFromRef;
951 using Teuchos::tuple;
954 typedef Teuchos::ScalarTraits<Scalar> STS;
962 std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
964 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
965 prefix <<
"The case C == null does not actually work. Fixing this will require an interface change.");
967 TEUCHOS_TEST_FOR_EXCEPTION(
969 prefix <<
"Both input matrices must be fill complete before calling this function.");
971 #ifdef HAVE_TPETRA_DEBUG
973 const bool domainMapsSame =
977 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
978 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
980 const bool rangeMapsSame =
984 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
985 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
987 #endif // HAVE_TPETRA_DEBUG
989 using Teuchos::ParameterList;
990 RCP<ParameterList> transposeParams (
new ParameterList);
991 transposeParams->set (
"sort",
false);
994 RCP<const crs_matrix_type> Aprime;
996 transposer_type theTransposer (rcpFromRef (A));
997 Aprime = theTransposer.createTranspose (transposeParams);
1000 Aprime = rcpFromRef (A);
1003 #ifdef HAVE_TPETRA_DEBUG
1004 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
1005 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
1006 #endif // HAVE_TPETRA_DEBUG
1009 RCP<const crs_matrix_type> Bprime;
1011 transposer_type theTransposer (rcpFromRef (B));
1012 Bprime = theTransposer.createTranspose (transposeParams);
1015 Bprime = rcpFromRef (B);
1018 #ifdef HAVE_TPETRA_DEBUG
1019 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
1020 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
1021 #endif // HAVE_TPETRA_DEBUG
1024 if (! C.is_null ()) {
1025 C->setAllToScalar (STS::zero ());
1033 if (Aprime->getRowMap ()->isSameAs (* (Bprime->getRowMap ())) {
1034 RCP<const map_type> rowMap = Aprime->getRowMap ();
1036 RCP<const crs_graph_type> A_graph =
1037 rcp_dynamic_cast<
const crs_graph_type> (Aprime->getGraph (),
true);
1038 #ifdef HAVE_TPETRA_DEBUG
1039 TEUCHOS_TEST_FOR_EXCEPTION(A_graph.is_null (), std::logic_error,
1040 "Tpetra::MatrixMatrix::Add: Graph of Op(A) is null. "
1041 "Please report this bug to the Tpetra developers.");
1042 #endif // HAVE_TPETRA_DEBUG
1043 RCP<const crs_graph_type> B_graph =
1044 rcp_dynamic_cast<
const crs_graph_type> (Bprime->getGraph (),
true);
1045 #ifdef HAVE_TPETRA_DEBUG
1046 TEUCHOS_TEST_FOR_EXCEPTION(B_graph.is_null (), std::logic_error,
1047 "Tpetra::MatrixMatrix::Add: Graph of Op(B) is null. "
1048 "Please report this bug to the Tpetra developers.");
1049 #endif // HAVE_TPETRA_DEBUG
1050 RCP<const map_type> A_colMap = A_graph->getColMap ();
1051 #ifdef HAVE_TPETRA_DEBUG
1052 TEUCHOS_TEST_FOR_EXCEPTION(A_colMap.is_null (), std::logic_error,
1053 "Tpetra::MatrixMatrix::Add: Column Map of Op(A) is null. "
1054 "Please report this bug to the Tpetra developers.");
1055 #endif // HAVE_TPETRA_DEBUG
1056 RCP<const map_type> B_colMap = B_graph->getColMap ();
1057 #ifdef HAVE_TPETRA_DEBUG
1058 TEUCHOS_TEST_FOR_EXCEPTION(B_colMap.is_null (), std::logic_error,
1059 "Tpetra::MatrixMatrix::Add: Column Map of Op(B) is null. "
1060 "Please report this bug to the Tpetra developers.");
1061 TEUCHOS_TEST_FOR_EXCEPTION(A_graph->getImporter ().is_null (),
1063 "Tpetra::MatrixMatrix::Add: Op(A)'s Import is null. "
1064 "Please report this bug to the Tpetra developers.");
1065 TEUCHOS_TEST_FOR_EXCEPTION(B_graph->getImporter ().is_null (),
1067 "Tpetra::MatrixMatrix::Add: Op(B)'s Import is null. "
1068 "Please report this bug to the Tpetra developers.");
1069 #endif // HAVE_TPETRA_DEBUG
1072 RCP<const import_type> sumImport =
1073 A_graph->getImporter ()->setUnion (* (B_graph->getImporter ()));
1074 RCP<const map_type> C_colMap = sumImport->getTargetMap ();
1082 ArrayView<const LocalOrdinal> A_local_ind;
1083 Array<GlobalOrdinal> A_global_ind;
1084 ArrayView<const LocalOrdinal> B_local_ind;
1085 Array<GlobalOrdinal> B_global_ind;
1087 const size_t localNumRows = rowMap->getNodeNumElements ();
1088 ArrayRCP<size_t> numEntriesPerRow (localNumRows);
1091 size_t maxNumEntriesPerRow = 0;
1092 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
1094 A_graph->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind);
1095 const size_type A_numEnt = A_local_ind.size ();
1096 if (A_numEnt > A_global_ind.size ()) {
1097 A_global_ind.resize (A_numEnt);
1100 for (size_type k = 0; k < A_numEnt; ++k) {
1101 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
1105 B_graph->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind);
1106 const size_type B_numEnt = B_local_ind.size ();
1107 if (B_numEnt > B_global_ind.size ()) {
1108 B_global_ind.resize (B_numEnt);
1111 for (size_type k = 0; k < B_numEnt; ++k) {
1112 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
1116 const size_t curNumEntriesPerRow =
1117 keyMergeCount (A_global_ind.begin (), A_global_ind.end (),
1118 B_global_ind.begin (), B_global_ind.end ());
1119 numEntriesPerRow[localRow] = curNumEntriesPerRow;
1120 maxNumEntriesPerRow = std::max (maxNumEntriesPerRow, curNumEntriesPerRow);
1127 C = rcp (
new crs_matrix_type (rowMap, C_colMap, numEntriesPerRow, StaticProfile));
1132 ArrayView<const Scalar> A_val;
1133 ArrayView<const Scalar> B_val;
1135 Array<LocalOrdinal> AplusB_local_ind (maxNumEntriesPerRow);
1136 Array<GlobalOrdinal> AplusB_global_ind (maxNumEntriesPerRow);
1137 Array<Scalar> AplusB_val (maxNumEntriesPerRow);
1139 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
1141 Aprime->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind, A_val);
1143 for (size_type k = 0; k < A_local_ind.size (); ++k) {
1144 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
1148 Bprime->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind, B_val);
1150 for (size_type k = 0; k < B_local_ind.size (); ++k) {
1151 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
1154 const size_t curNumEntries = numEntriesPerRow[localRow];
1155 ArrayView<LocalOrdinal> C_local_ind = AplusB_local_ind (0, curNumEntries);
1156 ArrayView<GlobalOrdinal> C_global_ind = AplusB_global_ind (0, curNumEntries);
1157 ArrayView<Scalar> C_val = AplusB_val (0, curNumEntries);
1161 A_val.begin (), A_val.end (),
1162 B_global_ind.begin (), B_global_ind.end (),
1163 B_val.begin (), B_val.end (),
1164 C_global_ind.begin (), C_val.begin (),
1165 std::plus<Scalar> ());
1167 for (size_type k = 0; k < as<size_type> (numEntriesPerRow[localRow]); ++k) {
1168 C_local_ind[k] = C_colMap->getLocalElement (C_global_ind[k]);
1171 C->replaceLocalValues (localRow, C_local_ind, C_val);
1176 RCP<const map_type> domainMap = A_graph->getDomainMap ();
1177 RCP<const map_type> rangeMap = A_graph->getRangeMap ();
1178 C->expertStaticFillComplete (domainMap, rangeMap, sumImport, A_graph->getExporter ());
1186 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), null));
1193 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), 0));
1197 #ifdef HAVE_TPETRA_DEBUG
1198 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
1199 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
1200 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
1201 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
1202 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
1203 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
1204 #endif // HAVE_TPETRA_DEBUG
1206 Array<RCP<const crs_matrix_type> > Mat =
1207 tuple<RCP<const crs_matrix_type> > (Aprime, Bprime);
1208 Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB);
1211 for (
int k = 0; k < 2; ++k) {
1212 Array<GlobalOrdinal> Indices;
1213 Array<Scalar> Values;
1221 #ifdef HAVE_TPETRA_DEBUG
1222 TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error,
1223 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1224 #endif // HAVE_TPETRA_DEBUG
1225 RCP<const map_type> curRowMap = Mat[k]->getRowMap ();
1226 #ifdef HAVE_TPETRA_DEBUG
1227 TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error,
1228 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1229 #endif // HAVE_TPETRA_DEBUG
1231 const size_t localNumRows = Mat[k]->getNodeNumRows ();
1232 for (
size_t i = 0; i < localNumRows; ++i) {
1233 const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i);
1234 size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow);
1235 if (numEntries > 0) {
1236 Indices.resize (numEntries);
1237 Values.resize (numEntries);
1238 Mat[k]->getGlobalRowCopy (globalRow, Indices (), Values (), numEntries);
1240 if (scalar[k] != STS::one ()) {
1241 for (
size_t j = 0; j < numEntries; ++j) {
1242 Values[j] *= scalar[k];
1246 if (C->isFillComplete ()) {
1247 C->sumIntoGlobalValues (globalRow, Indices, Values);
1249 C->insertGlobalValues (globalRow, Indices, Values);
1256 template<
typename SC,
typename LO,
typename GO,
typename NO>
1257 void AddDetails::AddKernels<SC, LO, GO, NO>::
1259 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
1260 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
1261 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
1262 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1263 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
1264 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
1265 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
1266 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1267 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1268 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1269 typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
1271 using Teuchos::TimeMonitor;
1272 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
1273 auto nrows = Arowptrs.extent(0) - 1;
1274 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1275 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, LO, impl_scalar_type,
1276 execution_space, memory_space, memory_space> KKH;
1278 handle.create_spadd_handle(
true);
1279 auto addHandle = handle.get_spadd_handle();
1280 #ifdef HAVE_TPETRA_MMM_TIMINGS
1281 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted symbolic")));
1283 KokkosSparse::Experimental::spadd_symbolic
1285 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1286 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1287 row_ptrs_array, col_inds_array>
1288 (&handle, Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
1289 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1290 Ccolinds = col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1291 #ifdef HAVE_TPETRA_MMM_TIMINGS
1293 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted numeric")));
1295 KokkosSparse::Experimental::spadd_numeric(&handle,
1296 Arowptrs, Acolinds, Avals, scalarA,
1297 Browptrs, Bcolinds, Bvals, scalarB,
1298 Crowptrs, Ccolinds, Cvals);
1301 template<
typename SC,
typename LO,
typename GO,
typename NO>
1302 void AddDetails::AddKernels<SC, LO, GO, NO>::
1304 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
1305 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
1306 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
1307 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1308 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
1309 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
1310 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
1311 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1313 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1314 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1315 typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
1317 using Teuchos::TimeMonitor;
1318 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
1319 auto nrows = Arowptrs.extent(0) - 1;
1320 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1321 typedef AddDetails::AddKernels<SC, LO, GO, NO> AddKern;
1322 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, LO, AddKern::impl_scalar_type,
1323 AddKern::execution_space, AddKern::memory_space, AddKern::memory_space> KKH;
1325 handle.create_spadd_handle(
false);
1326 auto addHandle = handle.get_spadd_handle();
1327 #ifdef HAVE_TPETRA_MMM_TIMINGS
1328 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted symbolic")));
1330 KokkosSparse::Experimental::spadd_symbolic
1332 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1333 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1334 row_ptrs_array, col_inds_array>
1335 (&handle, Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
1336 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1337 Ccolinds = col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1338 #ifdef HAVE_TPETRA_MMM_TIMINGS
1340 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted kernel: sorted numeric")));
1342 KokkosSparse::Experimental::spadd_numeric(&handle,
1343 Arowptrs, Acolinds, Avals, scalarA,
1344 Browptrs, Bcolinds, Bvals, scalarB,
1345 Crowptrs, Ccolinds, Cvals);
1348 template<
typename GO,
1349 typename LocalIndicesType,
1350 typename GlobalIndicesType,
1351 typename ColMapType>
1352 struct ConvertColIndsFunctor
1354 ConvertColIndsFunctor (
const GO minGlobal_,
1355 const LocalIndicesType& colindsOrig_,
1356 const GlobalIndicesType& colindsConverted_,
1357 const ColMapType& colmap_) :
1358 minGlobal (minGlobal_),
1359 colindsOrig (colindsOrig_),
1360 colindsConverted (colindsConverted_),
1363 KOKKOS_INLINE_FUNCTION
void
1364 operator() (
const size_t& i)
const
1366 colindsConverted[i] = colmap[colindsOrig[i]];
1369 LocalIndicesType colindsOrig;
1370 GlobalIndicesType colindsConverted;
1374 template<
typename SC,
typename LO,
typename GO,
typename NO>
1375 void AddDetails::AddKernels<SC, LO, GO, NO>::
1376 convertToGlobalAndAdd(
1377 const typename AddDetails::AddKernels<SC, LO, GO, NO>::KCRS& A,
1378 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1379 const typename AddDetails::AddKernels<SC, LO, GO, NO>::KCRS& B,
1380 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1381 const typename AddDetails::AddKernels<SC, LO, GO, NO>::local_map_type& AcolMap,
1382 const typename AddDetails::AddKernels<SC, LO, GO, NO>::local_map_type& BcolMap,
1385 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1386 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1387 typename AddDetails::AddKernels<SC, LO, GO, NO>::global_col_inds_array& Ccolinds)
1389 using Teuchos::TimeMonitor;
1391 const values_array& Avals = A.values;
1392 const values_array& Bvals = B.values;
1393 const col_inds_array& Acolinds = A.graph.entries;
1394 const col_inds_array& Bcolinds = B.graph.entries;
1395 auto Arowptrs = A.graph.row_map;
1396 auto Browptrs = B.graph.row_map;
1397 global_col_inds_array AcolindsConverted(
"A colinds (converted)", Acolinds.extent(0));
1398 global_col_inds_array BcolindsConverted(
"B colinds (converted)", Bcolinds.extent(0));
1399 #ifdef HAVE_TPETRA_MMM_TIMINGS
1400 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: " + std::string(
"column map conversion"))));
1402 ConvertColIndsFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertA(minGlobalCol, Acolinds, AcolindsConverted, AcolMap);
1403 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsA", range_type(0, Acolinds.extent(0)), convertA);
1404 ConvertColIndsFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertB(minGlobalCol, Bcolinds, BcolindsConverted, BcolMap);
1405 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsB", range_type(0, Bcolinds.extent(0)), convertB);
1406 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, GO, impl_scalar_type,
1407 execution_space, memory_space, memory_space> KKH;
1409 handle.create_spadd_handle(
false);
1410 auto addHandle = handle.get_spadd_handle();
1411 #ifdef HAVE_TPETRA_MMM_TIMINGS
1413 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted symbolic")));
1415 auto nrows = Arowptrs.extent(0) - 1;
1416 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1417 KokkosSparse::Experimental::spadd_symbolic
1418 <KKH,
typename row_ptrs_array::const_type,
typename global_col_inds_array::const_type,
typename row_ptrs_array::const_type,
typename global_col_inds_array::const_type, row_ptrs_array, global_col_inds_array>
1419 (&handle, Arowptrs, AcolindsConverted, Browptrs, BcolindsConverted, Crowptrs);
1420 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1421 Ccolinds = global_col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1422 #ifdef HAVE_TPETRA_MMM_TIMINGS
1424 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted numeric")));
1426 KokkosSparse::Experimental::spadd_numeric(&handle,
1427 Arowptrs, AcolindsConverted, Avals, scalarA,
1428 Browptrs, BcolindsConverted, Bvals, scalarB,
1429 Crowptrs, Ccolinds, Cvals);
1434 namespace MMdetails{
1438 template <
class TransferType>
1439 void printMultiplicationStatistics(Teuchos::RCP<TransferType > Transfer,
const std::string &label) {
1440 if (Transfer.is_null())
1443 const Distributor & Distor = Transfer->getDistributor();
1444 Teuchos::RCP<const Teuchos::Comm<int> > Comm = Transfer->getSourceMap()->getComm();
1446 size_t rows_send = Transfer->getNumExportIDs();
1447 size_t rows_recv = Transfer->getNumRemoteIDs();
1449 size_t round1_send = Transfer->getNumExportIDs() *
sizeof(size_t);
1450 size_t round1_recv = Transfer->getNumRemoteIDs() *
sizeof(size_t);
1451 size_t num_send_neighbors = Distor.getNumSends();
1452 size_t num_recv_neighbors = Distor.getNumReceives();
1453 size_t round2_send, round2_recv;
1454 Distor.getLastDoStatistics(round2_send,round2_recv);
1456 int myPID = Comm->getRank();
1457 int NumProcs = Comm->getSize();
1464 size_t lstats[8] = {num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv};
1465 size_t gstats_min[8], gstats_max[8];
1467 double lstats_avg[8], gstats_avg[8];
1468 for(
int i=0; i<8; i++)
1469 lstats_avg[i] = ((
double)lstats[i])/NumProcs;
1471 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MIN,8,lstats,gstats_min);
1472 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MAX,8,lstats,gstats_max);
1473 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_SUM,8,lstats_avg,gstats_avg);
1476 printf(
"%s Send Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
1477 (int)gstats_min[0],gstats_avg[0],(
int)gstats_max[0], (int)gstats_min[2],gstats_avg[2],(
int)gstats_max[2],
1478 (
int)gstats_min[4],gstats_avg[4],(
int)gstats_max[4], (
int)gstats_min[6],gstats_avg[6],(
int)gstats_max[6]);
1479 printf("%s Recv Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
1480 (
int)gstats_min[1],gstats_avg[1],(
int)gstats_max[1], (
int)gstats_min[3],gstats_avg[3],(
int)gstats_max[3],
1481 (
int)gstats_min[5],gstats_avg[5],(
int)gstats_max[5], (
int)gstats_min[7],gstats_avg[7],(
int)gstats_max[7]);
1486 template<class Scalar,
1488 class GlobalOrdinal,
1490 void mult_AT_B_newmatrix(
1491 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1492 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
1493 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1494 const std::
string & label,
1495 const Teuchos::RCP<Teuchos::ParameterList>& params)
1500 typedef LocalOrdinal LO;
1501 typedef GlobalOrdinal GO;
1503 typedef CrsMatrixStruct<SC,LO,GO,NO> crs_matrix_struct_type;
1504 typedef RowMatrixTransposer<SC,LO,GO,NO> transposer_type;
1506 #ifdef HAVE_TPETRA_MMM_TIMINGS
1507 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1508 using Teuchos::TimeMonitor;
1509 RCP<TimeMonitor> MM = rcp (
new TimeMonitor
1510 (*TimeMonitor::getNewTimer (prefix_mmm +
"MMM-T Transpose")));
1516 transposer_type transposer (rcpFromRef (A), label + std::string(
"XP: "));
1518 using Teuchos::ParameterList;
1519 RCP<ParameterList> transposeParams (
new ParameterList);
1520 transposeParams->set (
"sort",
false);
1521 if(! params.is_null ()) {
1522 transposeParams->set (
"compute global constants",
1523 params->get (
"compute global constants: temporaries",
1526 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Atrans =
1527 transposer.createTransposeLocal (transposeParams);
1532 #ifdef HAVE_TPETRA_MMM_TIMINGS
1534 MM = rcp (
new TimeMonitor
1535 (*TimeMonitor::getNewTimer (prefix_mmm + std::string (
"MMM-T I&X"))));
1539 crs_matrix_struct_type Aview;
1540 crs_matrix_struct_type Bview;
1541 RCP<const Import<LO, GO, NO> > dummyImporter;
1544 RCP<Teuchos::ParameterList> importParams1 (
new ParameterList);
1545 if (! params.is_null ()) {
1546 importParams1->set (
"compute global constants",
1547 params->get (
"compute global constants: temporaries",
1549 auto slist = params->sublist (
"matrixmatrix: kernel params",
false);
1550 bool isMM = slist.get (
"isMatrixMatrix_TransferAndFillComplete",
false);
1551 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1552 int mm_optimization_core_count =
1554 mm_optimization_core_count =
1555 slist.get (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1556 int mm_optimization_core_count2 =
1557 params->get (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1558 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1559 mm_optimization_core_count = mm_optimization_core_count2;
1561 auto & sip1 = importParams1->sublist (
"matrixmatrix: kernel params",
false);
1562 sip1.set (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1563 sip1.set (
"isMatrixMatrix_TransferAndFillComplete", isMM);
1564 sip1.set (
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1567 MMdetails::import_and_extract_views (*Atrans, Atrans->getRowMap (),
1568 Aview, dummyImporter,
true,
1569 label, importParams1);
1571 RCP<ParameterList> importParams2 (
new ParameterList);
1572 if (! params.is_null ()) {
1573 importParams2->set (
"compute global constants",
1574 params->get (
"compute global constants: temporaries",
1576 auto slist = params->sublist (
"matrixmatrix: kernel params",
false);
1577 bool isMM = slist.get (
"isMatrixMatrix_TransferAndFillComplete",
false);
1578 bool overrideAllreduce = slist.get (
"MM_TAFC_OverrideAllreduceCheck",
false);
1579 int mm_optimization_core_count =
1581 mm_optimization_core_count =
1582 slist.get (
"MM_TAFC_OptimizationCoreCount",
1583 mm_optimization_core_count);
1584 int mm_optimization_core_count2 =
1585 params->get (
"MM_TAFC_OptimizationCoreCount",
1586 mm_optimization_core_count);
1587 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1588 mm_optimization_core_count = mm_optimization_core_count2;
1590 auto & sip2 = importParams2->sublist (
"matrixmatrix: kernel params",
false);
1591 sip2.set (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1592 sip2.set (
"isMatrixMatrix_TransferAndFillComplete", isMM);
1593 sip2.set (
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1596 if(B.getRowMap()->isSameAs(*Atrans->getColMap())){
1597 MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,
true, label,importParams2);
1600 MMdetails::import_and_extract_views(B, Atrans->getColMap(), Bview, dummyImporter,
false, label,importParams2);
1603 #ifdef HAVE_TPETRA_MMM_TIMINGS
1605 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T AB-core"))));
1608 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Ctemp;
1611 bool needs_final_export = ! Atrans->getGraph ()->getExporter ().is_null();
1612 if (needs_final_export) {
1616 Ctemp = rcp (&C,
false);
1619 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label,params);
1624 #ifdef HAVE_TPETRA_MMM_TIMINGS
1626 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T exportAndFillComplete"))));
1629 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Crcp (&C,
false);
1631 if (needs_final_export) {
1632 ParameterList labelList;
1633 labelList.set(
"Timer Label", label);
1634 if(!params.is_null()) {
1635 ParameterList& params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
1636 ParameterList& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
1638 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1639 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1640 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
1641 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count,
"Core Count above which the optimized neighbor discovery is used");
1642 bool isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1643 bool overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1645 labelList_subList.set (
"isMatrixMatrix_TransferAndFillComplete", isMM,
1646 "This parameter should be set to true only for MatrixMatrix operations: the optimization in Epetra that was ported to Tpetra does _not_ take into account the possibility that for any given source PID, a particular GID may not exist on the target PID: i.e. a transfer operation. A fix for this general case is in development.");
1647 labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
1648 labelList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
1650 Ctemp->exportAndFillComplete (Crcp,
1651 *Ctemp->getGraph ()->getExporter (),
1654 rcp (&labelList,
false));
1656 #ifdef HAVE_TPETRA_MMM_STATISTICS
1657 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label+std::string(
" AT_B MMM"));
1663 template<
class Scalar,
1665 class GlobalOrdinal,
1668 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1669 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1670 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1671 const std::string& ,
1672 const Teuchos::RCP<Teuchos::ParameterList>& )
1674 using Teuchos::Array;
1675 using Teuchos::ArrayRCP;
1676 using Teuchos::ArrayView;
1677 using Teuchos::OrdinalTraits;
1678 using Teuchos::null;
1680 typedef Teuchos::ScalarTraits<Scalar> STS;
1682 LocalOrdinal C_firstCol = Bview.colMap->getMinLocalIndex();
1683 LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
1685 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1686 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1688 ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getNodeElementList();
1689 ArrayView<const GlobalOrdinal> bcols_import = null;
1690 if (Bview.importColMap != null) {
1691 C_firstCol_import = Bview.importColMap->getMinLocalIndex();
1692 C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
1694 bcols_import = Bview.importColMap->getNodeElementList();
1697 size_t C_numCols = C_lastCol - C_firstCol +
1698 OrdinalTraits<LocalOrdinal>::one();
1699 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1700 OrdinalTraits<LocalOrdinal>::one();
1702 if (C_numCols_import > C_numCols)
1703 C_numCols = C_numCols_import;
1705 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1706 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1707 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1709 Array<Scalar> C_row_i = dwork;
1710 Array<GlobalOrdinal> C_cols = iwork;
1711 Array<size_t> c_index = iwork2;
1712 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1713 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1715 size_t C_row_i_length, j, k, last_index;
1718 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1719 Array<LocalOrdinal> Acol2Brow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1720 Array<LocalOrdinal> Acol2Irow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1721 if(Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())){
1723 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1724 Aview.colMap->getMaxLocalIndex(); i++)
1729 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1730 Aview.colMap->getMaxLocalIndex(); i++) {
1731 GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
1732 LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
1733 if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1734 else Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
1744 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1745 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1746 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1747 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1748 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1749 ArrayView<const Scalar> Avals, Bvals, Ivals;
1750 Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1751 Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1752 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1753 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1754 if(!Bview.importMatrix.is_null()) {
1755 Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
1756 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1759 bool C_filled = C.isFillComplete();
1761 for (
size_t i = 0; i < C_numCols; i++)
1762 c_index[i] = OrdinalTraits<size_t>::invalid();
1765 size_t Arows = Aview.rowMap->getNodeNumElements();
1766 for(
size_t i=0; i<Arows; ++i) {
1770 GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
1776 C_row_i_length = OrdinalTraits<size_t>::zero();
1778 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1779 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1780 const Scalar Aval = Avals[k];
1781 if (Aval == STS::zero())
1784 if (Ak == LO_INVALID)
1787 for (j = Browptr[Ak]; j < Browptr[Ak+1]; ++j) {
1788 LocalOrdinal col = Bcolind[j];
1791 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1794 C_row_i[C_row_i_length] = Aval*Bvals[j];
1795 C_cols[C_row_i_length] = col;
1796 c_index[col] = C_row_i_length;
1800 C_row_i[c_index[col]] += Aval*Bvals[j];
1805 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1806 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1807 C_cols[ii] = bcols[C_cols[ii]];
1808 combined_index[ii] = C_cols[ii];
1809 combined_values[ii] = C_row_i[ii];
1811 last_index = C_row_i_length;
1817 C_row_i_length = OrdinalTraits<size_t>::zero();
1819 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1820 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1821 const Scalar Aval = Avals[k];
1822 if (Aval == STS::zero())
1825 if (Ak!=LO_INVALID)
continue;
1827 Ak = Acol2Irow[Acolind[k]];
1828 for (j = Irowptr[Ak]; j < Irowptr[Ak+1]; ++j) {
1829 LocalOrdinal col = Icolind[j];
1832 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1835 C_row_i[C_row_i_length] = Aval*Ivals[j];
1836 C_cols[C_row_i_length] = col;
1837 c_index[col] = C_row_i_length;
1842 C_row_i[c_index[col]] += Aval*Ivals[j];
1847 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1848 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1849 C_cols[ii] = bcols_import[C_cols[ii]];
1850 combined_index[last_index] = C_cols[ii];
1851 combined_values[last_index] = C_row_i[ii];
1858 C.sumIntoGlobalValues(
1860 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1861 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1863 C.insertGlobalValues(
1865 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1866 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1872 template<
class Scalar,
1874 class GlobalOrdinal,
1876 void setMaxNumEntriesPerRow(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) {
1877 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal> >::size_type local_length_size;
1878 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1880 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1881 Mview.maxNumRowEntries = Mview.indices[0].size();
1883 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1884 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1885 Mview.maxNumRowEntries = Mview.indices[i].size();
1890 template<
class CrsMatrixType>
1891 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1893 size_t Aest = 100, Best=100;
1894 if (A.getNodeNumEntries() > 0)
1895 Aest = (A.getNodeNumRows() > 0)? A.getNodeNumEntries()/A.getNodeNumRows() : 100;
1896 if (B.getNodeNumEntries() > 0)
1897 Best = (B.getNodeNumRows() > 0) ? B.getNodeNumEntries()/B.getNodeNumRows() : 100;
1899 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1900 nnzperrow *= nnzperrow;
1902 return (
size_t)(A.getNodeNumRows()*nnzperrow*0.75 + 100);
1912 template<
class Scalar,
1914 class GlobalOrdinal,
1916 void mult_A_B_newmatrix(
1917 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1918 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1919 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1920 const std::string& label,
1921 const Teuchos::RCP<Teuchos::ParameterList>& params)
1923 using Teuchos::Array;
1924 using Teuchos::ArrayRCP;
1925 using Teuchos::ArrayView;
1930 typedef LocalOrdinal LO;
1931 typedef GlobalOrdinal GO;
1933 typedef Import<LO,GO,NO> import_type;
1934 typedef Map<LO,GO,NO> map_type;
1937 typedef typename map_type::local_map_type local_map_type;
1939 typedef typename KCRS::StaticCrsGraphType graph_t;
1940 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1941 typedef typename NO::execution_space execution_space;
1942 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1943 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1945 #ifdef HAVE_TPETRA_MMM_TIMINGS
1946 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1947 using Teuchos::TimeMonitor;
1948 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM M5 Cmap")))));
1950 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1953 RCP<const import_type> Cimport;
1954 RCP<const map_type> Ccolmap;
1955 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1956 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
1957 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1958 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1959 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1960 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1961 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1962 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1969 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
1971 if (Bview.importMatrix.is_null()) {
1974 Ccolmap = Bview.colMap;
1975 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getNodeNumElements());
1977 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1978 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1979 KOKKOS_LAMBDA(
const LO i) {
1992 if (!Bimport.is_null() && !Iimport.is_null()) {
1993 Cimport = Bimport->setUnion(*Iimport);
1995 else if (!Bimport.is_null() && Iimport.is_null()) {
1996 Cimport = Bimport->setUnion();
1998 else if (Bimport.is_null() && !Iimport.is_null()) {
1999 Cimport = Iimport->setUnion();
2002 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
2004 Ccolmap = Cimport->getTargetMap();
2009 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2010 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
2017 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
2018 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2019 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2020 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2022 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2023 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2032 C.replaceColMap(Ccolmap);
2050 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
2051 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
2053 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::construct_tables",range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2054 GO aidx = Acolmap_local.getGlobalElement(i);
2055 LO B_LID = Browmap_local.getLocalElement(aidx);
2056 if (B_LID != LO_INVALID) {
2057 targetMapToOrigRow(i) = B_LID;
2058 targetMapToImportRow(i) = LO_INVALID;
2060 LO I_LID = Irowmap_local.getLocalElement(aidx);
2061 targetMapToOrigRow(i) = LO_INVALID;
2062 targetMapToImportRow(i) = I_LID;
2069 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_newmatrix_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2076 template<
class Scalar,
2078 class GlobalOrdinal,
2080 class LocalOrdinalViewType>
2081 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2082 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2083 const LocalOrdinalViewType & targetMapToOrigRow,
2084 const LocalOrdinalViewType & targetMapToImportRow,
2085 const LocalOrdinalViewType & Bcol2Ccol,
2086 const LocalOrdinalViewType & Icol2Ccol,
2087 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2088 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2089 const std::string& label,
2090 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2091 #ifdef HAVE_TPETRA_MMM_TIMINGS
2092 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2093 using Teuchos::TimeMonitor;
2094 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore"))));
2097 using Teuchos::Array;
2098 using Teuchos::ArrayRCP;
2099 using Teuchos::ArrayView;
2105 typedef typename KCRS::StaticCrsGraphType graph_t;
2106 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2107 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2108 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2109 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2112 typedef LocalOrdinal LO;
2113 typedef GlobalOrdinal GO;
2115 typedef Map<LO,GO,NO> map_type;
2116 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2117 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2118 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2121 RCP<const map_type> Ccolmap = C.getColMap();
2122 size_t m = Aview.origMatrix->getNodeNumRows();
2123 size_t n = Ccolmap->getNodeNumElements();
2124 size_t b_max_nnz_per_row = Bview.origMatrix->getNodeMaxNumRowEntries();
2127 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
2128 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
2130 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2131 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2132 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2134 c_lno_view_t Irowptr;
2135 lno_nnz_view_t Icolind;
2136 scalar_view_t Ivals;
2137 if(!Bview.importMatrix.is_null()) {
2138 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
2139 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
2140 Ivals = Bview.importMatrix->getLocalMatrix().values;
2141 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getNodeMaxNumRowEntries());
2144 #ifdef HAVE_TPETRA_MMM_TIMINGS
2145 RCP<TimeMonitor> MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2155 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2156 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
2157 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
2158 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
2168 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2169 std::vector<size_t> c_status(n, ST_INVALID);
2179 size_t CSR_ip = 0, OLD_ip = 0;
2180 for (
size_t i = 0; i < m; i++) {
2183 Crowptr[i] = CSR_ip;
2186 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2187 LO Aik = Acolind[k];
2188 const SC Aval = Avals[k];
2189 if (Aval == SC_ZERO)
2192 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2199 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2202 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2203 LO Bkj = Bcolind[j];
2204 LO Cij = Bcol2Ccol[Bkj];
2206 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2208 c_status[Cij] = CSR_ip;
2209 Ccolind[CSR_ip] = Cij;
2210 Cvals[CSR_ip] = Aval*Bvals[j];
2214 Cvals[c_status[Cij]] += Aval*Bvals[j];
2225 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2226 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2227 LO Ikj = Icolind[j];
2228 LO Cij = Icol2Ccol[Ikj];
2230 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip){
2232 c_status[Cij] = CSR_ip;
2233 Ccolind[CSR_ip] = Cij;
2234 Cvals[CSR_ip] = Aval*Ivals[j];
2237 Cvals[c_status[Cij]] += Aval*Ivals[j];
2244 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1])*b_max_nnz_per_row) > CSR_alloc) {
2246 Kokkos::resize(Ccolind,CSR_alloc);
2247 Kokkos::resize(Cvals,CSR_alloc);
2252 Crowptr[m] = CSR_ip;
2255 Kokkos::resize(Ccolind,CSR_ip);
2256 Kokkos::resize(Cvals,CSR_ip);
2258 #ifdef HAVE_TPETRA_MMM_TIMINGS
2260 auto MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix Final Sort")));
2264 if (params.is_null() || params->get(
"sort entries",
true))
2265 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2266 C.setAllValues(Crowptr,Ccolind, Cvals);
2269 #ifdef HAVE_TPETRA_MMM_TIMINGS
2271 auto MM4(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix ESFC")));
2283 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2284 labelList->set(
"Timer Label",label);
2285 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2286 RCP<const Export<LO,GO,NO> > dummyExport;
2287 C.expertStaticFillComplete(Bview. origMatrix->getDomainMap(), Aview. origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2288 #ifdef HAVE_TPETRA_MMM_TIMINGS
2290 MM2 = Teuchos::null;
2300 template<
class Scalar,
2302 class GlobalOrdinal,
2304 void mult_A_B_reuse(
2305 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2306 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2307 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2308 const std::string& label,
2309 const Teuchos::RCP<Teuchos::ParameterList>& params)
2311 using Teuchos::Array;
2312 using Teuchos::ArrayRCP;
2313 using Teuchos::ArrayView;
2318 typedef LocalOrdinal LO;
2319 typedef GlobalOrdinal GO;
2321 typedef Import<LO,GO,NO> import_type;
2322 typedef Map<LO,GO,NO> map_type;
2325 typedef typename map_type::local_map_type local_map_type;
2327 typedef typename KCRS::StaticCrsGraphType graph_t;
2328 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2329 typedef typename NO::execution_space execution_space;
2330 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2331 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2333 #ifdef HAVE_TPETRA_MMM_TIMINGS
2334 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2335 using Teuchos::TimeMonitor;
2336 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse Cmap"))));
2338 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2341 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2342 RCP<const map_type> Ccolmap = C.getColMap();
2343 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2344 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2345 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2346 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2347 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2348 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2351 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
2355 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2356 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2359 if (!Bview.importMatrix.is_null()) {
2360 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2361 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
2363 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
2364 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2365 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2371 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
2372 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
2373 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2374 GO aidx = Acolmap_local.getGlobalElement(i);
2375 LO B_LID = Browmap_local.getLocalElement(aidx);
2376 if (B_LID != LO_INVALID) {
2377 targetMapToOrigRow(i) = B_LID;
2378 targetMapToImportRow(i) = LO_INVALID;
2380 LO I_LID = Irowmap_local.getLocalElement(aidx);
2381 targetMapToOrigRow(i) = LO_INVALID;
2382 targetMapToImportRow(i) = I_LID;
2389 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_reuse_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2393 template<
class Scalar,
2395 class GlobalOrdinal,
2397 class LocalOrdinalViewType>
2398 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2399 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2400 const LocalOrdinalViewType & targetMapToOrigRow,
2401 const LocalOrdinalViewType & targetMapToImportRow,
2402 const LocalOrdinalViewType & Bcol2Ccol,
2403 const LocalOrdinalViewType & Icol2Ccol,
2404 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2405 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > ,
2406 const std::string& label,
2407 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2408 #ifdef HAVE_TPETRA_MMM_TIMINGS
2409 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2410 using Teuchos::TimeMonitor;
2411 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
2412 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
2422 typedef typename KCRS::StaticCrsGraphType graph_t;
2423 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2424 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2425 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2428 typedef LocalOrdinal LO;
2429 typedef GlobalOrdinal GO;
2431 typedef Map<LO,GO,NO> map_type;
2432 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2433 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2434 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2437 RCP<const map_type> Ccolmap = C.getColMap();
2438 size_t m = Aview.origMatrix->getNodeNumRows();
2439 size_t n = Ccolmap->getNodeNumElements();
2442 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
2443 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
2444 const KCRS & Cmat = C.getLocalMatrix();
2446 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2447 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2448 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2449 scalar_view_t Cvals = Cmat.values;
2451 c_lno_view_t Irowptr;
2452 lno_nnz_view_t Icolind;
2453 scalar_view_t Ivals;
2454 if(!Bview.importMatrix.is_null()) {
2455 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
2456 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
2457 Ivals = Bview.importMatrix->getLocalMatrix().values;
2460 #ifdef HAVE_TPETRA_MMM_TIMINGS
2461 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2473 std::vector<size_t> c_status(n, ST_INVALID);
2476 size_t CSR_ip = 0, OLD_ip = 0;
2477 for (
size_t i = 0; i < m; i++) {
2480 OLD_ip = Crowptr[i];
2481 CSR_ip = Crowptr[i+1];
2482 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2483 c_status[Ccolind[k]] = k;
2489 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2490 LO Aik = Acolind[k];
2491 const SC Aval = Avals[k];
2492 if (Aval == SC_ZERO)
2495 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2497 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2499 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2500 LO Bkj = Bcolind[j];
2501 LO Cij = Bcol2Ccol[Bkj];
2503 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2504 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2505 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2507 Cvals[c_status[Cij]] += Aval * Bvals[j];
2512 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2513 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2514 LO Ikj = Icolind[j];
2515 LO Cij = Icol2Ccol[Ikj];
2517 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2518 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2519 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2521 Cvals[c_status[Cij]] += Aval * Ivals[j];
2527 #ifdef HAVE_TPETRA_MMM_TIMINGS
2528 auto MM3 = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse ESFC"))));
2531 C.fillComplete(C.getDomainMap(), C.getRangeMap());
2537 template<
class Scalar,
2539 class GlobalOrdinal,
2541 void jacobi_A_B_newmatrix(
2543 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2544 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2545 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2546 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2547 const std::string& label,
2548 const Teuchos::RCP<Teuchos::ParameterList>& params)
2550 using Teuchos::Array;
2551 using Teuchos::ArrayRCP;
2552 using Teuchos::ArrayView;
2556 typedef LocalOrdinal LO;
2557 typedef GlobalOrdinal GO;
2560 typedef Import<LO,GO,NO> import_type;
2561 typedef Map<LO,GO,NO> map_type;
2562 typedef typename map_type::local_map_type local_map_type;
2566 typedef typename KCRS::StaticCrsGraphType graph_t;
2567 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2568 typedef typename NO::execution_space execution_space;
2569 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2570 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2573 #ifdef HAVE_TPETRA_MMM_TIMINGS
2574 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2575 using Teuchos::TimeMonitor;
2576 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi M5 Cmap"))));
2578 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2581 RCP<const import_type> Cimport;
2582 RCP<const map_type> Ccolmap;
2583 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
2584 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
2585 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
2586 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2587 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2588 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2589 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2590 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2597 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
2599 if (Bview.importMatrix.is_null()) {
2602 Ccolmap = Bview.colMap;
2606 Kokkos::RangePolicy<execution_space, LO> range (0, static_cast<LO> (Bview.colMap->getNodeNumElements ()));
2607 Kokkos::parallel_for (range, KOKKOS_LAMBDA (
const size_t i) {
2608 Bcol2Ccol(i) =
static_cast<LO
> (i);
2619 if (!Bimport.is_null() && !Iimport.is_null()){
2620 Cimport = Bimport->setUnion(*Iimport);
2621 Ccolmap = Cimport->getTargetMap();
2623 }
else if (!Bimport.is_null() && Iimport.is_null()) {
2624 Cimport = Bimport->setUnion();
2626 }
else if(Bimport.is_null() && !Iimport.is_null()) {
2627 Cimport = Iimport->setUnion();
2630 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
2632 Ccolmap = Cimport->getTargetMap();
2634 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2635 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2642 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
2643 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2644 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2645 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2647 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2648 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2657 C.replaceColMap(Ccolmap);
2675 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
2676 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
2677 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2678 GO aidx = Acolmap_local.getGlobalElement(i);
2679 LO B_LID = Browmap_local.getLocalElement(aidx);
2680 if (B_LID != LO_INVALID) {
2681 targetMapToOrigRow(i) = B_LID;
2682 targetMapToImportRow(i) = LO_INVALID;
2684 LO I_LID = Irowmap_local.getLocalElement(aidx);
2685 targetMapToOrigRow(i) = LO_INVALID;
2686 targetMapToImportRow(i) = I_LID;
2693 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_newmatrix_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2702 template<
class Scalar,
2704 class GlobalOrdinal,
2706 class LocalOrdinalViewType>
2707 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
2708 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & Dinv,
2709 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2710 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2711 const LocalOrdinalViewType & targetMapToOrigRow,
2712 const LocalOrdinalViewType & targetMapToImportRow,
2713 const LocalOrdinalViewType & Bcol2Ccol,
2714 const LocalOrdinalViewType & Icol2Ccol,
2715 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2716 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2717 const std::string& label,
2718 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2720 #ifdef HAVE_TPETRA_MMM_TIMINGS
2721 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2722 using Teuchos::TimeMonitor;
2723 auto MM(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Nemwmatrix SerialCore")));
2726 using Teuchos::Array;
2727 using Teuchos::ArrayRCP;
2728 using Teuchos::ArrayView;
2734 typedef typename KCRS::StaticCrsGraphType graph_t;
2735 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2736 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2737 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2738 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2741 typedef typename scalar_view_t::memory_space scalar_memory_space;
2744 typedef LocalOrdinal LO;
2745 typedef GlobalOrdinal GO;
2748 typedef Map<LO,GO,NO> map_type;
2749 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2750 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2753 RCP<const map_type> Ccolmap = C.getColMap();
2754 size_t m = Aview.origMatrix->getNodeNumRows();
2755 size_t n = Ccolmap->getNodeNumElements();
2756 size_t b_max_nnz_per_row = Bview.origMatrix->getNodeMaxNumRowEntries();
2759 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
2760 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
2762 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2763 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2764 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2766 c_lno_view_t Irowptr;
2767 lno_nnz_view_t Icolind;
2768 scalar_view_t Ivals;
2769 if(!Bview.importMatrix.is_null()) {
2770 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
2771 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
2772 Ivals = Bview.importMatrix->getLocalMatrix().values;
2773 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getNodeMaxNumRowEntries());
2777 auto Dvals = Dinv.template getLocalView<scalar_memory_space>();
2785 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2786 Array<size_t> c_status(n, ST_INVALID);
2795 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2796 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
2797 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
2798 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
2799 size_t CSR_ip = 0, OLD_ip = 0;
2801 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2815 for (
size_t i = 0; i < m; i++) {
2818 Crowptr[i] = CSR_ip;
2819 SC minusOmegaDval = -omega*Dvals(i,0);
2822 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2823 Scalar Bval = Bvals[j];
2824 if (Bval == SC_ZERO)
2826 LO Bij = Bcolind[j];
2827 LO Cij = Bcol2Ccol[Bij];
2830 c_status[Cij] = CSR_ip;
2831 Ccolind[CSR_ip] = Cij;
2832 Cvals[CSR_ip] = Bvals[j];
2837 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2838 LO Aik = Acolind[k];
2839 const SC Aval = Avals[k];
2840 if (Aval == SC_ZERO)
2843 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2845 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2847 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2848 LO Bkj = Bcolind[j];
2849 LO Cij = Bcol2Ccol[Bkj];
2851 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2853 c_status[Cij] = CSR_ip;
2854 Ccolind[CSR_ip] = Cij;
2855 Cvals[CSR_ip] = minusOmegaDval* Aval * Bvals[j];
2859 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Bvals[j];
2865 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2866 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2867 LO Ikj = Icolind[j];
2868 LO Cij = Icol2Ccol[Ikj];
2870 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2872 c_status[Cij] = CSR_ip;
2873 Ccolind[CSR_ip] = Cij;
2874 Cvals[CSR_ip] = minusOmegaDval* Aval * Ivals[j];
2877 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Ivals[j];
2884 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1]+1)*b_max_nnz_per_row) > CSR_alloc) {
2886 Kokkos::resize(Ccolind,CSR_alloc);
2887 Kokkos::resize(Cvals,CSR_alloc);
2891 Crowptr[m] = CSR_ip;
2894 Kokkos::resize(Ccolind,CSR_ip);
2895 Kokkos::resize(Cvals,CSR_ip);
2898 #ifdef HAVE_TPETRA_MMM_TIMINGS
2899 auto MM2(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix Final Sort")));
2906 C.replaceColMap(Ccolmap);
2913 if (params.is_null() || params->get(
"sort entries",
true))
2914 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2915 C.setAllValues(Crowptr,Ccolind, Cvals);
2918 #ifdef HAVE_TPETRA_MMM_TIMINGS
2919 auto MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix ESFC")));
2930 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2931 labelList->set(
"Timer Label",label);
2932 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2933 RCP<const Export<LO,GO,NO> > dummyExport;
2934 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2942 template<
class Scalar,
2944 class GlobalOrdinal,
2946 void jacobi_A_B_reuse(
2948 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2949 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2950 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2951 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2952 const std::string& label,
2953 const Teuchos::RCP<Teuchos::ParameterList>& params)
2955 using Teuchos::Array;
2956 using Teuchos::ArrayRCP;
2957 using Teuchos::ArrayView;
2961 typedef LocalOrdinal LO;
2962 typedef GlobalOrdinal GO;
2965 typedef Import<LO,GO,NO> import_type;
2966 typedef Map<LO,GO,NO> map_type;
2969 typedef typename map_type::local_map_type local_map_type;
2971 typedef typename KCRS::StaticCrsGraphType graph_t;
2972 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2973 typedef typename NO::execution_space execution_space;
2974 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2975 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2977 #ifdef HAVE_TPETRA_MMM_TIMINGS
2978 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2979 using Teuchos::TimeMonitor;
2980 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse Cmap"))));
2982 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2985 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2986 RCP<const map_type> Ccolmap = C.getColMap();
2987 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2988 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2989 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2990 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2991 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2992 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2995 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
2999 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
3000 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
3003 if (!Bview.importMatrix.is_null()) {
3004 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
3005 std::runtime_error,
"Tpetra::Jacobi: Import setUnion messed with the DomainMap in an unfortunate way");
3007 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
3008 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
3009 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
3015 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
3016 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
3017 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
3018 GO aidx = Acolmap_local.getGlobalElement(i);
3019 LO B_LID = Browmap_local.getLocalElement(aidx);
3020 if (B_LID != LO_INVALID) {
3021 targetMapToOrigRow(i) = B_LID;
3022 targetMapToImportRow(i) = LO_INVALID;
3024 LO I_LID = Irowmap_local.getLocalElement(aidx);
3025 targetMapToOrigRow(i) = LO_INVALID;
3026 targetMapToImportRow(i) = I_LID;
3031 #ifdef HAVE_TPETRA_MMM_TIMINGS
3037 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_reuse_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
3043 template<
class Scalar,
3045 class GlobalOrdinal,
3047 class LocalOrdinalViewType>
3048 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
3049 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
3050 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3051 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3052 const LocalOrdinalViewType & targetMapToOrigRow,
3053 const LocalOrdinalViewType & targetMapToImportRow,
3054 const LocalOrdinalViewType & Bcol2Ccol,
3055 const LocalOrdinalViewType & Icol2Ccol,
3056 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
3057 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > ,
3058 const std::string& label,
3059 const Teuchos::RCP<Teuchos::ParameterList>& ) {
3060 #ifdef HAVE_TPETRA_MMM_TIMINGS
3061 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
3062 using Teuchos::TimeMonitor;
3063 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore"))));
3064 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
3073 typedef typename KCRS::StaticCrsGraphType graph_t;
3074 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
3075 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3076 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3077 typedef typename scalar_view_t::memory_space scalar_memory_space;
3080 typedef LocalOrdinal LO;
3081 typedef GlobalOrdinal GO;
3083 typedef Map<LO,GO,NO> map_type;
3084 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3085 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3086 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
3089 RCP<const map_type> Ccolmap = C.getColMap();
3090 size_t m = Aview.origMatrix->getNodeNumRows();
3091 size_t n = Ccolmap->getNodeNumElements();
3094 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
3095 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
3096 const KCRS & Cmat = C.getLocalMatrix();
3098 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
3099 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
3100 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
3101 scalar_view_t Cvals = Cmat.values;
3103 c_lno_view_t Irowptr;
3104 lno_nnz_view_t Icolind;
3105 scalar_view_t Ivals;
3106 if(!Bview.importMatrix.is_null()) {
3107 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
3108 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
3109 Ivals = Bview.importMatrix->getLocalMatrix().values;
3113 auto Dvals = Dinv.template getLocalView<scalar_memory_space>();
3115 #ifdef HAVE_TPETRA_MMM_TIMINGS
3116 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore - Compare"))));
3124 std::vector<size_t> c_status(n, ST_INVALID);
3127 size_t CSR_ip = 0, OLD_ip = 0;
3128 for (
size_t i = 0; i < m; i++) {
3132 OLD_ip = Crowptr[i];
3133 CSR_ip = Crowptr[i+1];
3134 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
3135 c_status[Ccolind[k]] = k;
3141 SC minusOmegaDval = -omega*Dvals(i,0);
3144 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
3145 Scalar Bval = Bvals[j];
3146 if (Bval == SC_ZERO)
3148 LO Bij = Bcolind[j];
3149 LO Cij = Bcol2Ccol[Bij];
3151 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3152 std::runtime_error,
"Trying to insert a new entry into a static graph");
3154 Cvals[c_status[Cij]] = Bvals[j];
3158 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
3159 LO Aik = Acolind[k];
3160 const SC Aval = Avals[k];
3161 if (Aval == SC_ZERO)
3164 if (targetMapToOrigRow[Aik] != LO_INVALID) {
3166 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
3168 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
3169 LO Bkj = Bcolind[j];
3170 LO Cij = Bcol2Ccol[Bkj];
3172 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3173 std::runtime_error,
"Trying to insert a new entry into a static graph");
3175 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
3180 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
3181 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
3182 LO Ikj = Icolind[j];
3183 LO Cij = Icol2Ccol[Ikj];
3185 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3186 std::runtime_error,
"Trying to insert a new entry into a static graph");
3188 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
3194 #ifdef HAVE_TPETRA_MMM_TIMINGS
3195 MM2 = Teuchos::null;
3196 MM2 = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
3199 C.fillComplete(C.getDomainMap(), C.getRangeMap());
3200 #ifdef HAVE_TPETRA_MMM_TIMINGS
3201 MM2 = Teuchos::null;
3210 template<
class Scalar,
3212 class GlobalOrdinal,
3214 void import_and_extract_views(
3215 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
3216 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
3217 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3218 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
3219 bool userAssertsThereAreNoRemotes,
3220 const std::string& label,
3221 const Teuchos::RCP<Teuchos::ParameterList>& params)
3223 using Teuchos::Array;
3224 using Teuchos::ArrayView;
3227 using Teuchos::null;
3230 typedef LocalOrdinal LO;
3231 typedef GlobalOrdinal GO;
3234 typedef Map<LO,GO,NO> map_type;
3235 typedef Import<LO,GO,NO> import_type;
3236 typedef CrsMatrix<SC,LO,GO,NO> crs_matrix_type;
3238 #ifdef HAVE_TPETRA_MMM_TIMINGS
3239 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
3240 using Teuchos::TimeMonitor;
3241 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Alloc"))));
3249 Aview.deleteContents();
3251 Aview.origMatrix = rcp(&A,
false);
3252 Aview.origRowMap = A.getRowMap();
3253 Aview.rowMap = targetMap;
3254 Aview.colMap = A.getColMap();
3255 Aview.domainMap = A.getDomainMap();
3256 Aview.importColMap = null;
3259 if (userAssertsThereAreNoRemotes)
3262 RCP<const import_type> importer;
3263 if (params != null && params->isParameter(
"importer")) {
3264 importer = params->get<RCP<const import_type> >(
"importer");
3267 #ifdef HAVE_TPETRA_MMM_TIMINGS
3269 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap"))));
3274 RCP<const map_type> rowMap = A.getRowMap(), remoteRowMap;
3275 size_t numRemote = 0;
3277 if (!prototypeImporter.is_null() &&
3278 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3279 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3281 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3282 numRemote = prototypeImporter->getNumRemoteIDs();
3284 Array<GO> remoteRows(numRemote);
3285 for (
size_t i = 0; i < numRemote; i++)
3286 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3288 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3289 rowMap->getIndexBase(), rowMap->getComm()));
3292 }
else if (prototypeImporter.is_null()) {
3294 ArrayView<const GO> rows = targetMap->getNodeElementList();
3295 size_t numRows = targetMap->getNodeNumElements();
3297 Array<GO> remoteRows(numRows);
3298 for(
size_t i = 0; i < numRows; ++i) {
3299 const LO mlid = rowMap->getLocalElement(rows[i]);
3301 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3302 remoteRows[numRemote++] = rows[i];
3304 remoteRows.resize(numRemote);
3305 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3306 rowMap->getIndexBase(), rowMap->getComm()));
3314 const int numProcs = rowMap->getComm()->getSize();
3317 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3318 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3327 #ifdef HAVE_TPETRA_MMM_TIMINGS
3329 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Collective-0"))));
3333 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
3335 if (globalMaxNumRemote > 0) {
3336 #ifdef HAVE_TPETRA_MMM_TIMINGS
3338 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-2"))));
3342 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3344 importer = rcp(
new import_type(rowMap, remoteRowMap));
3346 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
3350 params->set(
"importer", importer);
3353 if (importer != null) {
3354 #ifdef HAVE_TPETRA_MMM_TIMINGS
3356 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-3"))));
3360 Teuchos::ParameterList labelList;
3361 labelList.set(
"Timer Label", label);
3362 auto & labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
3365 bool overrideAllreduce =
false;
3368 Teuchos::ParameterList params_sublist;
3369 if(!params.is_null()) {
3370 labelList.set(
"compute global constants", params->get(
"compute global constants",
false));
3371 params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
3372 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3373 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3374 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
3375 isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
3376 overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
3378 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
3379 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3380 labelList_subList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
3382 Aview.importMatrix = Tpetra::importAndFillCompleteCrsMatrix<crs_matrix_type>(rcpFromRef(A), *importer,
3383 A.getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
3389 sprintf(str,
"import_matrix.%d.dat",count);
3394 #ifdef HAVE_TPETRA_MMM_STATISTICS
3395 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
3399 #ifdef HAVE_TPETRA_MMM_TIMINGS
3401 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-4"))));
3405 Aview.importColMap = Aview.importMatrix->getColMap();
3406 #ifdef HAVE_TPETRA_MMM_TIMINGS
3418 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3420 merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3421 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3422 const LocalOrdinalViewType & Acol2Brow,
3423 const LocalOrdinalViewType & Acol2Irow,
3424 const LocalOrdinalViewType & Bcol2Ccol,
3425 const LocalOrdinalViewType & Icol2Ccol,
3426 const size_t mergedNodeNumCols) {
3430 typedef typename KCRS::StaticCrsGraphType graph_t;
3431 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3432 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3433 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3435 const KCRS & Ak = Aview.origMatrix->getLocalMatrix();
3436 const KCRS & Bk = Bview.origMatrix->getLocalMatrix();
3439 if(!Bview.importMatrix.is_null() || (Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3443 RCP<const KCRS> Ik_;
3444 if(!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KCRS>(Bview.importMatrix->getLocalMatrix());
3445 const KCRS * Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3447 if(Ik!=0) Iks = *Ik;
3448 size_t merge_numrows = Ak.numCols();
3450 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3452 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3455 typedef typename Node::execution_space execution_space;
3456 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3457 Kokkos::parallel_scan (
"Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
3458 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3459 if(
final) Mrowptr(i) = update;
3462 if(Acol2Brow(i)!=LO_INVALID)
3463 ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
3465 ct = Iks.graph.row_map(Acol2Irow(i)+1) - Iks.graph.row_map(Acol2Irow(i));
3468 if(
final && i+1==merge_numrows)
3469 Mrowptr(i+1)=update;
3473 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr,merge_numrows);
3474 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"),merge_nnz);
3475 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"),merge_nnz);
3478 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3479 Kokkos::parallel_for (
"Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),KOKKOS_LAMBDA(
const size_t i) {
3480 if(Acol2Brow(i)!=LO_INVALID) {
3481 size_t row = Acol2Brow(i);
3482 size_t start = Bk.graph.row_map(row);
3483 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3484 Mvalues(j) = Bk.values(j-Mrowptr(i)+start);
3485 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
3489 size_t row = Acol2Irow(i);
3490 size_t start = Iks.graph.row_map(row);
3491 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3492 Mvalues(j) = Iks.values(j-Mrowptr(i)+start);
3493 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j-Mrowptr(i)+start));
3498 KCRS newmat(
"CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind);
3521 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
3523 void MatrixMatrix::Multiply( \
3524 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3526 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3528 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3529 bool call_FillComplete_on_result, \
3530 const std::string & label, \
3531 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3534 void MatrixMatrix::Jacobi( \
3536 const Vector< SCALAR, LO, GO, NODE > & Dinv, \
3537 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3538 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3539 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3540 bool call_FillComplete_on_result, \
3541 const std::string & label, \
3542 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3545 void MatrixMatrix::Add( \
3546 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3549 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3552 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \
3555 void MatrixMatrix::Add( \
3556 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3559 CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3563 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \
3564 MatrixMatrix::add<SCALAR, LO, GO, NODE> \
3565 (const SCALAR & alpha, \
3566 const bool transposeA, \
3567 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3568 const SCALAR & beta, \
3569 const bool transposeB, \
3570 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3571 const Teuchos::RCP<const Map<LO, GO, NODE> >& domainMap, \
3572 const Teuchos::RCP<const Map<LO, GO, NODE> >& rangeMap, \
3573 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3575 template struct MatrixMatrix::AddDetails::AddKernels<SCALAR, LO, GO, NODE>;
3579 #endif // TPETRA_MATRIXMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
static int TAFC_OptimizationCoreCount()
The core count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced nei...
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
size_t getNodeNumRows() const override
The number of matrix rows owned by the calling process.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
bool isFillActive() const
Whether the matrix is not fill complete.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Construct and (optionally) redistribute the explicitly stored transpose of a CrsMatrix.
void keyValueMerge(KeyInputIterType keyBeg1, KeyInputIterType keyEnd1, ValueInputIterType valBeg1, ValueInputIterType valEnd1, KeyInputIterType keyBeg2, KeyInputIterType keyEnd2, ValueInputIterType valBeg2, ValueInputIterType valEnd2, KeyOutputIterType keyOut, ValueOutputIterType valOut, BinaryFunction f)
Merge two sorted (by keys) sequences of unique (key,value) pairs by combining pairs with equal keys...
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
static void writeSparseFile(const std::string &filename, const sparse_matrix_type &matrix, const std::string &matrixName, const std::string &matrixDescription, const bool debug=false)
Print the sparse matrix in Matrix Market format, with comments.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
A parallel distribution of indices over processes.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
void setAllValues(const typename local_matrix_type::row_map_type &ptr, const typename local_graph_type::entries_type::non_const_type &ind, const typename local_matrix_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
A distributed dense vector.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Matrix Market file readers and writers for Tpetra objects.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.