41 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP
42 #define TPETRA_MATRIXMATRIX_DEF_HPP
43 #include "Tpetra_ConfigDefs.hpp"
45 #include "Teuchos_VerboseObject.hpp"
46 #include "Teuchos_Array.hpp"
48 #include "Tpetra_CrsMatrix.hpp"
50 #include "Tpetra_RowMatrixTransposer.hpp"
52 #include "Tpetra_Details_radixSort.hpp"
54 #include "Tpetra_ConfigDefs.hpp"
55 #include "Tpetra_Map.hpp"
56 #include "Tpetra_Export.hpp"
60 #include <type_traits>
61 #include "Teuchos_FancyOStream.hpp"
63 #include "TpetraExt_MatrixMatrix_ExtraKernels_def.hpp"
66 #include "KokkosSparse_spgemm.hpp"
67 #include "KokkosSparse_spadd.hpp"
81 #include "TpetraExt_MatrixMatrix_OpenMP.hpp"
82 #include "TpetraExt_MatrixMatrix_Cuda.hpp"
87 namespace MatrixMatrix{
95 template <
class Scalar,
105 bool call_FillComplete_on_result,
106 const std::string& label,
107 const Teuchos::RCP<Teuchos::ParameterList>& params)
112 typedef LocalOrdinal LO;
113 typedef GlobalOrdinal GO;
121 #ifdef HAVE_TPETRA_MMM_TIMINGS
122 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
123 using Teuchos::TimeMonitor;
124 TimeMonitor MM(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Setup")));
127 const std::string prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
132 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
133 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
138 RCP<const crs_matrix_type> Aprime = null;
142 RCP<const crs_matrix_type> Bprime = null;
152 const bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
154 bool use_optimized_ATB =
false;
155 if (transposeA && !transposeB && call_FillComplete_on_result && newFlag)
156 use_optimized_ATB =
true;
158 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later.
159 use_optimized_ATB =
false;
162 if (!use_optimized_ATB && transposeA) {
163 transposer_type transposer(rcpFromRef (A));
164 Aprime = transposer.createTranspose();
167 Aprime = rcpFromRef(A);
171 transposer_type transposer(rcpFromRef (B));
172 Bprime = transposer.createTranspose();
175 Bprime = rcpFromRef(B);
185 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
186 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
187 "must match for matrix-matrix product. op(A) is "
188 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
194 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
195 prefix <<
"ERROR, dimensions of result C must "
197 <<
" rows, should have at least " << Aouter << std::endl);
209 int numProcs = A.
getComm()->getSize();
213 crs_matrix_struct_type Aview;
214 crs_matrix_struct_type Bview;
216 RCP<const map_type> targetMap_A = Aprime->getRowMap();
217 RCP<const map_type> targetMap_B = Bprime->getRowMap();
219 #ifdef HAVE_TPETRA_MMM_TIMINGS
221 TimeMonitor MM2(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All I&X")));
227 if (!use_optimized_ATB) {
228 RCP<const import_type> dummyImporter;
229 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label, params);
235 targetMap_B = Aprime->getColMap();
238 if (!use_optimized_ATB)
239 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label, params);
241 #ifdef HAVE_TPETRA_MMM_TIMINGS
243 TimeMonitor MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Multiply")));
248 if (use_optimized_ATB) {
249 MMdetails::mult_AT_B_newmatrix(A, B, C, label,params);
251 }
else if (call_FillComplete_on_result && newFlag) {
252 MMdetails::mult_A_B_newmatrix(Aview, Bview, C, label,params);
254 }
else if (call_FillComplete_on_result) {
255 MMdetails::mult_A_B_reuse(Aview, Bview, C, label,params);
261 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
263 MMdetails::mult_A_B(Aview, Bview, crsmat, label,params);
265 #ifdef HAVE_TPETRA_MMM_TIMINGS
267 TimeMonitor MM4(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All FillComplete")));
269 if (call_FillComplete_on_result) {
277 C.
fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
283 template <
class Scalar,
292 bool call_FillComplete_on_result,
293 const std::string& label,
294 const Teuchos::RCP<Teuchos::ParameterList>& params)
298 typedef LocalOrdinal LO;
299 typedef GlobalOrdinal GO;
306 #ifdef HAVE_TPETRA_MMM_TIMINGS
307 std::string prefix_mmm = std::string(
"TpetraExt ")+ label + std::string(
": ");
308 using Teuchos::TimeMonitor;
309 TimeMonitor MM(*TimeMonitor::getNewTimer(prefix_mmm+std::string(
"Jacobi All Setup")));
312 const std::string prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
317 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
318 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
320 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
321 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
330 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
331 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
332 "must match for matrix-matrix product. op(A) is "
333 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
339 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
340 prefix <<
"ERROR, dimensions of result C must "
342 <<
" rows, should have at least "<< Aouter << std::endl);
354 int numProcs = A.
getComm()->getSize();
358 crs_matrix_struct_type Aview;
359 crs_matrix_struct_type Bview;
361 RCP<const map_type> targetMap_A = Aprime->getRowMap();
362 RCP<const map_type> targetMap_B = Bprime->getRowMap();
364 #ifdef HAVE_TPETRA_MMM_TIMINGS
365 TimeMonitor MM2(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All I&X")));
371 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
372 if(!params.is_null()) {
373 importParams1->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
374 int mm_optimization_core_count=0;
375 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
377 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
378 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
379 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
380 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
381 auto & ip1slist = importParams1->sublist(
"matrixmatrix: kernel params",
false);
382 ip1slist.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
383 ip1slist.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
384 ip1slist.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
388 RCP<const import_type> dummyImporter;
389 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
false, label,importParams1);
394 targetMap_B = Aprime->getColMap();
399 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
400 if(!params.is_null()) {
401 importParams2->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
403 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
405 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
406 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
407 auto & ip2slist = importParams2->sublist(
"matrixmatrix: kernel params",
false);
408 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
409 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
410 ip2slist.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
411 ip2slist.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
412 ip2slist.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
415 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label,importParams2);
417 #ifdef HAVE_TPETRA_MMM_TIMINGS
419 TimeMonitor MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All Multiply")));
423 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
426 bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
428 if (call_FillComplete_on_result && newFlag) {
429 MMdetails::jacobi_A_B_newmatrix(omega, Dinv, Aview, Bview, C, label, params);
431 }
else if (call_FillComplete_on_result) {
432 MMdetails::jacobi_A_B_reuse(omega, Dinv, Aview, Bview, C, label, params);
435 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"jacobi_A_B_general not implemented");
458 template <
class Scalar,
469 using Teuchos::Array;
473 typedef LocalOrdinal LO;
474 typedef GlobalOrdinal GO;
479 const std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
481 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
482 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. "
483 "(Result matrix B is not required to be isFillComplete()).");
484 TEUCHOS_TEST_FOR_EXCEPTION(B.
isFillComplete() , std::runtime_error,
485 prefix <<
"ERROR, input matrix B must not be fill complete!");
486 TEUCHOS_TEST_FOR_EXCEPTION(B.
isStaticGraph() , std::runtime_error,
487 prefix <<
"ERROR, input matrix B must not have static graph!");
489 prefix <<
"ERROR, input matrix B must not be locally indexed!");
491 RCP<const crs_matrix_type> Aprime = null;
493 transposer_type transposer(rcpFromRef (A));
494 Aprime = transposer.createTranspose();
496 Aprime = rcpFromRef(A);
504 if (scalarB != Teuchos::ScalarTraits<SC>::one())
509 if (scalarA != Teuchos::ScalarTraits<SC>::zero()) {
510 for (LO i = 0; (size_t)i < numMyRows; ++i) {
511 row = B.
getRowMap()->getGlobalElement(i);
512 Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries);
514 if (scalarA != Teuchos::ScalarTraits<SC>::one())
515 for (
size_t j = 0; j < a_numEntries; ++j)
516 a_vals[j] *= scalarA;
526 namespace ColMapFunctors
528 template<
typename ByteView,
typename GView>
531 UnionEntries(ByteView entryUnion_,
const GView gids_) : entryUnion(entryUnion_), gids(gids_) {}
532 KOKKOS_INLINE_FUNCTION
void operator()(
const size_t i)
const
534 entryUnion(gids(i)) = 1;
540 template<
typename LView,
typename GView>
541 struct ConvertGlobalToLocal
543 ConvertGlobalToLocal(
const LView gtol_,
const GView gids_, LView lids_) : gtol(gtol_), gids(gids_), lids(lids_) {}
544 KOKKOS_INLINE_FUNCTION
void operator()(
const size_t i)
const
546 lids(i) = gtol(gids(i));
556 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
557 Teuchos::RCP<Map<LocalOrdinal, GlobalOrdinal, Node> > AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
558 makeColMapAndConvertGids(GlobalOrdinal ncols,
559 const typename AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::global_col_inds_array& gids,
560 typename AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::col_inds_array& lids,
561 const Teuchos::RCP<
const Teuchos::Comm<int>>& comm)
563 using namespace ColMapFunctors;
566 typedef Kokkos::View<char*, device_type> ByteView;
567 typedef global_col_inds_array GView;
568 typedef col_inds_array LView;
570 auto nentries = gids.extent(0);
572 ByteView entryUnion(
"entry union", ncols);
573 UnionEntries<ByteView, GView> ue(entryUnion, gids);
574 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_unionEntries", range_type(0, nentries), ue);
576 LView gtol(
"global col -> local col", ncols + 1);
577 ::Tpetra::Details::computeOffsetsFromCounts<decltype(gtol), decltype(entryUnion)>(gtol, entryUnion);
579 ConvertGlobalToLocal<LView, GView> cgtl(gtol, gids, lids);
580 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertGlobalToLocal", range_type(0, gids.extent(0)), cgtl);
582 execution_space::fence();
583 GView colmap(
"column map", gtol(ncols));
584 size_t localIter = 0;
585 execution_space::fence();
586 for(
size_t i = 0; i < entryUnion.extent(0); i++)
588 if(entryUnion(i) != 0)
590 colmap(localIter++) = i;
593 execution_space::fence();
595 return rcp(
new map_type(Teuchos::OrdinalTraits<GlobalOrdinal>::invalid(), colmap, 0, comm));
598 template <
class Scalar,
602 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
604 const bool transposeA,
607 const bool transposeB,
611 const Teuchos::RCP<Teuchos::ParameterList>& params)
617 Teuchos::RCP<crs_matrix_type> C = rcp(
new crs_matrix_type(CrowMap, 0));
619 add(alpha,transposeA,A,beta,transposeB,B,*C,domainMap,rangeMap,params);
623 template <
class Scalar,
629 const bool transposeA,
632 const bool transposeB,
637 const Teuchos::RCP<Teuchos::ParameterList>& params)
641 using Teuchos::rcpFromRef;
642 using Teuchos::rcp_implicit_cast;
643 using Teuchos::rcp_dynamic_cast;
644 using Teuchos::TimeMonitor;
646 typedef LocalOrdinal LO;
647 typedef GlobalOrdinal GO;
654 typedef AddDetails::AddKernels<SC,LO,GO,NO> AddKern;
655 const char* prefix_mmm =
"TpetraExt::MatrixMatrix::add: ";
656 constexpr
bool debug =
false;
658 #ifdef HAVE_TPETRA_MMM_TIMINGS
659 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"transpose"))));
663 std::ostringstream os;
664 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
665 <<
"TpetraExt::MatrixMatrix::add" << std::endl;
666 std::cerr << os.str ();
670 prefix_mmm <<
"A and B must both be fill complete.");
671 #ifdef HAVE_TPETRA_DEBUG
674 const bool domainMapsSame =
678 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
679 prefix_mmm <<
"The domain Maps of Op(A) and Op(B) are not the same.");
681 const bool rangeMapsSame =
685 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
686 prefix_mmm <<
"The range Maps of Op(A) and Op(B) are not the same.");
688 #endif // HAVE_TPETRA_DEBUG
691 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
693 transposer_type transposer(Aprime);
694 Aprime = transposer.createTranspose ();
696 #ifdef HAVE_TPETRA_DEBUG
697 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
698 prefix_mmm <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
699 #endif // HAVE_TPETRA_DEBUG
701 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
704 std::ostringstream os;
705 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
706 <<
"Form explicit xpose of B" << std::endl;
707 std::cerr << os.str ();
709 transposer_type transposer(Bprime);
710 Bprime = transposer.createTranspose ();
712 #ifdef HAVE_TPETRA_DEBUG
713 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
714 prefix_mmm <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
715 TEUCHOS_TEST_FOR_EXCEPTION(
716 !Aprime->isFillComplete () || !Bprime->isFillComplete (), std::invalid_argument,
717 prefix_mmm <<
"Aprime and Bprime must both be fill complete. "
718 "Please report this bug to the Tpetra developers.");
719 #endif // HAVE_TPETRA_DEBUG
720 RCP<const map_type> CDomainMap = domainMap;
721 RCP<const map_type> CRangeMap = rangeMap;
722 if(CDomainMap.is_null())
724 CDomainMap = Bprime->getDomainMap();
726 if(CRangeMap.is_null())
728 CRangeMap = Bprime->getRangeMap();
730 assert(!(CDomainMap.is_null()));
731 assert(!(CRangeMap.is_null()));
732 typedef typename AddKern::values_array values_array;
733 typedef typename AddKern::row_ptrs_array row_ptrs_array;
734 typedef typename AddKern::col_inds_array col_inds_array;
735 bool AGraphSorted = Aprime->getCrsGraph()->isSorted();
736 bool BGraphSorted = Bprime->getCrsGraph()->isSorted();
738 row_ptrs_array rowptrs;
739 col_inds_array colinds;
740 auto acolmap = Aprime->getColMap()->getMyGlobalIndices();
741 auto bcolmap = Bprime->getColMap()->getMyGlobalIndices();
742 #ifdef HAVE_TPETRA_MMM_TIMINGS
744 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"rowmap check/import"))));
746 if(!(Aprime->getRowMap()->isSameAs(*(Bprime->getRowMap()))))
749 auto import = rcp(
new import_type(Aprime->getRowMap(), Bprime->getRowMap()));
755 Aprime = importAndFillCompleteCrsMatrix<crs_matrix_type>(Aprime, *
import, Bprime->getDomainMap(), Bprime->getRangeMap());
757 bool matchingColMaps = Aprime->getColMap()->isSameAs(*(Bprime->getColMap()));
758 bool sorted = AGraphSorted && BGraphSorted;
759 RCP<const map_type> CrowMap;
760 RCP<const map_type> CcolMap;
761 RCP<const import_type> Cimport = Teuchos::null;
762 RCP<export_type> Cexport = Teuchos::null;
764 if(!matchingColMaps && !(CDomainMap->isContiguous()))
767 #ifdef HAVE_TPETRA_MMM_TIMINGS
769 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"fallback to CrsMatrix::add"))));
772 std::ostringstream os;
773 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
774 <<
"Call Bprime->add(...)" << std::endl;
775 std::cerr << os.str ();
777 Teuchos::RCP<crs_matrix_type> C_ = Teuchos::rcp_static_cast<crs_matrix_type>(Bprime->add(alpha, *Aprime, beta, CDomainMap, CRangeMap, params));
779 C.
setAllValues(C_->getLocalMatrix().graph.row_map,C_->getLocalMatrix().graph.entries,C_->getLocalMatrix().values);
780 C.
expertStaticFillComplete(CDomainMap, CRangeMap, C_->getGraph()->getImporter(), C_->getGraph()->getExporter(), params);
783 else if(!matchingColMaps)
785 #ifdef HAVE_TPETRA_MMM_TIMINGS
787 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"mismatched col map full kernel"))));
790 auto Acolmap = Aprime->getColMap()->getMyGlobalIndices();
791 auto Bcolmap = Bprime->getColMap()->getMyGlobalIndices();
792 typename AddKern::global_col_inds_array globalColinds(
"", 0);
794 std::ostringstream os;
795 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
796 <<
"Call AddKern::convertToGlobalAndAdd(...)" << std::endl;
797 std::cerr << os.str ();
799 AddKern::convertToGlobalAndAdd(
800 Aprime->getLocalMatrix(), alpha, Bprime->getLocalMatrix(), beta, Acolmap, Bcolmap,
801 CRangeMap->getMinGlobalIndex(), Aprime->getGlobalNumCols(), vals, rowptrs, globalColinds);
802 colinds = col_inds_array(
"C colinds", globalColinds.extent(0));
804 std::ostringstream os;
805 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
806 <<
"Finished AddKern::convertToGlobalAndAdd(...)" << std::endl;
807 std::cerr << os.str ();
809 #ifdef HAVE_TPETRA_MMM_TIMINGS
811 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"building optimized column map"))));
813 CrowMap = Bprime->getRowMap();
816 CcolMap = AddKern::makeColMapAndConvertGids(Aprime->getGlobalNumCols(), globalColinds, colinds, comm);
821 auto localA = Aprime->getLocalMatrix();
822 auto localB = Bprime->getLocalMatrix();
823 auto Avals = localA.values;
824 auto Bvals = localB.values;
825 auto Arowptrs = localA.graph.row_map;
826 auto Browptrs = localB.graph.row_map;
827 auto Acolinds = localA.graph.entries;
828 auto Bcolinds = localB.graph.entries;
832 #ifdef HAVE_TPETRA_MMM_TIMINGS
834 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"sorted entries full kernel"))));
837 std::ostringstream os;
838 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
839 <<
"Call AddKern::addSorted(...)" << std::endl;
840 std::cerr << os.str ();
842 AddKern::addSorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, vals, rowptrs, colinds);
847 #ifdef HAVE_TPETRA_MMM_TIMINGS
849 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"mm add unsorted entries full kernel"))));
852 std::ostringstream os;
853 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
854 <<
"Call AddKern::addUnsorted(...)" << std::endl;
855 std::cerr << os.str ();
857 AddKern::addUnsorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, Aprime->getGlobalNumCols(), vals, rowptrs, colinds);
859 CrowMap = Bprime->getRowMap();
860 CcolMap = Bprime->getColMap();
863 #ifdef HAVE_TPETRA_MMM_TIMINGS
865 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Tpetra::Crs constructor"))));
870 #ifdef HAVE_TPETRA_MMM_TIMINGS
872 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Tpetra::Crs expertStaticFillComplete"))));
874 if(!CDomainMap->isSameAs(*CcolMap))
877 std::ostringstream os;
878 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
879 <<
"Create Cimport" << std::endl;
880 std::cerr << os.str ();
882 Cimport = rcp(
new import_type(CDomainMap, CcolMap));
884 if(!CrowMap->isSameAs(*CRangeMap))
887 std::ostringstream os;
888 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
889 <<
"Create Cexport" << std::endl;
890 std::cerr << os.str ();
892 Cexport = rcp(
new export_type(CrowMap, CRangeMap));
896 std::ostringstream os;
897 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
898 <<
"Call C->expertStaticFillComplete(...)" << std::endl;
899 std::cerr << os.str ();
905 template <
class Scalar,
918 using Teuchos::Array;
919 using Teuchos::ArrayRCP;
920 using Teuchos::ArrayView;
923 using Teuchos::rcp_dynamic_cast;
924 using Teuchos::rcpFromRef;
925 using Teuchos::tuple;
928 typedef Teuchos::ScalarTraits<Scalar> STS;
936 std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
938 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
939 prefix <<
"The case C == null does not actually work. Fixing this will require an interface change.");
941 TEUCHOS_TEST_FOR_EXCEPTION(
943 prefix <<
"Both input matrices must be fill complete before calling this function.");
945 #ifdef HAVE_TPETRA_DEBUG
947 const bool domainMapsSame =
951 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
952 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
954 const bool rangeMapsSame =
958 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
959 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
961 #endif // HAVE_TPETRA_DEBUG
964 RCP<const crs_matrix_type> Aprime;
966 transposer_type theTransposer (rcpFromRef (A));
967 Aprime = theTransposer.createTranspose ();
969 Aprime = rcpFromRef (A);
972 #ifdef HAVE_TPETRA_DEBUG
973 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
974 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
975 #endif // HAVE_TPETRA_DEBUG
978 RCP<const crs_matrix_type> Bprime;
980 transposer_type theTransposer (rcpFromRef (B));
981 Bprime = theTransposer.createTranspose ();
983 Bprime = rcpFromRef (B);
986 #ifdef HAVE_TPETRA_DEBUG
987 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
988 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
989 #endif // HAVE_TPETRA_DEBUG
992 if (! C.is_null ()) {
993 C->setAllToScalar (STS::zero ());
1001 if (Aprime->getRowMap ()->isSameAs (* (Bprime->getRowMap ())) {
1002 RCP<const map_type> rowMap = Aprime->getRowMap ();
1004 RCP<const crs_graph_type> A_graph =
1005 rcp_dynamic_cast<
const crs_graph_type> (Aprime->getGraph (),
true);
1006 #ifdef HAVE_TPETRA_DEBUG
1007 TEUCHOS_TEST_FOR_EXCEPTION(A_graph.is_null (), std::logic_error,
1008 "Tpetra::MatrixMatrix::Add: Graph of Op(A) is null. "
1009 "Please report this bug to the Tpetra developers.");
1010 #endif // HAVE_TPETRA_DEBUG
1011 RCP<const crs_graph_type> B_graph =
1012 rcp_dynamic_cast<
const crs_graph_type> (Bprime->getGraph (),
true);
1013 #ifdef HAVE_TPETRA_DEBUG
1014 TEUCHOS_TEST_FOR_EXCEPTION(B_graph.is_null (), std::logic_error,
1015 "Tpetra::MatrixMatrix::Add: Graph of Op(B) is null. "
1016 "Please report this bug to the Tpetra developers.");
1017 #endif // HAVE_TPETRA_DEBUG
1018 RCP<const map_type> A_colMap = A_graph->getColMap ();
1019 #ifdef HAVE_TPETRA_DEBUG
1020 TEUCHOS_TEST_FOR_EXCEPTION(A_colMap.is_null (), std::logic_error,
1021 "Tpetra::MatrixMatrix::Add: Column Map of Op(A) is null. "
1022 "Please report this bug to the Tpetra developers.");
1023 #endif // HAVE_TPETRA_DEBUG
1024 RCP<const map_type> B_colMap = B_graph->getColMap ();
1025 #ifdef HAVE_TPETRA_DEBUG
1026 TEUCHOS_TEST_FOR_EXCEPTION(B_colMap.is_null (), std::logic_error,
1027 "Tpetra::MatrixMatrix::Add: Column Map of Op(B) is null. "
1028 "Please report this bug to the Tpetra developers.");
1029 TEUCHOS_TEST_FOR_EXCEPTION(A_graph->getImporter ().is_null (),
1031 "Tpetra::MatrixMatrix::Add: Op(A)'s Import is null. "
1032 "Please report this bug to the Tpetra developers.");
1033 TEUCHOS_TEST_FOR_EXCEPTION(B_graph->getImporter ().is_null (),
1035 "Tpetra::MatrixMatrix::Add: Op(B)'s Import is null. "
1036 "Please report this bug to the Tpetra developers.");
1037 #endif // HAVE_TPETRA_DEBUG
1040 RCP<const import_type> sumImport =
1041 A_graph->getImporter ()->setUnion (* (B_graph->getImporter ()));
1042 RCP<const map_type> C_colMap = sumImport->getTargetMap ();
1050 ArrayView<const LocalOrdinal> A_local_ind;
1051 Array<GlobalOrdinal> A_global_ind;
1052 ArrayView<const LocalOrdinal> B_local_ind;
1053 Array<GlobalOrdinal> B_global_ind;
1055 const size_t localNumRows = rowMap->getNodeNumElements ();
1056 ArrayRCP<size_t> numEntriesPerRow (localNumRows);
1059 size_t maxNumEntriesPerRow = 0;
1060 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
1062 A_graph->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind);
1063 const size_type A_numEnt = A_local_ind.size ();
1064 if (A_numEnt > A_global_ind.size ()) {
1065 A_global_ind.resize (A_numEnt);
1068 for (size_type k = 0; k < A_numEnt; ++k) {
1069 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
1073 B_graph->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind);
1074 const size_type B_numEnt = B_local_ind.size ();
1075 if (B_numEnt > B_global_ind.size ()) {
1076 B_global_ind.resize (B_numEnt);
1079 for (size_type k = 0; k < B_numEnt; ++k) {
1080 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
1084 const size_t curNumEntriesPerRow =
1085 keyMergeCount (A_global_ind.begin (), A_global_ind.end (),
1086 B_global_ind.begin (), B_global_ind.end ());
1087 numEntriesPerRow[localRow] = curNumEntriesPerRow;
1088 maxNumEntriesPerRow = std::max (maxNumEntriesPerRow, curNumEntriesPerRow);
1095 C = rcp (
new crs_matrix_type (rowMap, C_colMap, numEntriesPerRow, StaticProfile));
1100 ArrayView<const Scalar> A_val;
1101 ArrayView<const Scalar> B_val;
1103 Array<LocalOrdinal> AplusB_local_ind (maxNumEntriesPerRow);
1104 Array<GlobalOrdinal> AplusB_global_ind (maxNumEntriesPerRow);
1105 Array<Scalar> AplusB_val (maxNumEntriesPerRow);
1107 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
1109 Aprime->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind, A_val);
1111 for (size_type k = 0; k < A_local_ind.size (); ++k) {
1112 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
1116 Bprime->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind, B_val);
1118 for (size_type k = 0; k < B_local_ind.size (); ++k) {
1119 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
1122 const size_t curNumEntries = numEntriesPerRow[localRow];
1123 ArrayView<LocalOrdinal> C_local_ind = AplusB_local_ind (0, curNumEntries);
1124 ArrayView<GlobalOrdinal> C_global_ind = AplusB_global_ind (0, curNumEntries);
1125 ArrayView<Scalar> C_val = AplusB_val (0, curNumEntries);
1129 A_val.begin (), A_val.end (),
1130 B_global_ind.begin (), B_global_ind.end (),
1131 B_val.begin (), B_val.end (),
1132 C_global_ind.begin (), C_val.begin (),
1133 std::plus<Scalar> ());
1135 for (size_type k = 0; k < as<size_type> (numEntriesPerRow[localRow]); ++k) {
1136 C_local_ind[k] = C_colMap->getLocalElement (C_global_ind[k]);
1139 C->replaceLocalValues (localRow, C_local_ind, C_val);
1144 RCP<const map_type> domainMap = A_graph->getDomainMap ();
1145 RCP<const map_type> rangeMap = A_graph->getRangeMap ();
1146 C->expertStaticFillComplete (domainMap, rangeMap, sumImport, A_graph->getExporter ());
1154 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), null));
1161 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), 0));
1165 #ifdef HAVE_TPETRA_DEBUG
1166 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
1167 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
1168 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
1169 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
1170 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
1171 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
1172 #endif // HAVE_TPETRA_DEBUG
1174 Array<RCP<const crs_matrix_type> > Mat =
1175 tuple<RCP<const crs_matrix_type> > (Aprime, Bprime);
1176 Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB);
1179 for (
int k = 0; k < 2; ++k) {
1180 Array<GlobalOrdinal> Indices;
1181 Array<Scalar> Values;
1189 #ifdef HAVE_TPETRA_DEBUG
1190 TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error,
1191 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1192 #endif // HAVE_TPETRA_DEBUG
1193 RCP<const map_type> curRowMap = Mat[k]->getRowMap ();
1194 #ifdef HAVE_TPETRA_DEBUG
1195 TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error,
1196 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1197 #endif // HAVE_TPETRA_DEBUG
1199 const size_t localNumRows = Mat[k]->getNodeNumRows ();
1200 for (
size_t i = 0; i < localNumRows; ++i) {
1201 const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i);
1202 size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow);
1203 if (numEntries > 0) {
1204 Indices.resize (numEntries);
1205 Values.resize (numEntries);
1206 Mat[k]->getGlobalRowCopy (globalRow, Indices (), Values (), numEntries);
1208 if (scalar[k] != STS::one ()) {
1209 for (
size_t j = 0; j < numEntries; ++j) {
1210 Values[j] *= scalar[k];
1214 if (C->isFillComplete ()) {
1215 C->sumIntoGlobalValues (globalRow, Indices, Values);
1217 C->insertGlobalValues (globalRow, Indices, Values);
1224 template<
typename SC,
typename LO,
typename GO,
typename NO>
1225 void AddDetails::AddKernels<SC, LO, GO, NO>::
1227 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
1228 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
1229 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
1230 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1231 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
1232 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
1233 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
1234 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1235 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1236 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1237 typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
1239 using Teuchos::TimeMonitor;
1240 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
1241 auto nrows = Arowptrs.extent(0) - 1;
1242 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1243 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, LO, impl_scalar_type,
1244 execution_space, memory_space, memory_space> KKH;
1246 handle.create_spadd_handle(
true);
1247 auto addHandle = handle.get_spadd_handle();
1248 #ifdef HAVE_TPETRA_MMM_TIMINGS
1249 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted symbolic")));
1251 KokkosSparse::Experimental::spadd_symbolic
1253 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1254 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1255 row_ptrs_array, col_inds_array>
1256 (&handle, Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
1257 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1258 Ccolinds = col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1259 #ifdef HAVE_TPETRA_MMM_TIMINGS
1261 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted numeric")));
1263 KokkosSparse::Experimental::spadd_numeric(&handle,
1264 Arowptrs, Acolinds, Avals, scalarA,
1265 Browptrs, Bcolinds, Bvals, scalarB,
1266 Crowptrs, Ccolinds, Cvals);
1269 template<
typename SC,
typename LO,
typename GO,
typename NO>
1270 void AddDetails::AddKernels<SC, LO, GO, NO>::
1272 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
1273 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
1274 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
1275 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1276 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
1277 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
1278 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
1279 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1281 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1282 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1283 typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
1285 using Teuchos::TimeMonitor;
1286 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
1287 auto nrows = Arowptrs.extent(0) - 1;
1288 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1289 typedef AddDetails::AddKernels<SC, LO, GO, NO> AddKern;
1290 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, LO, AddKern::impl_scalar_type,
1291 AddKern::execution_space, AddKern::memory_space, AddKern::memory_space> KKH;
1293 handle.create_spadd_handle(
false);
1294 auto addHandle = handle.get_spadd_handle();
1295 #ifdef HAVE_TPETRA_MMM_TIMINGS
1296 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted symbolic")));
1298 KokkosSparse::Experimental::spadd_symbolic
1300 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1301 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1302 row_ptrs_array, col_inds_array>
1303 (&handle, Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
1304 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1305 Ccolinds = col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1306 #ifdef HAVE_TPETRA_MMM_TIMINGS
1308 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted kernel: sorted numeric")));
1310 KokkosSparse::Experimental::spadd_numeric(&handle,
1311 Arowptrs, Acolinds, Avals, scalarA,
1312 Browptrs, Bcolinds, Bvals, scalarB,
1313 Crowptrs, Ccolinds, Cvals);
1316 template<
typename GO,
1317 typename LocalIndicesType,
1318 typename GlobalIndicesType,
1319 typename ColMapType>
1320 struct ConvertColIndsFunctor
1322 ConvertColIndsFunctor (
const GO minGlobal_,
1323 const LocalIndicesType& colindsOrig_,
1324 const GlobalIndicesType& colindsConverted_,
1325 const ColMapType& colmap_) :
1326 minGlobal (minGlobal_),
1327 colindsOrig (colindsOrig_),
1328 colindsConverted (colindsConverted_),
1331 KOKKOS_INLINE_FUNCTION
void
1332 operator() (
const size_t& i)
const
1334 colindsConverted[i] = colmap[colindsOrig[i]];
1337 LocalIndicesType colindsOrig;
1338 GlobalIndicesType colindsConverted;
1342 template<
typename SC,
typename LO,
typename GO,
typename NO>
1343 void AddDetails::AddKernels<SC, LO, GO, NO>::
1344 convertToGlobalAndAdd(
1345 const typename AddDetails::AddKernels<SC, LO, GO, NO>::KCRS& A,
1346 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1347 const typename AddDetails::AddKernels<SC, LO, GO, NO>::KCRS& B,
1348 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1349 const typename AddDetails::AddKernels<SC, LO, GO, NO>::local_map_type& AcolMap,
1350 const typename AddDetails::AddKernels<SC, LO, GO, NO>::local_map_type& BcolMap,
1353 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1354 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1355 typename AddDetails::AddKernels<SC, LO, GO, NO>::global_col_inds_array& Ccolinds)
1357 using Teuchos::TimeMonitor;
1359 const values_array& Avals = A.values;
1360 const values_array& Bvals = B.values;
1361 const col_inds_array& Acolinds = A.graph.entries;
1362 const col_inds_array& Bcolinds = B.graph.entries;
1363 auto Arowptrs = A.graph.row_map;
1364 auto Browptrs = B.graph.row_map;
1365 global_col_inds_array AcolindsConverted(
"A colinds (converted)", Acolinds.extent(0));
1366 global_col_inds_array BcolindsConverted(
"B colinds (converted)", Bcolinds.extent(0));
1367 #ifdef HAVE_TPETRA_MMM_TIMINGS
1368 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: " + std::string(
"column map conversion"))));
1370 ConvertColIndsFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertA(minGlobalCol, Acolinds, AcolindsConverted, AcolMap);
1371 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsA", range_type(0, Acolinds.extent(0)), convertA);
1372 ConvertColIndsFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertB(minGlobalCol, Bcolinds, BcolindsConverted, BcolMap);
1373 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsB", range_type(0, Bcolinds.extent(0)), convertB);
1374 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, GO, impl_scalar_type,
1375 execution_space, memory_space, memory_space> KKH;
1377 handle.create_spadd_handle(
false);
1378 auto addHandle = handle.get_spadd_handle();
1379 #ifdef HAVE_TPETRA_MMM_TIMINGS
1381 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted symbolic")));
1383 auto nrows = Arowptrs.extent(0) - 1;
1384 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1385 KokkosSparse::Experimental::spadd_symbolic
1386 <KKH,
typename row_ptrs_array::const_type,
typename global_col_inds_array::const_type,
typename row_ptrs_array::const_type,
typename global_col_inds_array::const_type, row_ptrs_array, global_col_inds_array>
1387 (&handle, Arowptrs, AcolindsConverted, Browptrs, BcolindsConverted, Crowptrs);
1388 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1389 Ccolinds = global_col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1390 #ifdef HAVE_TPETRA_MMM_TIMINGS
1392 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted numeric")));
1394 KokkosSparse::Experimental::spadd_numeric(&handle,
1395 Arowptrs, AcolindsConverted, Avals, scalarA,
1396 Browptrs, BcolindsConverted, Bvals, scalarB,
1397 Crowptrs, Ccolinds, Cvals);
1402 namespace MMdetails{
1406 template <
class TransferType>
1407 void printMultiplicationStatistics(Teuchos::RCP<TransferType > Transfer,
const std::string &label) {
1408 if (Transfer.is_null())
1411 const Distributor & Distor = Transfer->getDistributor();
1412 Teuchos::RCP<const Teuchos::Comm<int> > Comm = Transfer->getSourceMap()->getComm();
1414 size_t rows_send = Transfer->getNumExportIDs();
1415 size_t rows_recv = Transfer->getNumRemoteIDs();
1417 size_t round1_send = Transfer->getNumExportIDs() *
sizeof(size_t);
1418 size_t round1_recv = Transfer->getNumRemoteIDs() *
sizeof(size_t);
1419 size_t num_send_neighbors = Distor.getNumSends();
1420 size_t num_recv_neighbors = Distor.getNumReceives();
1421 size_t round2_send, round2_recv;
1422 Distor.getLastDoStatistics(round2_send,round2_recv);
1424 int myPID = Comm->getRank();
1425 int NumProcs = Comm->getSize();
1432 size_t lstats[8] = {num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv};
1433 size_t gstats_min[8], gstats_max[8];
1435 double lstats_avg[8], gstats_avg[8];
1436 for(
int i=0; i<8; i++)
1437 lstats_avg[i] = ((
double)lstats[i])/NumProcs;
1439 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MIN,8,lstats,gstats_min);
1440 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MAX,8,lstats,gstats_max);
1441 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_SUM,8,lstats_avg,gstats_avg);
1444 printf(
"%s Send Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
1445 (int)gstats_min[0],gstats_avg[0],(
int)gstats_max[0], (int)gstats_min[2],gstats_avg[2],(
int)gstats_max[2],
1446 (
int)gstats_min[4],gstats_avg[4],(
int)gstats_max[4], (
int)gstats_min[6],gstats_avg[6],(
int)gstats_max[6]);
1447 printf("%s Recv Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
1448 (
int)gstats_min[1],gstats_avg[1],(
int)gstats_max[1], (
int)gstats_min[3],gstats_avg[3],(
int)gstats_max[3],
1449 (
int)gstats_min[5],gstats_avg[5],(
int)gstats_max[5], (
int)gstats_min[7],gstats_avg[7],(
int)gstats_max[7]);
1456 template<class Scalar,
1458 class GlobalOrdinal,
1460 void mult_AT_B_newmatrix(
1461 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1462 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
1463 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1464 const std::
string & label,
1465 const Teuchos::RCP<Teuchos::ParameterList>& params)
1471 typedef LocalOrdinal LO;
1472 typedef GlobalOrdinal GO;
1474 typedef CrsMatrixStruct<SC,LO,GO,NO> crs_matrix_struct_type;
1475 typedef RowMatrixTransposer<SC,LO,GO,NO> transposer_type;
1477 #ifdef HAVE_TPETRA_MMM_TIMINGS
1478 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1479 using Teuchos::TimeMonitor;
1480 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T Transpose"))));
1486 transposer_type transposer (rcpFromRef (A),label+std::string(
"XP: "));
1487 RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(
new Teuchos::ParameterList);
1488 if(!params.is_null()) transposeParams->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1489 RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Atrans = transposer.createTransposeLocal(transposeParams);
1494 #ifdef HAVE_TPETRA_MMM_TIMINGS
1496 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T I&X"))));
1500 crs_matrix_struct_type Aview;
1501 crs_matrix_struct_type Bview;
1502 RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
1505 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
1506 if(!params.is_null()) {
1507 importParams1->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1508 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
1509 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1510 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1512 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1513 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1514 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
1515 auto & sip1 = importParams1->sublist(
"matrixmatrix: kernel params",
false);
1516 sip1.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1517 sip1.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
1518 sip1.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
1521 MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(), Aview, dummyImporter,
true, label,importParams1);
1523 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
1524 if(!params.is_null()){
1525 importParams2->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1526 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
1527 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1528 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1530 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1531 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1532 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
1533 auto & sip2 = importParams2->sublist(
"matrixmatrix: kernel params",
false);
1534 sip2.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1535 sip2.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
1536 sip2.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
1539 if(B.getRowMap()->isSameAs(*Atrans->getColMap())){
1540 MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,
true, label,importParams2);
1543 MMdetails::import_and_extract_views(B, Atrans->getColMap(), Bview, dummyImporter,
false, label,importParams2);
1546 #ifdef HAVE_TPETRA_MMM_TIMINGS
1548 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T AB-core"))));
1551 RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >Ctemp;
1554 bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
1555 if (needs_final_export)
1558 Ctemp = rcp(&C,
false);
1561 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label,params);
1566 #ifdef HAVE_TPETRA_MMM_TIMINGS
1568 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T exportAndFillComplete"))));
1571 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Crcp(&C,
false);
1573 if (needs_final_export) {
1574 Teuchos::ParameterList labelList;
1575 labelList.set(
"Timer Label", label);
1576 if(!params.is_null()) {
1577 Teuchos::ParameterList& params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
1578 Teuchos::ParameterList& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
1580 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1581 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1582 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
1583 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count,
"Core Count above which the optimized neighbor discovery is used");
1584 bool isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1585 bool overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1587 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete",isMM,
1588 "This parameter should be set to true only for MatrixMatrix operations: the optimization in Epetra that was ported to Tpetra does _not_ take into account the possibility that for any given source PID, a particular GID may not exist on the target PID: i.e. a transfer operation. A fix for this general case is in development.");
1589 labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
1590 labelList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
1592 Ctemp->exportAndFillComplete(Crcp,*Ctemp->getGraph()->getExporter(),
1593 B.getDomainMap(),A.getDomainMap(),rcp(&labelList,
false));
1595 #ifdef HAVE_TPETRA_MMM_STATISTICS
1596 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label+std::string(
" AT_B MMM"));
1602 template<
class Scalar,
1604 class GlobalOrdinal,
1607 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1608 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1609 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1610 const std::string& ,
1611 const Teuchos::RCP<Teuchos::ParameterList>& )
1613 using Teuchos::Array;
1614 using Teuchos::ArrayRCP;
1615 using Teuchos::ArrayView;
1616 using Teuchos::OrdinalTraits;
1617 using Teuchos::null;
1619 typedef Teuchos::ScalarTraits<Scalar> STS;
1621 LocalOrdinal C_firstCol = Bview.colMap->getMinLocalIndex();
1622 LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
1624 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1625 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1627 ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getNodeElementList();
1628 ArrayView<const GlobalOrdinal> bcols_import = null;
1629 if (Bview.importColMap != null) {
1630 C_firstCol_import = Bview.importColMap->getMinLocalIndex();
1631 C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
1633 bcols_import = Bview.importColMap->getNodeElementList();
1636 size_t C_numCols = C_lastCol - C_firstCol +
1637 OrdinalTraits<LocalOrdinal>::one();
1638 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1639 OrdinalTraits<LocalOrdinal>::one();
1641 if (C_numCols_import > C_numCols)
1642 C_numCols = C_numCols_import;
1644 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1645 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1646 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1648 Array<Scalar> C_row_i = dwork;
1649 Array<GlobalOrdinal> C_cols = iwork;
1650 Array<size_t> c_index = iwork2;
1651 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1652 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1654 size_t C_row_i_length, j, k, last_index;
1657 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1658 Array<LocalOrdinal> Acol2Brow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1659 Array<LocalOrdinal> Acol2Irow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1660 if(Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())){
1662 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1663 Aview.colMap->getMaxLocalIndex(); i++)
1668 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1669 Aview.colMap->getMaxLocalIndex(); i++) {
1670 GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
1671 LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
1672 if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1673 else Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
1683 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1684 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1685 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1686 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1687 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1688 ArrayView<const Scalar> Avals, Bvals, Ivals;
1689 Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1690 Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1691 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1692 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1693 if(!Bview.importMatrix.is_null()) {
1694 Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
1695 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1698 bool C_filled = C.isFillComplete();
1700 for (
size_t i = 0; i < C_numCols; i++)
1701 c_index[i] = OrdinalTraits<size_t>::invalid();
1704 size_t Arows = Aview.rowMap->getNodeNumElements();
1705 for(
size_t i=0; i<Arows; ++i) {
1709 GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
1715 C_row_i_length = OrdinalTraits<size_t>::zero();
1717 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1718 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1719 const Scalar Aval = Avals[k];
1720 if (Aval == STS::zero())
1723 if (Ak == LO_INVALID)
1726 for (j = Browptr[Ak]; j < Browptr[Ak+1]; ++j) {
1727 LocalOrdinal col = Bcolind[j];
1730 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1733 C_row_i[C_row_i_length] = Aval*Bvals[j];
1734 C_cols[C_row_i_length] = col;
1735 c_index[col] = C_row_i_length;
1739 C_row_i[c_index[col]] += Aval*Bvals[j];
1744 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1745 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1746 C_cols[ii] = bcols[C_cols[ii]];
1747 combined_index[ii] = C_cols[ii];
1748 combined_values[ii] = C_row_i[ii];
1750 last_index = C_row_i_length;
1756 C_row_i_length = OrdinalTraits<size_t>::zero();
1758 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1759 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1760 const Scalar Aval = Avals[k];
1761 if (Aval == STS::zero())
1764 if (Ak!=LO_INVALID)
continue;
1766 Ak = Acol2Irow[Acolind[k]];
1767 for (j = Irowptr[Ak]; j < Irowptr[Ak+1]; ++j) {
1768 LocalOrdinal col = Icolind[j];
1771 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1774 C_row_i[C_row_i_length] = Aval*Ivals[j];
1775 C_cols[C_row_i_length] = col;
1776 c_index[col] = C_row_i_length;
1781 C_row_i[c_index[col]] += Aval*Ivals[j];
1786 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1787 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1788 C_cols[ii] = bcols_import[C_cols[ii]];
1789 combined_index[last_index] = C_cols[ii];
1790 combined_values[last_index] = C_row_i[ii];
1797 C.sumIntoGlobalValues(
1799 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1800 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1802 C.insertGlobalValues(
1804 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1805 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1811 template<
class Scalar,
1813 class GlobalOrdinal,
1815 void setMaxNumEntriesPerRow(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) {
1816 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal> >::size_type local_length_size;
1817 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1819 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1820 Mview.maxNumRowEntries = Mview.indices[0].size();
1822 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1823 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1824 Mview.maxNumRowEntries = Mview.indices[i].size();
1829 template<
class CrsMatrixType>
1830 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1832 size_t Aest = 100, Best=100;
1833 if (A.getNodeNumEntries() > 0)
1834 Aest = (A.getNodeNumRows() > 0)? A.getNodeNumEntries()/A.getNodeNumRows() : 100;
1835 if (B.getNodeNumEntries() > 0)
1836 Best = (B.getNodeNumRows() > 0) ? B.getNodeNumEntries()/B.getNodeNumRows() : 100;
1838 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1839 nnzperrow *= nnzperrow;
1841 return (
size_t)(A.getNodeNumRows()*nnzperrow*0.75 + 100);
1851 template<
class Scalar,
1853 class GlobalOrdinal,
1855 void mult_A_B_newmatrix(
1856 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1857 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1858 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1859 const std::string& label,
1860 const Teuchos::RCP<Teuchos::ParameterList>& params)
1862 using Teuchos::Array;
1863 using Teuchos::ArrayRCP;
1864 using Teuchos::ArrayView;
1869 typedef LocalOrdinal LO;
1870 typedef GlobalOrdinal GO;
1872 typedef Import<LO,GO,NO> import_type;
1873 typedef Map<LO,GO,NO> map_type;
1876 typedef typename map_type::local_map_type local_map_type;
1878 typedef typename KCRS::StaticCrsGraphType graph_t;
1879 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1880 typedef typename NO::execution_space execution_space;
1881 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1882 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1884 #ifdef HAVE_TPETRA_MMM_TIMINGS
1885 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1886 using Teuchos::TimeMonitor;
1887 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM M5 Cmap")))));
1889 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1892 RCP<const import_type> Cimport;
1893 RCP<const map_type> Ccolmap;
1894 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1895 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
1896 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1897 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1898 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1899 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1900 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1901 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1908 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
1910 if (Bview.importMatrix.is_null()) {
1913 Ccolmap = Bview.colMap;
1914 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getNodeNumElements());
1916 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1917 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1918 KOKKOS_LAMBDA(
const LO i) {
1931 if (!Bimport.is_null() && !Iimport.is_null()) {
1932 Cimport = Bimport->setUnion(*Iimport);
1934 else if (!Bimport.is_null() && Iimport.is_null()) {
1935 Cimport = Bimport->setUnion();
1937 else if (Bimport.is_null() && !Iimport.is_null()) {
1938 Cimport = Iimport->setUnion();
1941 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1943 Ccolmap = Cimport->getTargetMap();
1948 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
1949 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1956 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
1957 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1958 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
1959 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1961 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
1962 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1971 C.replaceColMap(Ccolmap);
1989 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
1990 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
1992 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::construct_tables",range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
1993 GO aidx = Acolmap_local.getGlobalElement(i);
1994 LO B_LID = Browmap_local.getLocalElement(aidx);
1995 if (B_LID != LO_INVALID) {
1996 targetMapToOrigRow(i) = B_LID;
1997 targetMapToImportRow(i) = LO_INVALID;
1999 LO I_LID = Irowmap_local.getLocalElement(aidx);
2000 targetMapToOrigRow(i) = LO_INVALID;
2001 targetMapToImportRow(i) = I_LID;
2008 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_newmatrix_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2015 template<
class Scalar,
2017 class GlobalOrdinal,
2019 class LocalOrdinalViewType>
2020 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2021 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2022 const LocalOrdinalViewType & targetMapToOrigRow,
2023 const LocalOrdinalViewType & targetMapToImportRow,
2024 const LocalOrdinalViewType & Bcol2Ccol,
2025 const LocalOrdinalViewType & Icol2Ccol,
2026 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2027 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2028 const std::string& label,
2029 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2030 #ifdef HAVE_TPETRA_MMM_TIMINGS
2031 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2032 using Teuchos::TimeMonitor;
2033 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore"))));
2036 using Teuchos::Array;
2037 using Teuchos::ArrayRCP;
2038 using Teuchos::ArrayView;
2044 typedef typename KCRS::StaticCrsGraphType graph_t;
2045 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2046 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2047 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2048 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2051 typedef LocalOrdinal LO;
2052 typedef GlobalOrdinal GO;
2054 typedef Map<LO,GO,NO> map_type;
2055 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2056 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2057 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2060 RCP<const map_type> Ccolmap = C.getColMap();
2061 size_t m = Aview.origMatrix->getNodeNumRows();
2062 size_t n = Ccolmap->getNodeNumElements();
2063 size_t b_max_nnz_per_row = Bview.origMatrix->getNodeMaxNumRowEntries();
2066 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
2067 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
2069 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2070 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2071 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2073 c_lno_view_t Irowptr;
2074 lno_nnz_view_t Icolind;
2075 scalar_view_t Ivals;
2076 if(!Bview.importMatrix.is_null()) {
2077 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
2078 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
2079 Ivals = Bview.importMatrix->getLocalMatrix().values;
2080 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getNodeMaxNumRowEntries());
2083 #ifdef HAVE_TPETRA_MMM_TIMINGS
2084 RCP<TimeMonitor> MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2094 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2095 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
2096 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
2097 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
2107 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2108 std::vector<size_t> c_status(n, ST_INVALID);
2118 size_t CSR_ip = 0, OLD_ip = 0;
2119 for (
size_t i = 0; i < m; i++) {
2122 Crowptr[i] = CSR_ip;
2125 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2126 LO Aik = Acolind[k];
2127 const SC Aval = Avals[k];
2128 if (Aval == SC_ZERO)
2131 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2138 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2141 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2142 LO Bkj = Bcolind[j];
2143 LO Cij = Bcol2Ccol[Bkj];
2145 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2147 c_status[Cij] = CSR_ip;
2148 Ccolind[CSR_ip] = Cij;
2149 Cvals[CSR_ip] = Aval*Bvals[j];
2153 Cvals[c_status[Cij]] += Aval*Bvals[j];
2164 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2165 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2166 LO Ikj = Icolind[j];
2167 LO Cij = Icol2Ccol[Ikj];
2169 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip){
2171 c_status[Cij] = CSR_ip;
2172 Ccolind[CSR_ip] = Cij;
2173 Cvals[CSR_ip] = Aval*Ivals[j];
2176 Cvals[c_status[Cij]] += Aval*Ivals[j];
2183 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1])*b_max_nnz_per_row) > CSR_alloc) {
2185 Kokkos::resize(Ccolind,CSR_alloc);
2186 Kokkos::resize(Cvals,CSR_alloc);
2191 Crowptr[m] = CSR_ip;
2194 Kokkos::resize(Ccolind,CSR_ip);
2195 Kokkos::resize(Cvals,CSR_ip);
2197 #ifdef HAVE_TPETRA_MMM_TIMINGS
2199 auto MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix Final Sort")));
2203 if (params.is_null() || params->get(
"sort entries",
true))
2204 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2205 C.setAllValues(Crowptr,Ccolind, Cvals);
2208 #ifdef HAVE_TPETRA_MMM_TIMINGS
2210 auto MM4(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix ESFC")));
2222 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2223 labelList->set(
"Timer Label",label);
2224 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2225 RCP<const Export<LO,GO,NO> > dummyExport;
2226 C.expertStaticFillComplete(Bview. origMatrix->getDomainMap(), Aview. origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2227 #ifdef HAVE_TPETRA_MMM_TIMINGS
2229 MM2 = Teuchos::null;
2239 template<
class Scalar,
2241 class GlobalOrdinal,
2243 void mult_A_B_reuse(
2244 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2245 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2246 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2247 const std::string& label,
2248 const Teuchos::RCP<Teuchos::ParameterList>& params)
2250 using Teuchos::Array;
2251 using Teuchos::ArrayRCP;
2252 using Teuchos::ArrayView;
2257 typedef LocalOrdinal LO;
2258 typedef GlobalOrdinal GO;
2260 typedef Import<LO,GO,NO> import_type;
2261 typedef Map<LO,GO,NO> map_type;
2264 typedef typename map_type::local_map_type local_map_type;
2266 typedef typename KCRS::StaticCrsGraphType graph_t;
2267 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2268 typedef typename NO::execution_space execution_space;
2269 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2270 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2272 #ifdef HAVE_TPETRA_MMM_TIMINGS
2273 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2274 using Teuchos::TimeMonitor;
2275 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse Cmap"))));
2277 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2280 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2281 RCP<const map_type> Ccolmap = C.getColMap();
2282 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2283 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2284 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2285 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2286 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2287 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2290 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
2294 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2295 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2298 if (!Bview.importMatrix.is_null()) {
2299 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2300 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
2302 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
2303 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2304 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2310 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
2311 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
2312 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2313 GO aidx = Acolmap_local.getGlobalElement(i);
2314 LO B_LID = Browmap_local.getLocalElement(aidx);
2315 if (B_LID != LO_INVALID) {
2316 targetMapToOrigRow(i) = B_LID;
2317 targetMapToImportRow(i) = LO_INVALID;
2319 LO I_LID = Irowmap_local.getLocalElement(aidx);
2320 targetMapToOrigRow(i) = LO_INVALID;
2321 targetMapToImportRow(i) = I_LID;
2328 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_reuse_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2332 template<
class Scalar,
2334 class GlobalOrdinal,
2336 class LocalOrdinalViewType>
2337 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2338 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2339 const LocalOrdinalViewType & targetMapToOrigRow,
2340 const LocalOrdinalViewType & targetMapToImportRow,
2341 const LocalOrdinalViewType & Bcol2Ccol,
2342 const LocalOrdinalViewType & Icol2Ccol,
2343 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2344 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > ,
2345 const std::string& label,
2346 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2347 #ifdef HAVE_TPETRA_MMM_TIMINGS
2348 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2349 using Teuchos::TimeMonitor;
2350 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
2351 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
2361 typedef typename KCRS::StaticCrsGraphType graph_t;
2362 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2363 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2364 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2367 typedef LocalOrdinal LO;
2368 typedef GlobalOrdinal GO;
2370 typedef Map<LO,GO,NO> map_type;
2371 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2372 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2373 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2376 RCP<const map_type> Ccolmap = C.getColMap();
2377 size_t m = Aview.origMatrix->getNodeNumRows();
2378 size_t n = Ccolmap->getNodeNumElements();
2381 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
2382 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
2383 const KCRS & Cmat = C.getLocalMatrix();
2385 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2386 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2387 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2388 scalar_view_t Cvals = Cmat.values;
2390 c_lno_view_t Irowptr;
2391 lno_nnz_view_t Icolind;
2392 scalar_view_t Ivals;
2393 if(!Bview.importMatrix.is_null()) {
2394 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
2395 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
2396 Ivals = Bview.importMatrix->getLocalMatrix().values;
2399 #ifdef HAVE_TPETRA_MMM_TIMINGS
2400 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2412 std::vector<size_t> c_status(n, ST_INVALID);
2415 size_t CSR_ip = 0, OLD_ip = 0;
2416 for (
size_t i = 0; i < m; i++) {
2419 OLD_ip = Crowptr[i];
2420 CSR_ip = Crowptr[i+1];
2421 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2422 c_status[Ccolind[k]] = k;
2428 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2429 LO Aik = Acolind[k];
2430 const SC Aval = Avals[k];
2431 if (Aval == SC_ZERO)
2434 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2436 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2438 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2439 LO Bkj = Bcolind[j];
2440 LO Cij = Bcol2Ccol[Bkj];
2442 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2443 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2444 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2446 Cvals[c_status[Cij]] += Aval * Bvals[j];
2451 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2452 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2453 LO Ikj = Icolind[j];
2454 LO Cij = Icol2Ccol[Ikj];
2456 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2457 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2458 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2460 Cvals[c_status[Cij]] += Aval * Ivals[j];
2466 #ifdef HAVE_TPETRA_MMM_TIMINGS
2467 auto MM3 = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse ESFC"))));
2470 C.fillComplete(C.getDomainMap(), C.getRangeMap());
2476 template<
class Scalar,
2478 class GlobalOrdinal,
2480 void jacobi_A_B_newmatrix(
2482 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2483 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2484 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2485 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2486 const std::string& label,
2487 const Teuchos::RCP<Teuchos::ParameterList>& params)
2489 using Teuchos::Array;
2490 using Teuchos::ArrayRCP;
2491 using Teuchos::ArrayView;
2495 typedef LocalOrdinal LO;
2496 typedef GlobalOrdinal GO;
2499 typedef Import<LO,GO,NO> import_type;
2500 typedef Map<LO,GO,NO> map_type;
2501 typedef typename map_type::local_map_type local_map_type;
2505 typedef typename KCRS::StaticCrsGraphType graph_t;
2506 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2507 typedef typename NO::execution_space execution_space;
2508 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2509 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2512 #ifdef HAVE_TPETRA_MMM_TIMINGS
2513 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2514 using Teuchos::TimeMonitor;
2515 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi M5 Cmap"))));
2517 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2520 RCP<const import_type> Cimport;
2521 RCP<const map_type> Ccolmap;
2522 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
2523 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
2524 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
2525 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2526 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2527 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2528 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2529 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2536 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
2538 if (Bview.importMatrix.is_null()) {
2541 Ccolmap = Bview.colMap;
2545 Kokkos::RangePolicy<execution_space, LO> range (0, static_cast<LO> (Bview.colMap->getNodeNumElements ()));
2546 Kokkos::parallel_for (range, KOKKOS_LAMBDA (
const size_t i) {
2547 Bcol2Ccol(i) =
static_cast<LO
> (i);
2558 if (!Bimport.is_null() && !Iimport.is_null()){
2559 Cimport = Bimport->setUnion(*Iimport);
2560 Ccolmap = Cimport->getTargetMap();
2562 }
else if (!Bimport.is_null() && Iimport.is_null()) {
2563 Cimport = Bimport->setUnion();
2565 }
else if(Bimport.is_null() && !Iimport.is_null()) {
2566 Cimport = Iimport->setUnion();
2569 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
2571 Ccolmap = Cimport->getTargetMap();
2573 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2574 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2581 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
2582 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2583 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2584 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2586 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2587 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2596 C.replaceColMap(Ccolmap);
2614 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
2615 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
2616 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2617 GO aidx = Acolmap_local.getGlobalElement(i);
2618 LO B_LID = Browmap_local.getLocalElement(aidx);
2619 if (B_LID != LO_INVALID) {
2620 targetMapToOrigRow(i) = B_LID;
2621 targetMapToImportRow(i) = LO_INVALID;
2623 LO I_LID = Irowmap_local.getLocalElement(aidx);
2624 targetMapToOrigRow(i) = LO_INVALID;
2625 targetMapToImportRow(i) = I_LID;
2632 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_newmatrix_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2641 template<
class Scalar,
2643 class GlobalOrdinal,
2645 class LocalOrdinalViewType>
2646 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
2647 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & Dinv,
2648 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2649 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2650 const LocalOrdinalViewType & targetMapToOrigRow,
2651 const LocalOrdinalViewType & targetMapToImportRow,
2652 const LocalOrdinalViewType & Bcol2Ccol,
2653 const LocalOrdinalViewType & Icol2Ccol,
2654 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2655 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2656 const std::string& label,
2657 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2659 #ifdef HAVE_TPETRA_MMM_TIMINGS
2660 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2661 using Teuchos::TimeMonitor;
2662 auto MM(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Nemwmatrix SerialCore")));
2665 using Teuchos::Array;
2666 using Teuchos::ArrayRCP;
2667 using Teuchos::ArrayView;
2673 typedef typename KCRS::StaticCrsGraphType graph_t;
2674 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2675 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2676 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2677 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2680 typedef typename scalar_view_t::memory_space scalar_memory_space;
2683 typedef LocalOrdinal LO;
2684 typedef GlobalOrdinal GO;
2687 typedef Map<LO,GO,NO> map_type;
2688 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2689 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2692 RCP<const map_type> Ccolmap = C.getColMap();
2693 size_t m = Aview.origMatrix->getNodeNumRows();
2694 size_t n = Ccolmap->getNodeNumElements();
2695 size_t b_max_nnz_per_row = Bview.origMatrix->getNodeMaxNumRowEntries();
2698 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
2699 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
2701 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2702 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2703 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2705 c_lno_view_t Irowptr;
2706 lno_nnz_view_t Icolind;
2707 scalar_view_t Ivals;
2708 if(!Bview.importMatrix.is_null()) {
2709 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
2710 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
2711 Ivals = Bview.importMatrix->getLocalMatrix().values;
2712 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getNodeMaxNumRowEntries());
2716 auto Dvals = Dinv.template getLocalView<scalar_memory_space>();
2724 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2725 Array<size_t> c_status(n, ST_INVALID);
2734 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2735 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
2736 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
2737 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
2738 size_t CSR_ip = 0, OLD_ip = 0;
2740 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2754 for (
size_t i = 0; i < m; i++) {
2757 Crowptr[i] = CSR_ip;
2758 SC minusOmegaDval = -omega*Dvals(i,0);
2761 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2762 Scalar Bval = Bvals[j];
2763 if (Bval == SC_ZERO)
2765 LO Bij = Bcolind[j];
2766 LO Cij = Bcol2Ccol[Bij];
2769 c_status[Cij] = CSR_ip;
2770 Ccolind[CSR_ip] = Cij;
2771 Cvals[CSR_ip] = Bvals[j];
2776 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2777 LO Aik = Acolind[k];
2778 const SC Aval = Avals[k];
2779 if (Aval == SC_ZERO)
2782 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2784 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2786 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2787 LO Bkj = Bcolind[j];
2788 LO Cij = Bcol2Ccol[Bkj];
2790 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2792 c_status[Cij] = CSR_ip;
2793 Ccolind[CSR_ip] = Cij;
2794 Cvals[CSR_ip] = minusOmegaDval* Aval * Bvals[j];
2798 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Bvals[j];
2804 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2805 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2806 LO Ikj = Icolind[j];
2807 LO Cij = Icol2Ccol[Ikj];
2809 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2811 c_status[Cij] = CSR_ip;
2812 Ccolind[CSR_ip] = Cij;
2813 Cvals[CSR_ip] = minusOmegaDval* Aval * Ivals[j];
2816 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Ivals[j];
2823 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1]+1)*b_max_nnz_per_row) > CSR_alloc) {
2825 Kokkos::resize(Ccolind,CSR_alloc);
2826 Kokkos::resize(Cvals,CSR_alloc);
2830 Crowptr[m] = CSR_ip;
2833 Kokkos::resize(Ccolind,CSR_ip);
2834 Kokkos::resize(Cvals,CSR_ip);
2837 #ifdef HAVE_TPETRA_MMM_TIMINGS
2838 auto MM2(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix Final Sort")));
2845 C.replaceColMap(Ccolmap);
2852 if (params.is_null() || params->get(
"sort entries",
true))
2853 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2854 C.setAllValues(Crowptr,Ccolind, Cvals);
2857 #ifdef HAVE_TPETRA_MMM_TIMINGS
2858 auto MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix ESFC")));
2869 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2870 labelList->set(
"Timer Label",label);
2871 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2872 RCP<const Export<LO,GO,NO> > dummyExport;
2873 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2881 template<
class Scalar,
2883 class GlobalOrdinal,
2885 void jacobi_A_B_reuse(
2887 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2888 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2889 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2890 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2891 const std::string& label,
2892 const Teuchos::RCP<Teuchos::ParameterList>& params)
2894 using Teuchos::Array;
2895 using Teuchos::ArrayRCP;
2896 using Teuchos::ArrayView;
2900 typedef LocalOrdinal LO;
2901 typedef GlobalOrdinal GO;
2904 typedef Import<LO,GO,NO> import_type;
2905 typedef Map<LO,GO,NO> map_type;
2908 typedef typename map_type::local_map_type local_map_type;
2910 typedef typename KCRS::StaticCrsGraphType graph_t;
2911 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2912 typedef typename NO::execution_space execution_space;
2913 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2914 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2916 #ifdef HAVE_TPETRA_MMM_TIMINGS
2917 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2918 using Teuchos::TimeMonitor;
2919 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse Cmap"))));
2921 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2924 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2925 RCP<const map_type> Ccolmap = C.getColMap();
2926 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2927 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2928 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2929 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2930 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2931 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2934 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
2938 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2939 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2942 if (!Bview.importMatrix.is_null()) {
2943 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2944 std::runtime_error,
"Tpetra::Jacobi: Import setUnion messed with the DomainMap in an unfortunate way");
2946 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
2947 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2948 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2954 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
2955 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
2956 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2957 GO aidx = Acolmap_local.getGlobalElement(i);
2958 LO B_LID = Browmap_local.getLocalElement(aidx);
2959 if (B_LID != LO_INVALID) {
2960 targetMapToOrigRow(i) = B_LID;
2961 targetMapToImportRow(i) = LO_INVALID;
2963 LO I_LID = Irowmap_local.getLocalElement(aidx);
2964 targetMapToOrigRow(i) = LO_INVALID;
2965 targetMapToImportRow(i) = I_LID;
2970 #ifdef HAVE_TPETRA_MMM_TIMINGS
2976 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_reuse_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2982 template<
class Scalar,
2984 class GlobalOrdinal,
2986 class LocalOrdinalViewType>
2987 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
2988 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2989 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2990 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2991 const LocalOrdinalViewType & targetMapToOrigRow,
2992 const LocalOrdinalViewType & targetMapToImportRow,
2993 const LocalOrdinalViewType & Bcol2Ccol,
2994 const LocalOrdinalViewType & Icol2Ccol,
2995 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2996 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > ,
2997 const std::string& label,
2998 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2999 #ifdef HAVE_TPETRA_MMM_TIMINGS
3000 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
3001 using Teuchos::TimeMonitor;
3002 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore"))));
3003 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
3012 typedef typename KCRS::StaticCrsGraphType graph_t;
3013 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
3014 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3015 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3016 typedef typename scalar_view_t::memory_space scalar_memory_space;
3019 typedef LocalOrdinal LO;
3020 typedef GlobalOrdinal GO;
3022 typedef Map<LO,GO,NO> map_type;
3023 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3024 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3025 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
3028 RCP<const map_type> Ccolmap = C.getColMap();
3029 size_t m = Aview.origMatrix->getNodeNumRows();
3030 size_t n = Ccolmap->getNodeNumElements();
3033 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
3034 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
3035 const KCRS & Cmat = C.getLocalMatrix();
3037 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
3038 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
3039 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
3040 scalar_view_t Cvals = Cmat.values;
3042 c_lno_view_t Irowptr;
3043 lno_nnz_view_t Icolind;
3044 scalar_view_t Ivals;
3045 if(!Bview.importMatrix.is_null()) {
3046 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
3047 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
3048 Ivals = Bview.importMatrix->getLocalMatrix().values;
3052 auto Dvals = Dinv.template getLocalView<scalar_memory_space>();
3054 #ifdef HAVE_TPETRA_MMM_TIMINGS
3055 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore - Compare"))));
3063 std::vector<size_t> c_status(n, ST_INVALID);
3066 size_t CSR_ip = 0, OLD_ip = 0;
3067 for (
size_t i = 0; i < m; i++) {
3071 OLD_ip = Crowptr[i];
3072 CSR_ip = Crowptr[i+1];
3073 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
3074 c_status[Ccolind[k]] = k;
3080 SC minusOmegaDval = -omega*Dvals(i,0);
3083 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
3084 Scalar Bval = Bvals[j];
3085 if (Bval == SC_ZERO)
3087 LO Bij = Bcolind[j];
3088 LO Cij = Bcol2Ccol[Bij];
3090 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3091 std::runtime_error,
"Trying to insert a new entry into a static graph");
3093 Cvals[c_status[Cij]] = Bvals[j];
3097 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
3098 LO Aik = Acolind[k];
3099 const SC Aval = Avals[k];
3100 if (Aval == SC_ZERO)
3103 if (targetMapToOrigRow[Aik] != LO_INVALID) {
3105 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
3107 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
3108 LO Bkj = Bcolind[j];
3109 LO Cij = Bcol2Ccol[Bkj];
3111 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3112 std::runtime_error,
"Trying to insert a new entry into a static graph");
3114 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
3119 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
3120 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
3121 LO Ikj = Icolind[j];
3122 LO Cij = Icol2Ccol[Ikj];
3124 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3125 std::runtime_error,
"Trying to insert a new entry into a static graph");
3127 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
3133 #ifdef HAVE_TPETRA_MMM_TIMINGS
3134 MM2 = Teuchos::null;
3135 MM2 = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
3138 C.fillComplete(C.getDomainMap(), C.getRangeMap());
3139 #ifdef HAVE_TPETRA_MMM_TIMINGS
3140 MM2 = Teuchos::null;
3149 template<
class Scalar,
3151 class GlobalOrdinal,
3153 void import_and_extract_views(
3154 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
3155 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
3156 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3157 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
3158 bool userAssertsThereAreNoRemotes,
3159 const std::string& label,
3160 const Teuchos::RCP<Teuchos::ParameterList>& params)
3162 using Teuchos::Array;
3163 using Teuchos::ArrayView;
3166 using Teuchos::null;
3169 typedef LocalOrdinal LO;
3170 typedef GlobalOrdinal GO;
3173 typedef Map<LO,GO,NO> map_type;
3174 typedef Import<LO,GO,NO> import_type;
3175 typedef CrsMatrix<SC,LO,GO,NO> crs_matrix_type;
3177 #ifdef HAVE_TPETRA_MMM_TIMINGS
3178 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
3179 using Teuchos::TimeMonitor;
3180 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Alloc"))));
3188 Aview.deleteContents();
3190 Aview.origMatrix = rcp(&A,
false);
3191 Aview.origRowMap = A.getRowMap();
3192 Aview.rowMap = targetMap;
3193 Aview.colMap = A.getColMap();
3194 Aview.domainMap = A.getDomainMap();
3195 Aview.importColMap = null;
3198 if (userAssertsThereAreNoRemotes)
3201 RCP<const import_type> importer;
3202 if (params != null && params->isParameter(
"importer")) {
3203 importer = params->get<RCP<const import_type> >(
"importer");
3206 #ifdef HAVE_TPETRA_MMM_TIMINGS
3208 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap"))));
3213 RCP<const map_type> rowMap = A.getRowMap(), remoteRowMap;
3214 size_t numRemote = 0;
3216 if (!prototypeImporter.is_null() &&
3217 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3218 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3220 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3221 numRemote = prototypeImporter->getNumRemoteIDs();
3223 Array<GO> remoteRows(numRemote);
3224 for (
size_t i = 0; i < numRemote; i++)
3225 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3227 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3228 rowMap->getIndexBase(), rowMap->getComm()));
3231 }
else if (prototypeImporter.is_null()) {
3233 ArrayView<const GO> rows = targetMap->getNodeElementList();
3234 size_t numRows = targetMap->getNodeNumElements();
3236 Array<GO> remoteRows(numRows);
3237 for(
size_t i = 0; i < numRows; ++i) {
3238 const LO mlid = rowMap->getLocalElement(rows[i]);
3240 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3241 remoteRows[numRemote++] = rows[i];
3243 remoteRows.resize(numRemote);
3244 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3245 rowMap->getIndexBase(), rowMap->getComm()));
3253 const int numProcs = rowMap->getComm()->getSize();
3256 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3257 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3266 #ifdef HAVE_TPETRA_MMM_TIMINGS
3268 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Collective-0"))));
3272 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
3274 if (globalMaxNumRemote > 0) {
3275 #ifdef HAVE_TPETRA_MMM_TIMINGS
3277 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-2"))));
3281 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3283 importer = rcp(
new import_type(rowMap, remoteRowMap));
3285 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
3289 params->set(
"importer", importer);
3292 if (importer != null) {
3293 #ifdef HAVE_TPETRA_MMM_TIMINGS
3295 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-3"))));
3299 Teuchos::ParameterList labelList;
3300 labelList.set(
"Timer Label", label);
3301 auto & labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
3304 bool overrideAllreduce =
false;
3307 Teuchos::ParameterList params_sublist;
3308 if(!params.is_null()) {
3309 labelList.set(
"compute global constants", params->get(
"compute global constants",
false));
3310 params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
3311 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3312 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3313 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
3314 isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
3315 overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
3317 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
3318 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3319 labelList_subList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
3321 Aview.importMatrix = Tpetra::importAndFillCompleteCrsMatrix<crs_matrix_type>(rcpFromRef(A), *importer,
3322 A.getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
3328 sprintf(str,
"import_matrix.%d.dat",count);
3333 #ifdef HAVE_TPETRA_MMM_STATISTICS
3334 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
3338 #ifdef HAVE_TPETRA_MMM_TIMINGS
3340 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-4"))));
3344 Aview.importColMap = Aview.importMatrix->getColMap();
3345 #ifdef HAVE_TPETRA_MMM_TIMINGS
3357 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3359 merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3360 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3361 const LocalOrdinalViewType & Acol2Brow,
3362 const LocalOrdinalViewType & Acol2Irow,
3363 const LocalOrdinalViewType & Bcol2Ccol,
3364 const LocalOrdinalViewType & Icol2Ccol,
3365 const size_t mergedNodeNumCols) {
3369 typedef typename KCRS::StaticCrsGraphType graph_t;
3370 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3371 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3372 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3374 const KCRS & Ak = Aview.origMatrix->getLocalMatrix();
3375 const KCRS & Bk = Bview.origMatrix->getLocalMatrix();
3378 if(!Bview.importMatrix.is_null() || (Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3382 RCP<const KCRS> Ik_;
3383 if(!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KCRS>(Bview.importMatrix->getLocalMatrix());
3384 const KCRS * Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3386 if(Ik!=0) Iks = *Ik;
3387 size_t merge_numrows = Ak.numCols();
3389 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3391 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3394 typedef typename Node::execution_space execution_space;
3395 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3396 Kokkos::parallel_scan (
"Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
3397 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3398 if(
final) Mrowptr(i) = update;
3401 if(Acol2Brow(i)!=LO_INVALID)
3402 ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
3404 ct = Iks.graph.row_map(Acol2Irow(i)+1) - Iks.graph.row_map(Acol2Irow(i));
3407 if(
final && i+1==merge_numrows)
3408 Mrowptr(i+1)=update;
3412 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr,merge_numrows);
3413 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"),merge_nnz);
3414 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"),merge_nnz);
3417 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3418 Kokkos::parallel_for (
"Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),KOKKOS_LAMBDA(
const size_t i) {
3419 if(Acol2Brow(i)!=LO_INVALID) {
3420 size_t row = Acol2Brow(i);
3421 size_t start = Bk.graph.row_map(row);
3422 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3423 Mvalues(j) = Bk.values(j-Mrowptr(i)+start);
3424 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
3428 size_t row = Acol2Irow(i);
3429 size_t start = Iks.graph.row_map(row);
3430 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3431 Mvalues(j) = Iks.values(j-Mrowptr(i)+start);
3432 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j-Mrowptr(i)+start));
3437 KCRS newmat(
"CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind);
3460 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
3462 void MatrixMatrix::Multiply( \
3463 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3465 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3467 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3468 bool call_FillComplete_on_result, \
3469 const std::string & label, \
3470 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3473 void MatrixMatrix::Jacobi( \
3475 const Vector< SCALAR, LO, GO, NODE > & Dinv, \
3476 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3477 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3478 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3479 bool call_FillComplete_on_result, \
3480 const std::string & label, \
3481 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3484 void MatrixMatrix::Add( \
3485 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3488 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3491 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \
3494 void MatrixMatrix::Add( \
3495 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3498 CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3502 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \
3503 MatrixMatrix::add<SCALAR, LO, GO, NODE> \
3504 (const SCALAR & alpha, \
3505 const bool transposeA, \
3506 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3507 const SCALAR & beta, \
3508 const bool transposeB, \
3509 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3510 const Teuchos::RCP<const Map<LO, GO, NODE> >& domainMap, \
3511 const Teuchos::RCP<const Map<LO, GO, NODE> >& rangeMap, \
3512 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3514 template struct MatrixMatrix::AddDetails::AddKernels<SCALAR, LO, GO, NODE>;
3518 #endif // TPETRA_MATRIXMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
static int TAFC_OptimizationCoreCount()
The core count above which Tpetra::CrsMatrix::transferAndFillComplere will attempt to do advanced nei...
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
static void writeSparseFile(const std::string &filename, const Teuchos::RCP< const sparse_matrix_type > &pMatrix, const std::string &matrixName, const std::string &matrixDescription, const bool debug=false)
Print the sparse matrix in Matrix Market format, with comments.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
size_t getNodeNumRows() const override
The number of matrix rows owned by the calling process.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
bool isFillActive() const
Whether the matrix is not fill complete.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Construct and (optionally) redistribute the explicitly stored transpose of a CrsMatrix.
void keyValueMerge(KeyInputIterType keyBeg1, KeyInputIterType keyEnd1, ValueInputIterType valBeg1, ValueInputIterType valEnd1, KeyInputIterType keyBeg2, KeyInputIterType keyEnd2, ValueInputIterType valBeg2, ValueInputIterType valEnd2, KeyOutputIterType keyOut, ValueOutputIterType valOut, BinaryFunction f)
Merge two sorted (by keys) sequences of unique (key,value) pairs by combining pairs with equal keys...
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
A parallel distribution of indices over processes.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
void setAllValues(const typename local_matrix_type::row_map_type &ptr, const typename local_graph_type::entries_type::non_const_type &ind, const typename local_matrix_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
A distributed dense vector.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Matrix Market file readers and writers for Tpetra objects.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.