41 #ifndef TPETRA_TRIPLEMATRIXMULTIPLY_DEF_HPP
42 #define TPETRA_TRIPLEMATRIXMULTIPLY_DEF_HPP
45 #include "TpetraExt_MatrixMatrix_ExtraKernels_decl.hpp"
46 #include "Teuchos_VerboseObject.hpp"
47 #include "Teuchos_Array.hpp"
49 #include "Tpetra_ConfigDefs.hpp"
50 #include "Tpetra_CrsMatrix.hpp"
52 #include "Tpetra_RowMatrixTransposer.hpp"
53 #include "Tpetra_ConfigDefs.hpp"
54 #include "Tpetra_Map.hpp"
55 #include "Tpetra_Export.hpp"
60 #include "Teuchos_FancyOStream.hpp"
74 #include "TpetraExt_MatrixMatrix_OpenMP.hpp"
75 #include "TpetraExt_MatrixMatrix_Cuda.hpp"
79 namespace TripleMatrixMultiply{
87 template <
class Scalar,
99 bool call_FillComplete_on_result,
100 const std::string& label,
101 const Teuchos::RCP<Teuchos::ParameterList>& params)
106 typedef LocalOrdinal LO;
107 typedef GlobalOrdinal GO;
116 #ifdef HAVE_TPETRA_MMM_TIMINGS
117 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
118 using Teuchos::TimeMonitor;
119 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP All Setup"))));
122 const std::string prefix =
"TpetraExt::TripleMatrixMultiply::MultiplyRAP(): ";
127 TEUCHOS_TEST_FOR_EXCEPTION(!R.
isFillComplete(), std::runtime_error, prefix <<
"Matrix R is not fill complete.");
128 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
129 TEUCHOS_TEST_FOR_EXCEPTION(!P.
isFillComplete(), std::runtime_error, prefix <<
"Matrix P is not fill complete.");
134 RCP<const crs_matrix_type> Rprime = null;
138 RCP<const crs_matrix_type> Aprime = null;
142 RCP<const crs_matrix_type> Pprime = null;
152 const bool newFlag = !Ac.
getGraph()->isLocallyIndexed() && !Ac.
getGraph()->isGloballyIndexed();
154 if (transposeR && &R != &P) {
155 transposer_type transposer(rcpFromRef (R));
156 Rprime = transposer.createTranspose();
158 Rprime = rcpFromRef(R);
162 transposer_type transposer(rcpFromRef (A));
163 Aprime = transposer.createTranspose();
165 Aprime = rcpFromRef(A);
169 transposer_type transposer(rcpFromRef (P));
170 Pprime = transposer.createTranspose();
172 Pprime = rcpFromRef(P);
185 TEUCHOS_TEST_FOR_EXCEPTION(Rright != Aleft, std::runtime_error,
186 prefix <<
"ERROR, inner dimensions of op(R) and op(A) "
187 "must match for matrix-matrix product. op(R) is "
188 << Rleft <<
"x" << Rright <<
", op(A) is "<< Aleft <<
"x" << Aright);
190 TEUCHOS_TEST_FOR_EXCEPTION(Aright != Pleft, std::runtime_error,
191 prefix <<
"ERROR, inner dimensions of op(A) and op(P) "
192 "must match for matrix-matrix product. op(A) is "
193 << Aleft <<
"x" << Aright <<
", op(P) is "<< Pleft <<
"x" << Pright);
199 TEUCHOS_TEST_FOR_EXCEPTION(Rleft > Ac.
getGlobalNumRows(), std::runtime_error,
200 prefix <<
"ERROR, dimensions of result Ac must "
201 "match dimensions of op(R) * op(A) * op(P). Ac has " << Ac.
getGlobalNumRows()
202 <<
" rows, should have at least " << Rleft << std::endl);
214 int numProcs = P.
getComm()->getSize();
218 crs_matrix_struct_type Rview;
219 crs_matrix_struct_type Aview;
220 crs_matrix_struct_type Pview;
222 RCP<const map_type> targetMap_R = Rprime->getRowMap();
223 RCP<const map_type> targetMap_A = Aprime->getRowMap();
224 RCP<const map_type> targetMap_P = Pprime->getRowMap();
226 #ifdef HAVE_TPETRA_MMM_TIMINGS
227 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP All I&X"))));
233 RCP<const import_type> dummyImporter;
235 if (!(transposeR && &R == &P))
236 MMdetails::import_and_extract_views(*Rprime, targetMap_R, Rview, dummyImporter,
true, label, params);
238 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label, params);
243 targetMap_P = Aprime->getColMap();
246 MMdetails::import_and_extract_views(*Pprime, targetMap_P, Pview, Aprime->getGraph()->getImporter(),
false, label, params);
249 RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Actemp;
251 bool needs_final_export = !Pprime->getGraph()->getImporter().is_null();
252 if (needs_final_export)
255 Actemp = rcp(&Ac,
false);
257 #ifdef HAVE_TPETRA_MMM_TIMINGS
258 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP All Multiply"))));
262 if (call_FillComplete_on_result && newFlag) {
263 if (transposeR && &R == &P)
264 MMdetails::mult_PT_A_P_newmatrix(Aview, Pview, *Actemp, label, params);
266 MMdetails::mult_R_A_P_newmatrix(Rview, Aview, Pview, *Actemp, label, params);
267 }
else if (call_FillComplete_on_result) {
268 if (transposeR && &R == &P)
269 MMdetails::mult_PT_A_P_reuse(Aview, Pview, *Actemp, label, params);
271 MMdetails::mult_R_A_P_reuse(Rview, Aview, Pview, *Actemp, label, params);
294 if (transposeR && &R == &P)
295 MMdetails::mult_PT_A_P_newmatrix(Aview, Pview, *Actemp, label, params);
297 MMdetails::mult_R_A_P_newmatrix(Rview, Aview, Pview, *Actemp, label, params);
300 if (needs_final_export) {
301 #ifdef HAVE_TPETRA_MMM_TIMINGS
302 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP exportAndFillComplete"))));
304 Teuchos::ParameterList labelList;
305 labelList.set(
"Timer Label", label);
306 Teuchos::ParameterList& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
308 RCP<crs_matrix_type> Acprime = rcpFromRef(Ac);
310 bool overrideAllreduce =
false;
312 if(!params.is_null()) {
313 Teuchos::ParameterList& params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
315 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
316 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
317 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
318 isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
319 overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
321 labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
323 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count,
"Core Count above which the optimized neighbor discovery is used");
325 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete",isMM,
326 "This parameter should be set to true only for MatrixMatrix operations: the optimization in Epetra that was ported to Tpetra does _not_ take into account the possibility that for any given source PID, a particular GID may not exist on the target PID: i.e. a transfer operation. A fix for this general case is in development.");
327 labelList_subList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
329 export_type exporter = export_type(*Pprime->getGraph()->getImporter());
330 Actemp->exportAndFillComplete(Acprime,
332 Acprime->getDomainMap(),
333 Acprime->getRangeMap(),
334 rcp(&labelList,
false));
337 #ifdef HAVE_TPETRA_MMM_STATISTICS
338 printMultiplicationStatistics(Actemp->getGraph()->getExporter(), label+std::string(
" RAP MMM"));
349 template<
class Scalar,
353 void mult_R_A_P_newmatrix(
354 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Rview,
355 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
356 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Pview,
357 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Ac,
358 const std::string& label,
359 const Teuchos::RCP<Teuchos::ParameterList>& params)
361 using Teuchos::Array;
362 using Teuchos::ArrayRCP;
363 using Teuchos::ArrayView;
368 typedef LocalOrdinal LO;
369 typedef GlobalOrdinal GO;
372 typedef Import<LO,GO,NO> import_type;
373 typedef Map<LO,GO,NO> map_type;
376 typedef typename map_type::local_map_type local_map_type;
378 typedef typename KCRS::StaticCrsGraphType graph_t;
379 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
380 typedef typename NO::execution_space execution_space;
381 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
382 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
384 #ifdef HAVE_TPETRA_MMM_TIMINGS
385 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
386 using Teuchos::TimeMonitor;
387 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP M5 Cmap")))));
389 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
392 RCP<const import_type> Cimport;
393 RCP<const map_type> Ccolmap;
394 RCP<const import_type> Pimport = Pview.origMatrix->getGraph()->getImporter();
395 RCP<const import_type> Iimport = Pview.importMatrix.is_null() ? Teuchos::null : Pview.importMatrix->getGraph()->getImporter();
396 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
397 local_map_type Prowmap_local = Pview.origMatrix->getRowMap()->getLocalMap();
398 local_map_type Irowmap_local;
if(!Pview.importMatrix.is_null()) Irowmap_local = Pview.importMatrix->getRowMap()->getLocalMap();
399 local_map_type Pcolmap_local = Pview.origMatrix->getColMap()->getLocalMap();
400 local_map_type Icolmap_local;
if(!Pview.importMatrix.is_null()) Icolmap_local = Pview.importMatrix->getColMap()->getLocalMap();
408 lo_view_t Pcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Pcol2Ccol"),Pview.colMap->getNodeNumElements()), Icol2Ccol;
410 if (Pview.importMatrix.is_null()) {
413 Ccolmap = Pview.colMap;
414 const LO colMapSize =
static_cast<LO
>(Pview.colMap->getNodeNumElements());
416 Kokkos::parallel_for(
"Tpetra::mult_R_A_P_newmatrix::Pcol2Ccol_fill",
417 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
418 KOKKOS_LAMBDA(
const LO i) {
431 if (!Pimport.is_null() && !Iimport.is_null()) {
432 Cimport = Pimport->setUnion(*Iimport);
434 else if (!Pimport.is_null() && Iimport.is_null()) {
435 Cimport = Pimport->setUnion();
437 else if (Pimport.is_null() && !Iimport.is_null()) {
438 Cimport = Iimport->setUnion();
441 throw std::runtime_error(
"TpetraExt::RAP status of matrix importers is nonsensical");
443 Ccolmap = Cimport->getTargetMap();
448 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Pview.origMatrix->getDomainMap()),
449 std::runtime_error,
"Tpetra::RAP: Import setUnion messed with the DomainMap in an unfortunate way");
456 Kokkos::resize(Icol2Ccol,Pview.importMatrix->getColMap()->getNodeNumElements());
457 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
458 Kokkos::parallel_for(
"Tpetra::mult_R_A_P_newmatrix::Pcol2Ccol_getGlobalElement",range_type(0,Pview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
459 Pcol2Ccol(i) = Ccolmap_local.getLocalElement(Pcolmap_local.getGlobalElement(i));
461 Kokkos::parallel_for(
"Tpetra::mult_R_A_P_newmatrix::Icol2Ccol_getGlobalElement",range_type(0,Pview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
462 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
471 Ac.replaceColMap(Ccolmap);
489 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
490 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
492 Kokkos::parallel_for(
"Tpetra::mult_R_A_P_newmatrix::construct_tables",range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
493 GO aidx = Acolmap_local.getGlobalElement(i);
494 LO P_LID = Prowmap_local.getLocalElement(aidx);
495 if (P_LID != LO_INVALID) {
496 targetMapToOrigRow(i) = P_LID;
497 targetMapToImportRow(i) = LO_INVALID;
499 LO I_LID = Irowmap_local.getLocalElement(aidx);
500 targetMapToOrigRow(i) = LO_INVALID;
501 targetMapToImportRow(i) = I_LID;
507 KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::
508 mult_R_A_P_newmatrix_kernel_wrapper(Rview, Aview, Pview,
509 targetMapToOrigRow,targetMapToImportRow, Pcol2Ccol, Icol2Ccol,
510 Ac, Cimport, label, params);
515 template<
class Scalar,
519 void mult_R_A_P_reuse(
520 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Rview,
521 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
522 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Pview,
523 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Ac,
524 const std::string& label,
525 const Teuchos::RCP<Teuchos::ParameterList>& params)
527 using Teuchos::Array;
528 using Teuchos::ArrayRCP;
529 using Teuchos::ArrayView;
534 typedef LocalOrdinal LO;
535 typedef GlobalOrdinal GO;
538 typedef Import<LO,GO,NO> import_type;
539 typedef Map<LO,GO,NO> map_type;
542 typedef typename map_type::local_map_type local_map_type;
544 typedef typename KCRS::StaticCrsGraphType graph_t;
545 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
546 typedef typename NO::execution_space execution_space;
547 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
548 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
550 #ifdef HAVE_TPETRA_MMM_TIMINGS
551 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
552 using Teuchos::TimeMonitor;
553 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP M5 Cmap")))));
555 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
558 RCP<const import_type> Cimport = Ac.getGraph()->getImporter();
559 RCP<const map_type> Ccolmap = Ac.getColMap();
560 RCP<const import_type> Pimport = Pview.origMatrix->getGraph()->getImporter();
561 RCP<const import_type> Iimport = Pview.importMatrix.is_null() ? Teuchos::null : Pview.importMatrix->getGraph()->getImporter();
562 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
563 local_map_type Prowmap_local = Pview.origMatrix->getRowMap()->getLocalMap();
564 local_map_type Irowmap_local;
if(!Pview.importMatrix.is_null()) Irowmap_local = Pview.importMatrix->getRowMap()->getLocalMap();
565 local_map_type Pcolmap_local = Pview.origMatrix->getColMap()->getLocalMap();
566 local_map_type Icolmap_local;
if(!Pview.importMatrix.is_null()) Icolmap_local = Pview.importMatrix->getColMap()->getLocalMap();
567 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
570 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Pview.colMap->getNodeNumElements()), Icol2Ccol;
574 Kokkos::parallel_for(range_type(0,Pview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
575 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Pcolmap_local.getGlobalElement(i));
578 if (!Pview.importMatrix.is_null()) {
579 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Pview.origMatrix->getDomainMap()),
580 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
582 Kokkos::resize(Icol2Ccol,Pview.importMatrix->getColMap()->getNodeNumElements());
583 Kokkos::parallel_for(range_type(0,Pview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
584 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
590 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
591 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
592 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
593 GO aidx = Acolmap_local.getGlobalElement(i);
594 LO B_LID = Prowmap_local.getLocalElement(aidx);
595 if (B_LID != LO_INVALID) {
596 targetMapToOrigRow(i) = B_LID;
597 targetMapToImportRow(i) = LO_INVALID;
599 LO I_LID = Irowmap_local.getLocalElement(aidx);
600 targetMapToOrigRow(i) = LO_INVALID;
601 targetMapToImportRow(i) = I_LID;
608 KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::
609 mult_R_A_P_reuse_kernel_wrapper(Rview, Aview, Pview,
610 targetMapToOrigRow,targetMapToImportRow, Bcol2Ccol, Icol2Ccol,
611 Ac, Cimport, label, params);
616 template<
class Scalar,
620 void mult_PT_A_P_newmatrix(
621 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
622 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Pview,
623 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Ac,
624 const std::string& label,
625 const Teuchos::RCP<Teuchos::ParameterList>& params)
627 using Teuchos::Array;
628 using Teuchos::ArrayRCP;
629 using Teuchos::ArrayView;
634 typedef LocalOrdinal LO;
635 typedef GlobalOrdinal GO;
638 typedef Import<LO,GO,NO> import_type;
639 typedef Map<LO,GO,NO> map_type;
642 typedef typename map_type::local_map_type local_map_type;
644 typedef typename KCRS::StaticCrsGraphType graph_t;
645 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
646 typedef typename NO::execution_space execution_space;
647 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
648 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
650 #ifdef HAVE_TPETRA_MMM_TIMINGS
651 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
652 using Teuchos::TimeMonitor;
653 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP M5 Cmap")))));
655 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
658 RCP<const import_type> Cimport;
659 RCP<const map_type> Ccolmap;
660 RCP<const import_type> Pimport = Pview.origMatrix->getGraph()->getImporter();
661 RCP<const import_type> Iimport = Pview.importMatrix.is_null() ? Teuchos::null : Pview.importMatrix->getGraph()->getImporter();
662 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
663 local_map_type Prowmap_local = Pview.origMatrix->getRowMap()->getLocalMap();
664 local_map_type Irowmap_local;
if(!Pview.importMatrix.is_null()) Irowmap_local = Pview.importMatrix->getRowMap()->getLocalMap();
665 local_map_type Pcolmap_local = Pview.origMatrix->getColMap()->getLocalMap();
666 local_map_type Icolmap_local;
if(!Pview.importMatrix.is_null()) Icolmap_local = Pview.importMatrix->getColMap()->getLocalMap();
674 lo_view_t Pcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Pcol2Ccol"),Pview.colMap->getNodeNumElements()), Icol2Ccol;
676 if (Pview.importMatrix.is_null()) {
679 Ccolmap = Pview.colMap;
680 const LO colMapSize =
static_cast<LO
>(Pview.colMap->getNodeNumElements());
682 Kokkos::parallel_for(
"Tpetra::mult_R_A_P_newmatrix::Pcol2Ccol_fill",
683 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
684 KOKKOS_LAMBDA(
const LO i) {
697 if (!Pimport.is_null() && !Iimport.is_null()) {
698 Cimport = Pimport->setUnion(*Iimport);
700 else if (!Pimport.is_null() && Iimport.is_null()) {
701 Cimport = Pimport->setUnion();
703 else if (Pimport.is_null() && !Iimport.is_null()) {
704 Cimport = Iimport->setUnion();
707 throw std::runtime_error(
"TpetraExt::RAP status of matrix importers is nonsensical");
709 Ccolmap = Cimport->getTargetMap();
714 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Pview.origMatrix->getDomainMap()),
715 std::runtime_error,
"Tpetra::RAP: Import setUnion messed with the DomainMap in an unfortunate way");
722 Kokkos::resize(Icol2Ccol,Pview.importMatrix->getColMap()->getNodeNumElements());
723 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
724 Kokkos::parallel_for(
"Tpetra::mult_R_A_P_newmatrix::Pcol2Ccol_getGlobalElement",range_type(0,Pview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
725 Pcol2Ccol(i) = Ccolmap_local.getLocalElement(Pcolmap_local.getGlobalElement(i));
727 Kokkos::parallel_for(
"Tpetra::mult_R_A_P_newmatrix::Icol2Ccol_getGlobalElement",range_type(0,Pview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
728 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
737 Ac.replaceColMap(Ccolmap);
755 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
756 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
758 Kokkos::parallel_for(
"Tpetra::mult_R_A_P_newmatrix::construct_tables",range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
759 GO aidx = Acolmap_local.getGlobalElement(i);
760 LO P_LID = Prowmap_local.getLocalElement(aidx);
761 if (P_LID != LO_INVALID) {
762 targetMapToOrigRow(i) = P_LID;
763 targetMapToImportRow(i) = LO_INVALID;
765 LO I_LID = Irowmap_local.getLocalElement(aidx);
766 targetMapToOrigRow(i) = LO_INVALID;
767 targetMapToImportRow(i) = I_LID;
773 KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::
774 mult_PT_A_P_newmatrix_kernel_wrapper(Aview, Pview,
775 targetMapToOrigRow,targetMapToImportRow, Pcol2Ccol, Icol2Ccol,
776 Ac, Cimport, label, params);
780 template<
class Scalar,
784 void mult_PT_A_P_reuse(
785 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
786 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Pview,
787 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Ac,
788 const std::string& label,
789 const Teuchos::RCP<Teuchos::ParameterList>& params)
791 using Teuchos::Array;
792 using Teuchos::ArrayRCP;
793 using Teuchos::ArrayView;
798 typedef LocalOrdinal LO;
799 typedef GlobalOrdinal GO;
802 typedef Import<LO,GO,NO> import_type;
803 typedef Map<LO,GO,NO> map_type;
806 typedef typename map_type::local_map_type local_map_type;
808 typedef typename KCRS::StaticCrsGraphType graph_t;
809 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
810 typedef typename NO::execution_space execution_space;
811 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
812 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
814 #ifdef HAVE_TPETRA_MMM_TIMINGS
815 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
816 using Teuchos::TimeMonitor;
817 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP M5 Cmap")))));
819 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
822 RCP<const import_type> Cimport = Ac.getGraph()->getImporter();
823 RCP<const map_type> Ccolmap = Ac.getColMap();
824 RCP<const import_type> Pimport = Pview.origMatrix->getGraph()->getImporter();
825 RCP<const import_type> Iimport = Pview.importMatrix.is_null() ? Teuchos::null : Pview.importMatrix->getGraph()->getImporter();
826 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
827 local_map_type Prowmap_local = Pview.origMatrix->getRowMap()->getLocalMap();
828 local_map_type Irowmap_local;
if(!Pview.importMatrix.is_null()) Irowmap_local = Pview.importMatrix->getRowMap()->getLocalMap();
829 local_map_type Pcolmap_local = Pview.origMatrix->getColMap()->getLocalMap();
830 local_map_type Icolmap_local;
if(!Pview.importMatrix.is_null()) Icolmap_local = Pview.importMatrix->getColMap()->getLocalMap();
831 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
834 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Pview.colMap->getNodeNumElements()), Icol2Ccol;
838 Kokkos::parallel_for(range_type(0,Pview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
839 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Pcolmap_local.getGlobalElement(i));
842 if (!Pview.importMatrix.is_null()) {
843 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Pview.origMatrix->getDomainMap()),
844 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
846 Kokkos::resize(Icol2Ccol,Pview.importMatrix->getColMap()->getNodeNumElements());
847 Kokkos::parallel_for(range_type(0,Pview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
848 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
854 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
855 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
856 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
857 GO aidx = Acolmap_local.getGlobalElement(i);
858 LO B_LID = Prowmap_local.getLocalElement(aidx);
859 if (B_LID != LO_INVALID) {
860 targetMapToOrigRow(i) = B_LID;
861 targetMapToImportRow(i) = LO_INVALID;
863 LO I_LID = Irowmap_local.getLocalElement(aidx);
864 targetMapToOrigRow(i) = LO_INVALID;
865 targetMapToImportRow(i) = I_LID;
872 KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::
873 mult_PT_A_P_reuse_kernel_wrapper(Aview, Pview,
874 targetMapToOrigRow,targetMapToImportRow, Bcol2Ccol, Icol2Ccol,
875 Ac, Cimport, label, params);
882 template<
class Scalar,
886 class LocalOrdinalViewType>
887 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Rview,
888 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
889 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Pview,
890 const LocalOrdinalViewType & Acol2Prow,
891 const LocalOrdinalViewType & Acol2PIrow,
892 const LocalOrdinalViewType & Pcol2Accol,
893 const LocalOrdinalViewType & PIcol2Accol,
894 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Ac,
895 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Acimport,
896 const std::string& label,
897 const Teuchos::RCP<Teuchos::ParameterList>& params) {
898 #ifdef HAVE_TPETRA_MMM_TIMINGS
899 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
900 using Teuchos::TimeMonitor;
901 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Newmatrix SerialCore"))));
904 using Teuchos::Array;
905 using Teuchos::ArrayRCP;
906 using Teuchos::ArrayView;
912 typedef typename KCRS::StaticCrsGraphType graph_t;
913 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
914 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
915 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
916 typedef typename KCRS::values_type::non_const_type scalar_view_t;
919 typedef LocalOrdinal LO;
920 typedef GlobalOrdinal GO;
922 typedef Map<LO,GO,NO> map_type;
923 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
924 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
925 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
928 RCP<const map_type> Accolmap = Ac.getColMap();
929 size_t m = Rview.origMatrix->getNodeNumRows();
930 size_t n = Accolmap->getNodeNumElements();
931 size_t p_max_nnz_per_row = Pview.origMatrix->getNodeMaxNumRowEntries();
934 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
935 const KCRS & Pmat = Pview.origMatrix->getLocalMatrix();
936 const KCRS & Rmat = Rview.origMatrix->getLocalMatrix();
938 c_lno_view_t Arowptr = Amat.graph.row_map, Prowptr = Pmat.graph.row_map, Rrowptr = Rmat.graph.row_map;
939 const lno_nnz_view_t Acolind = Amat.graph.entries, Pcolind = Pmat.graph.entries , Rcolind = Rmat.graph.entries;
940 const scalar_view_t Avals = Amat.values, Pvals = Pmat.values, Rvals = Rmat.values;
942 c_lno_view_t Irowptr;
943 lno_nnz_view_t Icolind;
945 if(!Pview.importMatrix.is_null()) {
946 Irowptr = Pview.importMatrix->getLocalMatrix().graph.row_map;
947 Icolind = Pview.importMatrix->getLocalMatrix().graph.entries;
948 Ivals = Pview.importMatrix->getLocalMatrix().values;
949 p_max_nnz_per_row = std::max(p_max_nnz_per_row,Pview.importMatrix->getNodeMaxNumRowEntries());
952 #ifdef HAVE_TPETRA_MMM_TIMINGS
953 RCP<TimeMonitor> MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Newmatrix SerialCore - Compare"))));
963 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Pview.origMatrix), n);
964 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
965 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
966 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
976 const size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
977 Array<size_t> ac_status(n, ST_INVALID);
987 size_t nnz = 0, nnz_old = 0;
988 for (
size_t i = 0; i < m; i++) {
994 for (
size_t kk = Rrowptr[i]; kk < Rrowptr[i+1]; kk++) {
996 const SC Rik = Rvals[kk];
1000 for (
size_t ll = Arowptr[k]; ll < Arowptr[k+1]; ll++) {
1002 const SC Akl = Avals[ll];
1007 if (Acol2Prow[l] != LO_INVALID) {
1014 size_t Pl = Teuchos::as<size_t>(Acol2Prow[l]);
1017 for (
size_t jj = Prowptr[Pl]; jj < Prowptr[Pl+1]; jj++) {
1019 LO Acj = Pcol2Accol[j];
1022 if (ac_status[Acj] == INVALID || ac_status[Acj] < nnz_old) {
1023 #ifdef HAVE_TPETRA_DEBUG
1025 TEUCHOS_TEST_FOR_EXCEPTION(nnz >= Teuchos::as<size_t>(Ccolind.size()),
1027 label <<
" ERROR, not enough memory allocated for matrix product. Allocated: " << Ccolind.extent(0) << std::endl);
1030 ac_status[Acj] = nnz;
1032 Cvals[nnz] = Rik*Akl*Plj;
1035 Cvals[ac_status[Acj]] += Rik*Akl*Plj;
1045 size_t Il = Teuchos::as<size_t>(Acol2PIrow[l]);
1046 for (
size_t jj = Irowptr[Il]; jj < Irowptr[Il+1]; jj++) {
1048 LO Acj = PIcol2Accol[j];
1051 if (ac_status[Acj] == INVALID || ac_status[Acj] < nnz_old) {
1052 #ifdef HAVE_TPETRA_DEBUG
1054 TEUCHOS_TEST_FOR_EXCEPTION(nnz >= Teuchos::as<size_t>(Ccolind.size()),
1056 label <<
" ERROR, not enough memory allocated for matrix product. Allocated: " << Ccolind.extent(0) << std::endl);
1059 ac_status[Acj] = nnz;
1061 Cvals[nnz] = Rik*Akl*Plj;
1064 Cvals[ac_status[Acj]] += Rik*Akl*Plj;
1071 if (nnz + n > CSR_alloc) {
1073 Kokkos::resize(Ccolind,CSR_alloc);
1074 Kokkos::resize(Cvals,CSR_alloc);
1082 Kokkos::resize(Ccolind,nnz);
1083 Kokkos::resize(Cvals,nnz);
1085 #ifdef HAVE_TPETRA_MMM_TIMINGS
1086 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Newmatrix Final Sort"))));
1090 if (params.is_null() || params->get(
"sort entries",
true))
1091 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
1092 Ac.setAllValues(Crowptr, Ccolind, Cvals);
1094 #ifdef HAVE_TPETRA_MMM_TIMINGS
1095 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Newmatrix ESFC"))));
1106 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
1107 labelList->set(
"Timer Label",label);
1108 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
1109 RCP<const Export<LO,GO,NO> > dummyExport;
1110 Ac.expertStaticFillComplete(Pview.origMatrix->getDomainMap(),
1111 Rview.origMatrix->getRangeMap(),
1121 template<
class Scalar,
1123 class GlobalOrdinal,
1125 class LocalOrdinalViewType>
1126 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Rview,
1127 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1128 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Pview,
1129 const LocalOrdinalViewType & Acol2Prow,
1130 const LocalOrdinalViewType & Acol2PIrow,
1131 const LocalOrdinalViewType & Pcol2Accol,
1132 const LocalOrdinalViewType & PIcol2Accol,
1133 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Ac,
1134 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Acimport,
1135 const std::string& label,
1136 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1137 #ifdef HAVE_TPETRA_MMM_TIMINGS
1138 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1139 using Teuchos::TimeMonitor;
1140 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Reuse SerialCore"))));
1143 using Teuchos::Array;
1144 using Teuchos::ArrayRCP;
1145 using Teuchos::ArrayView;
1151 typedef typename KCRS::StaticCrsGraphType graph_t;
1152 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
1153 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1154 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1157 typedef LocalOrdinal LO;
1158 typedef GlobalOrdinal GO;
1160 typedef Map<LO,GO,NO> map_type;
1161 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1162 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1163 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1166 RCP<const map_type> Accolmap = Ac.getColMap();
1167 size_t m = Rview.origMatrix->getNodeNumRows();
1168 size_t n = Accolmap->getNodeNumElements();
1169 size_t p_max_nnz_per_row = Pview.origMatrix->getNodeMaxNumRowEntries();
1172 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
1173 const KCRS & Pmat = Pview.origMatrix->getLocalMatrix();
1174 const KCRS & Rmat = Rview.origMatrix->getLocalMatrix();
1175 const KCRS & Cmat = Ac.getLocalMatrix();
1177 c_lno_view_t Arowptr = Amat.graph.row_map, Prowptr = Pmat.graph.row_map, Rrowptr = Rmat.graph.row_map, Crowptr = Cmat.graph.row_map;
1178 const lno_nnz_view_t Acolind = Amat.graph.entries, Pcolind = Pmat.graph.entries , Rcolind = Rmat.graph.entries, Ccolind = Cmat.graph.entries;
1179 const scalar_view_t Avals = Amat.values, Pvals = Pmat.values, Rvals = Rmat.values;
1180 scalar_view_t Cvals = Cmat.values;
1182 c_lno_view_t Irowptr;
1183 lno_nnz_view_t Icolind;
1184 scalar_view_t Ivals;
1185 if(!Pview.importMatrix.is_null()) {
1186 Irowptr = Pview.importMatrix->getLocalMatrix().graph.row_map;
1187 Icolind = Pview.importMatrix->getLocalMatrix().graph.entries;
1188 Ivals = Pview.importMatrix->getLocalMatrix().values;
1189 p_max_nnz_per_row = std::max(p_max_nnz_per_row,Pview.importMatrix->getNodeMaxNumRowEntries());
1192 #ifdef HAVE_TPETRA_MMM_TIMINGS
1193 RCP<TimeMonitor> MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Reuse SerialCore - Compare"))));
1204 Array<size_t> ac_status(n, ST_INVALID);
1214 size_t OLD_ip = 0, CSR_ip = 0;
1215 for (
size_t i = 0; i < m; i++) {
1218 OLD_ip = Crowptr[i];
1219 CSR_ip = Crowptr[i+1];
1220 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
1221 ac_status[Ccolind[k]] = k;
1228 for (
size_t kk = Rrowptr[i]; kk < Rrowptr[i+1]; kk++) {
1230 const SC Rik = Rvals[kk];
1234 for (
size_t ll = Arowptr[k]; ll < Arowptr[k+1]; ll++) {
1236 const SC Akl = Avals[ll];
1241 if (Acol2Prow[l] != LO_INVALID) {
1248 size_t Pl = Teuchos::as<size_t>(Acol2Prow[l]);
1251 for (
size_t jj = Prowptr[Pl]; jj < Prowptr[Pl+1]; jj++) {
1253 LO Cij = Pcol2Accol[j];
1256 TEUCHOS_TEST_FOR_EXCEPTION(ac_status[Cij] < OLD_ip || ac_status[Cij] >= CSR_ip,
1257 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
1258 "(c_status = " << ac_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
1260 Cvals[ac_status[Cij]] += Rik*Akl*Plj;
1269 size_t Il = Teuchos::as<size_t>(Acol2PIrow[l]);
1270 for (
size_t jj = Irowptr[Il]; jj < Irowptr[Il+1]; jj++) {
1272 LO Cij = PIcol2Accol[j];
1275 TEUCHOS_TEST_FOR_EXCEPTION(ac_status[Cij] < OLD_ip || ac_status[Cij] >= CSR_ip,
1276 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
1277 "(c_status = " << ac_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
1279 Cvals[ac_status[Cij]] += Rik*Akl*Plj;
1286 #ifdef HAVE_TPETRA_MMM_TIMINGS
1287 auto MM3 = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Reuse ESFC"))));
1290 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
1298 template<
class Scalar,
1300 class GlobalOrdinal,
1302 class LocalOrdinalViewType>
1303 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1304 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Pview,
1305 const LocalOrdinalViewType & Acol2Prow,
1306 const LocalOrdinalViewType & Acol2PIrow,
1307 const LocalOrdinalViewType & Pcol2Accol,
1308 const LocalOrdinalViewType & PIcol2Accol,
1309 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Ac,
1310 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Acimport,
1311 const std::string& label,
1312 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1313 #ifdef HAVE_TPETRA_MMM_TIMINGS
1314 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1315 using Teuchos::TimeMonitor;
1316 Teuchos::TimeMonitor MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
1320 typedef RowMatrixTransposer<Scalar,LocalOrdinal,GlobalOrdinal, Node> transposer_type;
1321 transposer_type transposer (Pview.origMatrix,label+std::string(
"XP: "));
1322 Teuchos::RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(
new Teuchos::ParameterList);
1323 if (!params.is_null())
1324 transposeParams->set(
"compute global constants",
1325 params->get(
"compute global constants: temporaries",
1327 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Ptrans = transposer.createTransposeLocal(transposeParams);
1328 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node> Rview;
1329 Rview.origMatrix = Ptrans;
1331 mult_R_A_P_newmatrix_kernel_wrapper(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
1337 template<
class Scalar,
1339 class GlobalOrdinal,
1341 class LocalOrdinalViewType>
1342 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1343 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Pview,
1344 const LocalOrdinalViewType & Acol2Prow,
1345 const LocalOrdinalViewType & Acol2PIrow,
1346 const LocalOrdinalViewType & Pcol2Accol,
1347 const LocalOrdinalViewType & PIcol2Accol,
1348 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Ac,
1349 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Acimport,
1350 const std::string& label,
1351 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1352 #ifdef HAVE_TPETRA_MMM_TIMINGS
1353 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1354 using Teuchos::TimeMonitor;
1355 Teuchos::TimeMonitor MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
1359 typedef RowMatrixTransposer<Scalar,LocalOrdinal,GlobalOrdinal, Node> transposer_type;
1360 transposer_type transposer (Pview.origMatrix,label+std::string(
"XP: "));
1361 Teuchos::RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(
new Teuchos::ParameterList);
1362 if (!params.is_null())
1363 transposeParams->set(
"compute global constants",
1364 params->get(
"compute global constants: temporaries",
1366 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Ptrans = transposer.createTransposeLocal(transposeParams);
1367 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node> Rview;
1368 Rview.origMatrix = Ptrans;
1370 mult_R_A_P_reuse_kernel_wrapper(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
1378 template<
class Scalar,
1380 class GlobalOrdinal,
1382 void KernelWrappers3MMM<Scalar,LocalOrdinal,GlobalOrdinal,Node>::mult_PT_A_P_newmatrix_kernel_wrapper_2pass(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1383 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Pview,
1384 const Teuchos::Array<LocalOrdinal> & Acol2PRow,
1385 const Teuchos::Array<LocalOrdinal> & Acol2PRowImport,
1386 const Teuchos::Array<LocalOrdinal> & Pcol2Accol,
1387 const Teuchos::Array<LocalOrdinal> & PIcol2Accol,
1388 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Ac,
1389 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Acimport,
1390 const std::string& label,
1391 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1392 #ifdef HAVE_TPETRA_MMM_TIMINGS
1393 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1394 using Teuchos::TimeMonitor;
1395 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP Newmatrix SerialCore"))));
1398 using Teuchos::Array;
1399 using Teuchos::ArrayRCP;
1400 using Teuchos::ArrayView;
1405 typedef LocalOrdinal LO;
1406 typedef GlobalOrdinal GO;
1408 typedef RowMatrixTransposer<SC,LO,GO,NO> transposer_type;
1409 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1410 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1415 size_t n = Ac.getRowMap()->getNodeNumElements();
1416 LO maxAccol = Ac.getColMap()->getMaxLocalIndex();
1419 ArrayRCP<const size_t> Arowptr_RCP, Prowptr_RCP, Irowptr_RCP;
1420 ArrayRCP<size_t> Acrowptr_RCP;
1421 ArrayRCP<const LO> Acolind_RCP, Pcolind_RCP, Icolind_RCP;
1422 ArrayRCP<LO> Accolind_RCP;
1423 ArrayRCP<const Scalar> Avals_RCP, Pvals_RCP, Ivals_RCP;
1424 ArrayRCP<SC> Acvals_RCP;
1431 Aview.origMatrix->getAllValues(Arowptr_RCP, Acolind_RCP, Avals_RCP);
1432 Pview.origMatrix->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1434 if (!Pview.importMatrix.is_null())
1435 Pview.importMatrix->getAllValues(Irowptr_RCP, Icolind_RCP, Ivals_RCP);
1442 ArrayView<const size_t> Arowptr, Prowptr, Irowptr;
1443 ArrayView<const LO> Acolind, Pcolind, Icolind;
1444 ArrayView<const SC> Avals, Pvals, Ivals;
1445 ArrayView<size_t> Acrowptr;
1446 ArrayView<LO> Accolind;
1447 ArrayView<SC> Acvals;
1448 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1449 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1450 if (!Pview.importMatrix.is_null()) {
1451 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1463 #ifdef HAVE_TPETRA_MMM_TIMINGS
1464 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
1471 ArrayRCP<const size_t> Rrowptr_RCP;
1472 ArrayRCP<const LO> Rcolind_RCP;
1473 ArrayRCP<const Scalar> Rvals_RCP;
1474 ArrayView<const size_t> Rrowptr;
1475 ArrayView<const LO> Rcolind;
1476 ArrayView<const SC> Rvals;
1478 transposer_type transposer (Pview.origMatrix,label+std::string(
"XP: "));
1479 RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(
new Teuchos::ParameterList);
1480 if (!params.is_null())
1481 transposeParams->set(
"compute global constants",
1482 params->get(
"compute global constants: temporaries",
1484 RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Ptrans = transposer.createTransposeLocal(transposeParams);
1486 Ptrans->getAllValues(Rrowptr_RCP, Rcolind_RCP, Rvals_RCP);
1487 Rrowptr = Rrowptr_RCP();
1488 Rcolind = Rcolind_RCP();
1489 Rvals = Rvals_RCP();
1494 #ifdef HAVE_TPETRA_MMM_TIMINGS
1495 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP graph"))));
1498 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1499 Array<size_t> ac_status(maxAccol + 1, ST_INVALID);
1501 size_t nnz_alloc = std::max(Ac_estimate_nnz(*Aview.origMatrix, *Pview.origMatrix), n);
1502 size_t nnzPerRowA = 100;
1503 if (Aview.origMatrix->getNodeNumEntries() > 0)
1504 nnzPerRowA = Aview.origMatrix->getNodeNumEntries()/Aview.origMatrix->getNodeNumRows();
1505 Acrowptr_RCP.resize(n+1);
1506 Acrowptr = Acrowptr_RCP();
1507 Accolind_RCP.resize(nnz_alloc);
1508 Accolind = Accolind_RCP();
1510 size_t nnz = 0, nnz_old = 0;
1511 for (
size_t i = 0; i < n; i++) {
1517 for (
size_t kk = Rrowptr[i]; kk < Rrowptr[i+1]; kk++) {
1520 for (
size_t ll = Arowptr[k]; ll < Arowptr[k+1]; ll++) {
1523 if (Acol2PRow[l] != LO_INVALID) {
1530 size_t Pl = Teuchos::as<size_t>(Acol2PRow[l]);
1533 for (
size_t jj = Prowptr[Pl]; jj < Prowptr[Pl+1]; jj++) {
1535 LO Acj = Pcol2Accol[j];
1537 if (ac_status[Acj] == ST_INVALID || ac_status[Acj] < nnz_old) {
1539 ac_status[Acj] = nnz;
1540 Accolind[nnz] = Acj;
1551 size_t Il = Teuchos::as<size_t>(Acol2PRowImport[l]);
1552 for (
size_t jj = Irowptr[Il]; jj < Irowptr[Il+1]; jj++) {
1554 LO Acj = PIcol2Accol[j];
1556 if (ac_status[Acj] == ST_INVALID || ac_status[Acj] < nnz_old){
1558 ac_status[Acj] = nnz;
1559 Accolind[nnz] = Acj;
1569 if (nnz + std::max(5*nnzPerRowA, n) > nnz_alloc) {
1571 nnz_alloc = std::max(nnz_alloc, nnz + std::max(5*nnzPerRowA, n));
1572 Accolind_RCP.resize(nnz_alloc); Accolind = Accolind_RCP();
1573 Acvals_RCP.resize(nnz_alloc); Acvals = Acvals_RCP();
1580 Accolind_RCP.resize(nnz);
1581 Accolind = Accolind_RCP();
1584 Acvals_RCP.resize(nnz, SC_ZERO);
1585 Acvals = Acvals_RCP();
1592 #ifdef HAVE_TPETRA_MMM_TIMINGS
1593 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP Newmatrix Fill Matrix"))));
1597 for (
size_t k = 0; k < n; k++) {
1598 for (
size_t ii = Prowptr[k]; ii < Prowptr[k+1]; ii++) {
1600 const SC Pki = Pvals[ii];
1601 for (
size_t ll = Arowptr[k]; ll < Arowptr[k+1]; ll++) {
1603 const SC Akl = Avals[ll];
1606 if (Acol2PRow[l] != LO_INVALID) {
1613 size_t Pl = Teuchos::as<size_t>(Acol2PRow[l]);
1614 for (
size_t jj = Prowptr[Pl]; jj < Prowptr[Pl+1]; jj++) {
1616 LO Acj = Pcol2Accol[j];
1618 for (pp = Acrowptr[i]; pp < Acrowptr[i+1]; pp++)
1619 if (Accolind[pp] == Acj)
1623 Acvals[pp] += Pki*Akl*Pvals[jj];
1632 size_t Il = Teuchos::as<size_t>(Acol2PRowImport[l]);
1633 for (
size_t jj = Irowptr[Il]; jj < Irowptr[Il+1]; jj++) {
1635 LO Acj = PIcol2Accol[j];
1637 for (pp = Acrowptr[i]; pp < Acrowptr[i+1]; pp++)
1638 if (Accolind[pp] == Acj)
1642 Acvals[pp] += Pki*Akl*Ivals[jj];
1650 #ifdef HAVE_TPETRA_MMM_TIMINGS
1651 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP sort"))));
1658 Import_Util::sortCrsEntries(Acrowptr_RCP(), Accolind_RCP(), Acvals_RCP());
1661 Ac.setAllValues(Acrowptr_RCP, Accolind_RCP, Acvals_RCP);
1663 #ifdef HAVE_TPETRA_MMM_TIMINGS
1664 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP Newmatrix ESFC"))));
1675 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
1676 labelList->set(
"Timer Label",label);
1678 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
1679 RCP<const Export<LO,GO,NO> > dummyExport;
1680 Ac.expertStaticFillComplete(Pview.origMatrix->getDomainMap(),
1681 Pview.origMatrix->getDomainMap(),
1683 dummyExport, labelList);
1697 #define TPETRA_TRIPLEMATRIXMULTIPLY_INSTANT(SCALAR,LO,GO,NODE) \
1700 void TripleMatrixMultiply::MultiplyRAP( \
1701 const CrsMatrix< SCALAR , LO , GO , NODE >& R, \
1703 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
1705 const CrsMatrix< SCALAR , LO , GO , NODE >& P, \
1707 CrsMatrix< SCALAR , LO , GO , NODE >& Ac, \
1708 bool call_FillComplete_on_result, \
1709 const std::string & label, \
1710 const Teuchos::RCP<Teuchos::ParameterList>& params); \
1714 #endif // TPETRA_TRIPLEMATRIXMULTIPLY_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
static int TAFC_OptimizationCoreCount()
The core count above which Tpetra::CrsMatrix::transferAndFillComplere will attempt to do advanced nei...
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
bool isFillActive() const
Whether the matrix is not fill complete.
size_t global_size_t
Global size_t object.
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Construct and (optionally) redistribute the explicitly stored transpose of a CrsMatrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
Utility functions for packing and unpacking sparse matrix entries.
void MultiplyRAP(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &R, bool transposeR, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &P, bool transposeP, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Ac, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
A parallel distribution of indices over processes.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.