10 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_DEF_HPP
13 #include "KokkosSparse_Utils.hpp"
14 #include "Tpetra_ConfigDefs.hpp"
16 #include "Teuchos_VerboseObject.hpp"
17 #include "Teuchos_Array.hpp"
19 #include "Tpetra_CrsMatrix.hpp"
20 #include "Tpetra_BlockCrsMatrix.hpp"
22 #include "Tpetra_RowMatrixTransposer.hpp"
25 #include "Tpetra_Details_makeColMap.hpp"
26 #include "Tpetra_ConfigDefs.hpp"
27 #include "Tpetra_Map.hpp"
28 #include "Tpetra_Export.hpp"
34 #include <type_traits>
35 #include "Teuchos_FancyOStream.hpp"
37 #include "TpetraExt_MatrixMatrix_ExtraKernels_def.hpp"
40 #include "KokkosSparse_spgemm.hpp"
41 #include "KokkosSparse_spadd.hpp"
42 #include "Kokkos_Bitset.hpp"
54 #include "TpetraExt_MatrixMatrix_OpenMP.hpp"
55 #include "TpetraExt_MatrixMatrix_Cuda.hpp"
56 #include "TpetraExt_MatrixMatrix_HIP.hpp"
57 #include "TpetraExt_MatrixMatrix_SYCL.hpp"
61 namespace MatrixMatrix {
69 template <
class Scalar,
79 bool call_FillComplete_on_result,
80 const std::string& label,
81 const Teuchos::RCP<Teuchos::ParameterList>& params) {
86 typedef LocalOrdinal LO;
87 typedef GlobalOrdinal GO;
97 const std::string prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
102 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
103 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
108 RCP<const crs_matrix_type> Aprime = null;
112 RCP<const crs_matrix_type> Bprime = null;
122 const bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
124 bool use_optimized_ATB =
false;
125 if (transposeA && !transposeB && call_FillComplete_on_result && newFlag)
126 use_optimized_ATB =
true;
128 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later.
129 use_optimized_ATB =
false;
132 using Teuchos::ParameterList;
133 RCP<ParameterList> transposeParams(
new ParameterList);
134 transposeParams->set(
"sort",
true);
136 if (!use_optimized_ATB && transposeA) {
137 transposer_type transposer(rcpFromRef(A));
138 Aprime = transposer.createTranspose(transposeParams);
140 Aprime = rcpFromRef(A);
144 transposer_type transposer(rcpFromRef(B));
145 Bprime = transposer.createTranspose(transposeParams);
147 Bprime = rcpFromRef(B);
157 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
158 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
159 "must match for matrix-matrix product. op(A) is "
160 << Aouter <<
"x" << Ainner <<
", op(B) is " << Binner <<
"x" << Bouter);
166 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
167 prefix <<
"ERROR, dimensions of result C must "
168 "match dimensions of op(A) * op(B). C has "
170 <<
" rows, should have at least " << Aouter << std::endl);
182 int numProcs = A.
getComm()->getSize();
186 crs_matrix_struct_type Aview;
187 crs_matrix_struct_type Bview;
189 RCP<const map_type> targetMap_A = Aprime->getRowMap();
190 RCP<const map_type> targetMap_B = Bprime->getRowMap();
198 if (!use_optimized_ATB) {
199 RCP<const import_type> dummyImporter;
200 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label, params);
206 targetMap_B = Aprime->getColMap();
209 if (!use_optimized_ATB)
210 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(), Aprime->getGraph()->getImporter().is_null(), label, params);
219 if (use_optimized_ATB) {
220 MMdetails::mult_AT_B_newmatrix(A, B, C, label, params);
222 }
else if (call_FillComplete_on_result && newFlag) {
223 MMdetails::mult_A_B_newmatrix(Aview, Bview, C, label, params);
225 }
else if (call_FillComplete_on_result) {
226 MMdetails::mult_A_B_reuse(Aview, Bview, C, label, params);
232 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
234 MMdetails::mult_A_B(Aview, Bview, crsmat, label, params);
246 C.
fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
254 template <
class Scalar,
264 const std::string& label) {
269 typedef LocalOrdinal LO;
270 typedef GlobalOrdinal GO;
276 std::string prefix = std::string(
"TpetraExt ") + label + std::string(
": ");
278 TEUCHOS_TEST_FOR_EXCEPTION(transposeA ==
true, std::runtime_error, prefix <<
"Matrix A cannot be transposed.");
279 TEUCHOS_TEST_FOR_EXCEPTION(transposeB ==
true, std::runtime_error, prefix <<
"Matrix B cannot be transposed.");
291 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
292 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
293 "must match for matrix-matrix product. op(A) is "
294 << Aouter <<
"x" << Ainner <<
", op(B) is " << Binner <<
"x" << Bouter);
298 int numProcs = A->getComm()->getSize();
300 const LO blocksize = A->getBlockSize();
301 TEUCHOS_TEST_FOR_EXCEPTION(blocksize != B->getBlockSize(), std::runtime_error,
302 prefix <<
"ERROR, Blocksizes do not match. A.blocksize = " << blocksize <<
", B.blocksize = " << B->getBlockSize());
306 blockcrs_matrix_struct_type Aview(blocksize);
307 blockcrs_matrix_struct_type Bview(blocksize);
309 RCP<const map_type> targetMap_A = A->getRowMap();
310 RCP<const map_type> targetMap_B = B->getRowMap();
313 RCP<const import_type> dummyImporter;
314 MMdetails::import_and_extract_views(*A, targetMap_A, Aview, dummyImporter,
true);
319 targetMap_B = A->getColMap();
322 MMdetails::import_and_extract_views(*B, targetMap_B, Bview, A->getGraph()->getImporter(),
323 A->getGraph()->getImporter().is_null());
326 MMdetails::mult_A_B_newmatrix(Aview, Bview, C);
329 template <
class Scalar,
338 bool call_FillComplete_on_result,
339 const std::string& label,
340 const Teuchos::RCP<Teuchos::ParameterList>& params) {
344 typedef LocalOrdinal LO;
345 typedef GlobalOrdinal GO;
354 const std::string prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
359 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
360 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
362 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
363 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
372 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
373 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
374 "must match for matrix-matrix product. op(A) is "
375 << Aouter <<
"x" << Ainner <<
", op(B) is " << Binner <<
"x" << Bouter);
381 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
382 prefix <<
"ERROR, dimensions of result C must "
383 "match dimensions of op(A) * op(B). C has "
385 <<
" rows, should have at least " << Aouter << std::endl);
397 int numProcs = A.
getComm()->getSize();
401 crs_matrix_struct_type Aview;
402 crs_matrix_struct_type Bview;
404 RCP<const map_type> targetMap_A = Aprime->getRowMap();
405 RCP<const map_type> targetMap_B = Bprime->getRowMap();
411 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
412 if (!params.is_null()) {
413 importParams1->set(
"compute global constants", params->get(
"compute global constants: temporaries",
false));
414 int mm_optimization_core_count = 0;
415 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
417 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
418 if (mm_optimization_core_count2 < mm_optimization_core_count) mm_optimization_core_count = mm_optimization_core_count2;
419 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
420 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
421 auto& ip1slist = importParams1->sublist(
"matrixmatrix: kernel params",
false);
422 ip1slist.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
423 ip1slist.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
424 ip1slist.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
428 RCP<const import_type> dummyImporter;
429 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label, importParams1);
434 targetMap_B = Aprime->getColMap();
439 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
440 if (!params.is_null()) {
441 importParams2->set(
"compute global constants", params->get(
"compute global constants: temporaries",
false));
443 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
445 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
446 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
447 auto& ip2slist = importParams2->sublist(
"matrixmatrix: kernel params",
false);
448 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
449 if (mm_optimization_core_count2 < mm_optimization_core_count) mm_optimization_core_count = mm_optimization_core_count2;
450 ip2slist.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
451 ip2slist.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
452 ip2slist.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
455 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(), Aprime->getGraph()->getImporter().is_null(), label, importParams2);
461 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
464 bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
466 if (call_FillComplete_on_result && newFlag) {
467 MMdetails::jacobi_A_B_newmatrix(omega, Dinv, Aview, Bview, C, label, params);
469 }
else if (call_FillComplete_on_result) {
470 MMdetails::jacobi_A_B_reuse(omega, Dinv, Aview, Bview, C, label, params);
473 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"jacobi_A_B_general not implemented");
476 if (!params.is_null()) {
477 bool removeZeroEntries = params->get(
"remove zeros",
false);
478 if (removeZeroEntries) {
479 typedef Teuchos::ScalarTraits<Scalar> STS;
480 typename STS::magnitudeType threshold = params->get(
"remove zeros threshold", STS::magnitude(STS::zero()));
486 template <
class Scalar,
496 using Teuchos::Array;
500 typedef LocalOrdinal LO;
501 typedef GlobalOrdinal GO;
506 const std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
508 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
509 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. "
510 "(Result matrix B is not required to be isFillComplete()).");
511 TEUCHOS_TEST_FOR_EXCEPTION(B.
isFillComplete(), std::runtime_error,
512 prefix <<
"ERROR, input matrix B must not be fill complete!");
513 TEUCHOS_TEST_FOR_EXCEPTION(B.
isStaticGraph(), std::runtime_error,
514 prefix <<
"ERROR, input matrix B must not have static graph!");
516 prefix <<
"ERROR, input matrix B must not be locally indexed!");
518 using Teuchos::ParameterList;
519 RCP<ParameterList> transposeParams(
new ParameterList);
520 transposeParams->set(
"sort",
false);
522 RCP<const crs_matrix_type> Aprime = null;
524 transposer_type transposer(rcpFromRef(A));
525 Aprime = transposer.createTranspose(transposeParams);
527 Aprime = rcpFromRef(A);
535 if (scalarB != Teuchos::ScalarTraits<SC>::one())
539 if (scalarA != Teuchos::ScalarTraits<SC>::zero()) {
540 for (LO i = 0; (size_t)i < numMyRows; ++i) {
541 row = B.
getRowMap()->getGlobalElement(i);
542 Aprime->getGlobalRowCopy(row, a_inds, a_vals, a_numEntries);
544 if (scalarA != Teuchos::ScalarTraits<SC>::one()) {
545 for (
size_t j = 0; j < a_numEntries; ++j)
546 a_vals[j] *= scalarA;
548 B.
insertGlobalValues(row, a_numEntries, reinterpret_cast<Scalar*>(a_vals.data()), a_inds.data());
553 template <
class Scalar,
557 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
558 add(
const Scalar& alpha,
559 const bool transposeA,
562 const bool transposeB,
566 const Teuchos::RCP<Teuchos::ParameterList>& params) {
567 using Teuchos::ParameterList;
570 using Teuchos::rcpFromRef;
572 if (!params.is_null()) {
573 TEUCHOS_TEST_FOR_EXCEPTION(
574 params->isParameter(
"Call fillComplete") && !params->get<
bool>(
"Call fillComplete"),
575 std::invalid_argument,
576 "Tpetra::MatrixMatrix::add(): this version of add() always calls fillComplete\n"
577 "on the result, but you explicitly set 'Call fillComplete' = false in the parameter list. Don't set this explicitly.");
578 params->set(
"Call fillComplete",
true);
582 RCP<const crs_matrix_type> Brcp = rcpFromRef(B);
584 RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> transposer(Brcp);
585 Brcp = transposer.createTranspose();
588 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete() || !Brcp->isFillComplete(), std::invalid_argument,
589 "TpetraExt::MatrixMatrix::add(): A and B must both be fill complete.");
590 RCP<crs_matrix_type> C = rcp(
new crs_matrix_type(Brcp->getRowMap(), 0));
592 add(alpha, transposeA, A, beta,
false, *Brcp, *C, domainMap, rangeMap, params);
600 template <
class LO,
class GO,
class LOView,
class GOView,
class LocalMap>
601 struct ConvertGlobalToLocalFunctor {
602 ConvertGlobalToLocalFunctor(LOView& lids_,
const GOView& gids_,
const LocalMap localColMap_)
605 , localColMap(localColMap_) {}
607 KOKKOS_FUNCTION
void operator()(
const GO i)
const {
608 lids(i) = localColMap.getLocalElement(gids(i));
613 const LocalMap localColMap;
616 template <
class Scalar,
620 void add(
const Scalar& alpha,
621 const bool transposeA,
622 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
624 const bool transposeB,
625 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
626 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
627 const Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMap,
628 const Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node>>& rangeMap,
629 const Teuchos::RCP<Teuchos::ParameterList>& params) {
632 using Teuchos::rcp_dynamic_cast;
633 using Teuchos::rcp_implicit_cast;
634 using Teuchos::rcpFromRef;
635 using Teuchos::TimeMonitor;
637 using LO = LocalOrdinal;
638 using GO = GlobalOrdinal;
640 using crs_matrix_type = CrsMatrix<SC, LO, GO, NO>;
641 using crs_graph_type = CrsGraph<LO, GO, NO>;
642 using map_type = Map<LO, GO, NO>;
643 using transposer_type = RowMatrixTransposer<SC, LO, GO, NO>;
644 using import_type = Import<LO, GO, NO>;
645 using export_type = Export<LO, GO, NO>;
646 using exec_space =
typename crs_graph_type::execution_space;
647 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
648 const char* prefix_mmm =
"TpetraExt::MatrixMatrix::add: ";
649 constexpr
bool debug =
false;
654 std::ostringstream os;
655 os <<
"Proc " << A.getMap()->getComm()->getRank() <<
": "
656 <<
"TpetraExt::MatrixMatrix::add" << std::endl;
657 std::cerr << os.str();
660 TEUCHOS_TEST_FOR_EXCEPTION(C.isLocallyIndexed() || C.isGloballyIndexed(), std::invalid_argument,
661 prefix_mmm <<
"C must be a 'new' matrix (neither locally nor globally indexed).");
662 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete() || !B.isFillComplete(), std::invalid_argument,
663 prefix_mmm <<
"A and B must both be fill complete.");
664 #ifdef HAVE_TPETRA_DEBUG
666 if (A.isFillComplete() && B.isFillComplete()) {
667 const bool domainMapsSame =
668 (!transposeA && !transposeB &&
669 !A.getDomainMap()->locallySameAs(*B.getDomainMap())) ||
670 (!transposeA && transposeB &&
671 !A.getDomainMap()->isSameAs(*B.getRangeMap())) ||
672 (transposeA && !transposeB &&
673 !A.getRangeMap()->isSameAs(*B.getDomainMap()));
674 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
675 prefix_mmm <<
"The domain Maps of Op(A) and Op(B) are not the same.");
677 const bool rangeMapsSame =
678 (!transposeA && !transposeB &&
679 !A.getRangeMap()->isSameAs(*B.getRangeMap())) ||
680 (!transposeA && transposeB &&
681 !A.getRangeMap()->isSameAs(*B.getDomainMap())) ||
682 (transposeA && !transposeB &&
683 !A.getDomainMap()->isSameAs(*B.getRangeMap()));
684 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
685 prefix_mmm <<
"The range Maps of Op(A) and Op(B) are not the same.");
687 #endif // HAVE_TPETRA_DEBUG
689 using Teuchos::ParameterList;
691 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
693 transposer_type transposer(Aprime);
694 Aprime = transposer.createTranspose();
697 #ifdef HAVE_TPETRA_DEBUG
698 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null(), std::logic_error,
699 prefix_mmm <<
"Failed to compute Op(A). "
700 "Please report this bug to the Tpetra developers.");
701 #endif // HAVE_TPETRA_DEBUG
704 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
707 std::ostringstream os;
708 os <<
"Proc " << A.getMap()->getComm()->getRank() <<
": "
709 <<
"Form explicit xpose of B" << std::endl;
710 std::cerr << os.str();
712 transposer_type transposer(Bprime);
713 Bprime = transposer.createTranspose();
715 #ifdef HAVE_TPETRA_DEBUG
716 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null(), std::logic_error,
717 prefix_mmm <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
718 TEUCHOS_TEST_FOR_EXCEPTION(
719 !Aprime->isFillComplete() || !Bprime->isFillComplete(), std::invalid_argument,
720 prefix_mmm <<
"Aprime and Bprime must both be fill complete. "
721 "Please report this bug to the Tpetra developers.");
722 #endif // HAVE_TPETRA_DEBUG
723 RCP<const map_type> CDomainMap = domainMap;
724 RCP<const map_type> CRangeMap = rangeMap;
725 if (CDomainMap.is_null()) {
726 CDomainMap = Bprime->getDomainMap();
728 if (CRangeMap.is_null()) {
729 CRangeMap = Bprime->getRangeMap();
731 assert(!(CDomainMap.is_null()));
732 assert(!(CRangeMap.is_null()));
733 typedef typename AddKern::values_array values_array;
734 typedef typename AddKern::row_ptrs_array row_ptrs_array;
735 typedef typename AddKern::col_inds_array col_inds_array;
736 bool AGraphSorted = Aprime->getCrsGraph()->isSorted();
737 bool BGraphSorted = Bprime->getCrsGraph()->isSorted();
739 row_ptrs_array rowptrs;
740 col_inds_array colinds;
745 if (!(Aprime->getRowMap()->isSameAs(*(Bprime->getRowMap())))) {
747 auto import = rcp(
new import_type(Aprime->getRowMap(), Bprime->getRowMap()));
752 Aprime = importAndFillCompleteCrsMatrix<crs_matrix_type>(Aprime, *
import, Bprime->getDomainMap(), Bprime->getRangeMap());
754 bool matchingColMaps = Aprime->getColMap()->isSameAs(*(Bprime->getColMap()));
755 bool sorted = AGraphSorted && BGraphSorted;
756 RCP<const import_type> Cimport = Teuchos::null;
757 RCP<export_type> Cexport = Teuchos::null;
758 bool doFillComplete =
true;
759 if (Teuchos::nonnull(params) && params->isParameter(
"Call fillComplete")) {
760 doFillComplete = params->get<
bool>(
"Call fillComplete");
762 auto Alocal = Aprime->getLocalMatrixDevice();
763 auto Blocal = Bprime->getLocalMatrixDevice();
764 LO numLocalRows = Alocal.numRows();
765 if (numLocalRows == 0) {
771 rowptrs = row_ptrs_array(
"C rowptrs", 0);
773 auto Acolmap = Aprime->getColMap();
774 auto Bcolmap = Bprime->getColMap();
775 if (!matchingColMaps) {
776 using global_col_inds_array =
typename AddKern::global_col_inds_array;
781 auto AlocalColmap = Acolmap->getLocalMap();
782 auto BlocalColmap = Bcolmap->getLocalMap();
783 global_col_inds_array globalColinds;
785 std::ostringstream os;
786 os <<
"Proc " << A.getMap()->getComm()->getRank() <<
": "
787 <<
"Call AddKern::convertToGlobalAndAdd(...)" << std::endl;
788 std::cerr << os.str();
790 AddKern::convertToGlobalAndAdd(
791 Alocal, alpha, Blocal, beta, AlocalColmap, BlocalColmap,
792 vals, rowptrs, globalColinds);
794 std::ostringstream os;
795 os <<
"Proc " << A.getMap()->getComm()->getRank() <<
": "
796 <<
"Finished AddKern::convertToGlobalAndAdd(...)" << std::endl;
797 std::cerr << os.str();
802 RCP<const map_type> CcolMap;
803 Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>(CcolMap, CDomainMap, globalColinds);
804 C.replaceColMap(CcolMap);
805 col_inds_array localColinds(
"C colinds", globalColinds.extent(0));
806 Kokkos::parallel_for(Kokkos::RangePolicy<exec_space>(0, globalColinds.extent(0)),
807 ConvertGlobalToLocalFunctor<LocalOrdinal, GlobalOrdinal,
808 col_inds_array, global_col_inds_array,
809 typename map_type::local_map_type>(localColinds, globalColinds, CcolMap->getLocalMap()));
810 KokkosSparse::sort_crs_matrix<exec_space, row_ptrs_array, col_inds_array, values_array>(rowptrs, localColinds, vals);
811 C.setAllValues(rowptrs, localColinds, vals);
812 C.fillComplete(CDomainMap, CRangeMap, params);
817 auto Avals = Alocal.values;
818 auto Bvals = Blocal.values;
819 auto Arowptrs = Alocal.graph.row_map;
820 auto Browptrs = Blocal.graph.row_map;
821 auto Acolinds = Alocal.graph.entries;
822 auto Bcolinds = Blocal.graph.entries;
828 std::ostringstream os;
829 os <<
"Proc " << A.getMap()->getComm()->getRank() <<
": "
830 <<
"Call AddKern::addSorted(...)" << std::endl;
831 std::cerr << os.str();
833 AddKern::addSorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, Aprime->getGlobalNumCols(), vals, rowptrs, colinds);
840 std::ostringstream os;
841 os <<
"Proc " << A.getMap()->getComm()->getRank() <<
": "
842 <<
"Call AddKern::addUnsorted(...)" << std::endl;
843 std::cerr << os.str();
845 AddKern::addUnsorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, Aprime->getGlobalNumCols(), vals, rowptrs, colinds);
848 RCP<const map_type> Ccolmap = Bcolmap;
849 C.replaceColMap(Ccolmap);
850 C.setAllValues(rowptrs, colinds, vals);
851 if (doFillComplete) {
854 if (!CDomainMap->isSameAs(*Ccolmap)) {
856 std::ostringstream os;
857 os <<
"Proc " << A.getMap()->getComm()->getRank() <<
": "
858 <<
"Create Cimport" << std::endl;
859 std::cerr << os.str();
861 Cimport = rcp(
new import_type(CDomainMap, Ccolmap));
863 if (!C.getRowMap()->isSameAs(*CRangeMap)) {
865 std::ostringstream os;
866 os <<
"Proc " << A.getMap()->getComm()->getRank() <<
": "
867 <<
"Create Cexport" << std::endl;
868 std::cerr << os.str();
870 Cexport = rcp(
new export_type(C.getRowMap(), CRangeMap));
874 std::ostringstream os;
875 os <<
"Proc " << A.getMap()->getComm()->getRank() <<
": "
876 <<
"Call C->expertStaticFillComplete(...)" << std::endl;
877 std::cerr << os.str();
879 C.expertStaticFillComplete(CDomainMap, CRangeMap, Cimport, Cexport, params);
886 template <
class Scalar,
891 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
894 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
897 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& C) {
899 using Teuchos::Array;
900 using Teuchos::ArrayRCP;
901 using Teuchos::ArrayView;
904 using Teuchos::rcp_dynamic_cast;
905 using Teuchos::rcpFromRef;
906 using Teuchos::tuple;
908 typedef Teuchos::ScalarTraits<Scalar> STS;
909 typedef Map<LocalOrdinal, GlobalOrdinal, Node> map_type;
913 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crs_matrix_type;
914 typedef RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> transposer_type;
916 std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
918 TEUCHOS_TEST_FOR_EXCEPTION(
919 !A.isFillComplete() || !B.isFillComplete(), std::invalid_argument,
920 prefix <<
"A and B must both be fill complete before calling this function.");
923 TEUCHOS_TEST_FOR_EXCEPTION(!A.haveGlobalConstants(), std::logic_error,
924 prefix <<
"C is null (must be allocated), but A.haveGlobalConstants() is false. "
925 "Please report this bug to the Tpetra developers.");
926 TEUCHOS_TEST_FOR_EXCEPTION(!B.haveGlobalConstants(), std::logic_error,
927 prefix <<
"C is null (must be allocated), but B.haveGlobalConstants() is false. "
928 "Please report this bug to the Tpetra developers.");
931 #ifdef HAVE_TPETRA_DEBUG
933 const bool domainMapsSame =
934 (!transposeA && !transposeB && !A.getDomainMap()->isSameAs(*(B.getDomainMap()))) ||
935 (!transposeA && transposeB && !A.getDomainMap()->isSameAs(*(B.getRangeMap()))) ||
936 (transposeA && !transposeB && !A.getRangeMap()->isSameAs(*(B.getDomainMap())));
937 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
938 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
940 const bool rangeMapsSame =
941 (!transposeA && !transposeB && !A.getRangeMap()->isSameAs(*(B.getRangeMap()))) ||
942 (!transposeA && transposeB && !A.getRangeMap()->isSameAs(*(B.getDomainMap()))) ||
943 (transposeA && !transposeB && !A.getDomainMap()->isSameAs(*(B.getRangeMap())));
944 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
945 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
947 #endif // HAVE_TPETRA_DEBUG
949 using Teuchos::ParameterList;
950 RCP<ParameterList> transposeParams(
new ParameterList);
951 transposeParams->set(
"sort",
false);
954 RCP<const crs_matrix_type> Aprime;
956 transposer_type theTransposer(rcpFromRef(A));
957 Aprime = theTransposer.createTranspose(transposeParams);
959 Aprime = rcpFromRef(A);
962 #ifdef HAVE_TPETRA_DEBUG
963 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null(), std::logic_error,
964 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
965 #endif // HAVE_TPETRA_DEBUG
968 RCP<const crs_matrix_type> Bprime;
970 transposer_type theTransposer(rcpFromRef(B));
971 Bprime = theTransposer.createTranspose(transposeParams);
973 Bprime = rcpFromRef(B);
976 #ifdef HAVE_TPETRA_DEBUG
977 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null(), std::logic_error,
978 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
979 #endif // HAVE_TPETRA_DEBUG
981 bool CwasFillComplete =
false;
985 CwasFillComplete = C->isFillComplete();
986 if (CwasFillComplete)
988 C->setAllToScalar(STS::zero());
998 if (Aprime->getRowMap()->isSameAs(*Bprime->getRowMap())) {
999 LocalOrdinal numLocalRows = Aprime->getLocalNumRows();
1000 Array<size_t> CmaxEntriesPerRow(numLocalRows);
1001 for (LocalOrdinal i = 0; i < numLocalRows; i++) {
1002 CmaxEntriesPerRow[i] = Aprime->getNumEntriesInLocalRow(i) + Bprime->getNumEntriesInLocalRow(i);
1004 C = rcp(
new crs_matrix_type(Aprime->getRowMap(), CmaxEntriesPerRow()));
1007 C = rcp(
new crs_matrix_type(Aprime->getRowMap(), Aprime->getGlobalMaxNumRowEntries() + Bprime->getGlobalMaxNumRowEntries()));
1011 #ifdef HAVE_TPETRA_DEBUG
1012 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null(), std::logic_error,
1013 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
1014 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null(), std::logic_error,
1015 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
1016 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null(), std::logic_error,
1017 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
1018 #endif // HAVE_TPETRA_DEBUG
1020 Array<RCP<const crs_matrix_type>> Mat =
1021 tuple<RCP<const crs_matrix_type>>(Aprime, Bprime);
1022 Array<Scalar> scalar = tuple<Scalar>(scalarA, scalarB);
1025 for (
int k = 0; k < 2; ++k) {
1026 typename crs_matrix_type::nonconst_global_inds_host_view_type Indices;
1027 typename crs_matrix_type::nonconst_values_host_view_type Values;
1035 #ifdef HAVE_TPETRA_DEBUG
1036 TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null(), std::logic_error,
1037 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1038 #endif // HAVE_TPETRA_DEBUG
1039 RCP<const map_type> curRowMap = Mat[k]->getRowMap();
1040 #ifdef HAVE_TPETRA_DEBUG
1041 TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null(), std::logic_error,
1042 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1043 #endif // HAVE_TPETRA_DEBUG
1045 const size_t localNumRows = Mat[k]->getLocalNumRows();
1046 for (
size_t i = 0; i < localNumRows; ++i) {
1047 const GlobalOrdinal globalRow = curRowMap->getGlobalElement(i);
1048 size_t numEntries = Mat[k]->getNumEntriesInGlobalRow(globalRow);
1049 if (numEntries > 0) {
1050 if (numEntries > Indices.extent(0)) {
1051 Kokkos::resize(Indices, numEntries);
1052 Kokkos::resize(Values, numEntries);
1054 Mat[k]->getGlobalRowCopy(globalRow, Indices, Values, numEntries);
1056 if (scalar[k] != STS::one()) {
1057 for (
size_t j = 0; j < numEntries; ++j) {
1058 Values[j] *= scalar[k];
1062 if (CwasFillComplete) {
1063 size_t result = C->sumIntoGlobalValues(globalRow, numEntries,
1064 reinterpret_cast<Scalar*>(Values.data()), Indices.data());
1065 TEUCHOS_TEST_FOR_EXCEPTION(result != numEntries, std::logic_error,
1066 prefix <<
"sumIntoGlobalValues failed to add entries from A or B into C.");
1068 C->insertGlobalValues(globalRow, numEntries,
1069 reinterpret_cast<Scalar*>(Values.data()), Indices.data());
1074 if (CwasFillComplete) {
1075 C->fillComplete(C->getDomainMap(),
1082 template <
class Scalar,
1084 class GlobalOrdinal,
1087 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1090 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
1093 const Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& C) {
1094 std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
1096 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null(), std::invalid_argument,
1097 prefix <<
"C must not be null");
1099 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> C_ = C;
1100 Add(A, transposeA, scalarA, B, transposeB, scalarB, C_);
1105 namespace MMdetails {
1157 template <
class Scalar,
1159 class GlobalOrdinal,
1161 void mult_AT_B_newmatrix(
1162 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1163 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
1164 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1165 const std::string& label,
1166 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1170 typedef LocalOrdinal LO;
1171 typedef GlobalOrdinal GO;
1173 typedef CrsMatrixStruct<SC, LO, GO, NO> crs_matrix_struct_type;
1174 typedef RowMatrixTransposer<SC, LO, GO, NO> transposer_type;
1181 transposer_type transposer(rcpFromRef(A), label + std::string(
"XP: "));
1183 using Teuchos::ParameterList;
1184 RCP<ParameterList> transposeParams(
new ParameterList);
1185 transposeParams->set(
"sort",
true);
1186 if (!params.is_null()) {
1187 transposeParams->set(
"compute global constants",
1188 params->get(
"compute global constants: temporaries",
1191 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Atrans =
1192 transposer.createTransposeLocal(transposeParams);
1201 crs_matrix_struct_type Aview;
1202 crs_matrix_struct_type Bview;
1203 RCP<const Import<LO, GO, NO>> dummyImporter;
1206 RCP<Teuchos::ParameterList> importParams1(
new ParameterList);
1207 if (!params.is_null()) {
1208 importParams1->set(
"compute global constants",
1209 params->get(
"compute global constants: temporaries",
1211 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
1212 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1213 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1214 int mm_optimization_core_count =
1216 mm_optimization_core_count =
1217 slist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1218 int mm_optimization_core_count2 =
1219 params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1220 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1221 mm_optimization_core_count = mm_optimization_core_count2;
1223 auto& sip1 = importParams1->sublist(
"matrixmatrix: kernel params",
false);
1224 sip1.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1225 sip1.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
1226 sip1.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1229 MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(),
1230 Aview, dummyImporter,
true,
1231 label, importParams1);
1233 RCP<ParameterList> importParams2(
new ParameterList);
1234 if (!params.is_null()) {
1235 importParams2->set(
"compute global constants",
1236 params->get(
"compute global constants: temporaries",
1238 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
1239 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1240 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1241 int mm_optimization_core_count =
1243 mm_optimization_core_count =
1244 slist.get(
"MM_TAFC_OptimizationCoreCount",
1245 mm_optimization_core_count);
1246 int mm_optimization_core_count2 =
1247 params->get(
"MM_TAFC_OptimizationCoreCount",
1248 mm_optimization_core_count);
1249 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1250 mm_optimization_core_count = mm_optimization_core_count2;
1252 auto& sip2 = importParams2->sublist(
"matrixmatrix: kernel params",
false);
1253 sip2.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1254 sip2.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
1255 sip2.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1258 if (B.getRowMap()->isSameAs(*Atrans->getColMap())) {
1259 MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,
true, label, importParams2);
1261 MMdetails::import_and_extract_views(B, Atrans->getColMap(), Bview, dummyImporter,
false, label, importParams2);
1267 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Ctemp;
1270 bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
1271 if (needs_final_export) {
1274 Ctemp = rcp(&C,
false);
1277 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label, params);
1285 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Crcp(&C,
false);
1287 if (needs_final_export) {
1288 ParameterList labelList;
1289 labelList.set(
"Timer Label", label);
1290 if (!params.is_null()) {
1291 ParameterList& params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
1292 ParameterList& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
1294 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1295 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1296 if (mm_optimization_core_count2 < mm_optimization_core_count) mm_optimization_core_count = mm_optimization_core_count2;
1297 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count,
"Core Count above which the optimized neighbor discovery is used");
1298 bool isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1299 bool overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1301 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete", isMM,
1302 "This parameter should be set to true only for MatrixMatrix operations: the optimization in Epetra that was ported to Tpetra does _not_ take into account the possibility that for any given source PID, a particular GID may not exist on the target PID: i.e. a transfer operation. A fix for this general case is in development.");
1303 labelList.set(
"compute global constants", params->get(
"compute global constants",
true));
1304 labelList.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1306 Ctemp->exportAndFillComplete(Crcp,
1307 *Ctemp->getGraph()->getExporter(),
1310 rcp(&labelList,
false));
1312 #ifdef HAVE_TPETRA_MMM_STATISTICS
1313 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label + std::string(
" AT_B MMM"));
1319 template <
class Scalar,
1321 class GlobalOrdinal,
1324 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1325 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1326 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1327 const std::string& ,
1328 const Teuchos::RCP<Teuchos::ParameterList>& ) {
1329 using Teuchos::Array;
1330 using Teuchos::ArrayRCP;
1331 using Teuchos::ArrayView;
1332 using Teuchos::null;
1333 using Teuchos::OrdinalTraits;
1335 typedef Teuchos::ScalarTraits<Scalar> STS;
1337 LocalOrdinal C_firstCol = Bview.colMap->getMinLocalIndex();
1338 LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
1340 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1341 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1343 ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getLocalElementList();
1344 ArrayView<const GlobalOrdinal> bcols_import = null;
1345 if (Bview.importColMap != null) {
1346 C_firstCol_import = Bview.importColMap->getMinLocalIndex();
1347 C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
1349 bcols_import = Bview.importColMap->getLocalElementList();
1352 size_t C_numCols = C_lastCol - C_firstCol +
1353 OrdinalTraits<LocalOrdinal>::one();
1354 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1355 OrdinalTraits<LocalOrdinal>::one();
1357 if (C_numCols_import > C_numCols)
1358 C_numCols = C_numCols_import;
1360 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1361 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1362 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1364 Array<Scalar> C_row_i = dwork;
1365 Array<GlobalOrdinal> C_cols = iwork;
1366 Array<size_t> c_index = iwork2;
1367 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2 * C_numCols);
1368 Array<Scalar> combined_values = Array<Scalar>(2 * C_numCols);
1370 size_t C_row_i_length, j, k, last_index;
1373 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1374 Array<LocalOrdinal> Acol2Brow(Aview.colMap->getLocalNumElements(), LO_INVALID);
1375 Array<LocalOrdinal> Acol2Irow(Aview.colMap->getLocalNumElements(), LO_INVALID);
1376 if (Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())) {
1378 for (LocalOrdinal i = Aview.colMap->getMinLocalIndex(); i <=
1379 Aview.colMap->getMaxLocalIndex();
1384 for (LocalOrdinal i = Aview.colMap->getMinLocalIndex(); i <=
1385 Aview.colMap->getMaxLocalIndex();
1387 GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
1388 LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
1389 if (BLID != LO_INVALID)
1390 Acol2Brow[i] = BLID;
1392 Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
1402 auto Arowptr = Aview.origMatrix->getLocalRowPtrsHost();
1403 auto Acolind = Aview.origMatrix->getLocalIndicesHost();
1404 auto Avals = Aview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1405 auto Browptr = Bview.origMatrix->getLocalRowPtrsHost();
1406 auto Bcolind = Bview.origMatrix->getLocalIndicesHost();
1407 auto Bvals = Bview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1408 decltype(Browptr) Irowptr;
1409 decltype(Bcolind) Icolind;
1410 decltype(Bvals) Ivals;
1411 if (!Bview.importMatrix.is_null()) {
1412 Irowptr = Bview.importMatrix->getLocalRowPtrsHost();
1413 Icolind = Bview.importMatrix->getLocalIndicesHost();
1414 Ivals = Bview.importMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1417 bool C_filled = C.isFillComplete();
1419 for (
size_t i = 0; i < C_numCols; i++)
1420 c_index[i] = OrdinalTraits<size_t>::invalid();
1423 size_t Arows = Aview.rowMap->getLocalNumElements();
1424 for (
size_t i = 0; i < Arows; ++i) {
1427 GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
1433 C_row_i_length = OrdinalTraits<size_t>::zero();
1435 for (k = Arowptr[i]; k < Arowptr[i + 1]; ++k) {
1436 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1437 const Scalar Aval = Avals[k];
1438 if (Aval == STS::zero())
1441 if (Ak == LO_INVALID)
1444 for (j = Browptr[Ak]; j < Browptr[Ak + 1]; ++j) {
1445 LocalOrdinal col = Bcolind[j];
1448 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1451 C_row_i[C_row_i_length] = Aval * Bvals[j];
1452 C_cols[C_row_i_length] = col;
1453 c_index[col] = C_row_i_length;
1458 C_row_i[c_index[col]] += Aval *
static_cast<Scalar
>(Bvals[j]);
1463 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1464 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1465 C_cols[ii] = bcols[C_cols[ii]];
1466 combined_index[ii] = C_cols[ii];
1467 combined_values[ii] = C_row_i[ii];
1469 last_index = C_row_i_length;
1475 C_row_i_length = OrdinalTraits<size_t>::zero();
1477 for (k = Arowptr[i]; k < Arowptr[i + 1]; ++k) {
1478 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1479 const Scalar Aval = Avals[k];
1480 if (Aval == STS::zero())
1483 if (Ak != LO_INVALID)
continue;
1485 Ak = Acol2Irow[Acolind[k]];
1486 for (j = Irowptr[Ak]; j < Irowptr[Ak + 1]; ++j) {
1487 LocalOrdinal col = Icolind[j];
1490 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1493 C_row_i[C_row_i_length] = Aval * Ivals[j];
1494 C_cols[C_row_i_length] = col;
1495 c_index[col] = C_row_i_length;
1501 C_row_i[c_index[col]] += Aval *
static_cast<Scalar
>(Ivals[j]);
1506 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1507 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1508 C_cols[ii] = bcols_import[C_cols[ii]];
1509 combined_index[last_index] = C_cols[ii];
1510 combined_values[last_index] = C_row_i[ii];
1516 C_filled ? C.sumIntoGlobalValues(
1518 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1519 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1520 : C.insertGlobalValues(
1522 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1523 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1528 template <
class Scalar,
1530 class GlobalOrdinal,
1532 void setMaxNumEntriesPerRow(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) {
1533 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal>>::size_type local_length_size;
1534 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1536 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1537 Mview.maxNumRowEntries = Mview.indices[0].size();
1539 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1540 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1541 Mview.maxNumRowEntries = Mview.indices[i].size();
1546 template <
class CrsMatrixType>
1547 size_t C_estimate_nnz(CrsMatrixType& A, CrsMatrixType& B) {
1549 size_t Aest = 100, Best = 100;
1550 if (A.getLocalNumEntries() >= A.getLocalNumRows())
1551 Aest = (A.getLocalNumRows() > 0) ? A.getLocalNumEntries() / A.getLocalNumRows() : 100;
1552 if (B.getLocalNumEntries() >= B.getLocalNumRows())
1553 Best = (B.getLocalNumRows() > 0) ? B.getLocalNumEntries() / B.getLocalNumRows() : 100;
1555 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1556 nnzperrow *= nnzperrow;
1558 return (
size_t)(A.getLocalNumRows() * nnzperrow * 0.75 + 100);
1567 template <
class Scalar,
1569 class GlobalOrdinal,
1571 void mult_A_B_newmatrix(
1572 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1573 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1574 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1575 const std::string& label,
1576 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1577 using Teuchos::Array;
1578 using Teuchos::ArrayRCP;
1579 using Teuchos::ArrayView;
1584 typedef LocalOrdinal LO;
1585 typedef GlobalOrdinal GO;
1587 typedef Import<LO, GO, NO> import_type;
1588 typedef Map<LO, GO, NO> map_type;
1591 typedef typename map_type::local_map_type local_map_type;
1593 typedef typename KCRS::StaticCrsGraphType graph_t;
1594 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1595 typedef typename NO::execution_space execution_space;
1596 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1597 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1601 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1604 RCP<const import_type> Cimport;
1605 RCP<const map_type> Ccolmap;
1606 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1607 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1608 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1609 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1610 local_map_type Irowmap_local;
1611 if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1612 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1613 local_map_type Icolmap_local;
1614 if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1621 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
1623 if (Bview.importMatrix.is_null()) {
1626 Ccolmap = Bview.colMap;
1627 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getLocalNumElements());
1629 Kokkos::parallel_for(
1630 "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1631 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1632 KOKKOS_LAMBDA(
const LO i) {
1644 if (!Bimport.is_null() && !Iimport.is_null()) {
1645 Cimport = Bimport->setUnion(*Iimport);
1646 }
else if (!Bimport.is_null() && Iimport.is_null()) {
1647 Cimport = Bimport->setUnion();
1648 }
else if (Bimport.is_null() && !Iimport.is_null()) {
1649 Cimport = Iimport->setUnion();
1651 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1653 Ccolmap = Cimport->getTargetMap();
1658 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
1659 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1666 Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
1667 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1668 Kokkos::parallel_for(
1669 "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement", range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
1670 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1672 Kokkos::parallel_for(
1673 "Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement", range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
1674 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1682 C.replaceColMap(Ccolmap);
1700 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
1701 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
1703 Kokkos::parallel_for(
1704 "Tpetra::mult_A_B_newmatrix::construct_tables", range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
1705 GO aidx = Acolmap_local.getGlobalElement(i);
1706 LO B_LID = Browmap_local.getLocalElement(aidx);
1707 if (B_LID != LO_INVALID) {
1708 targetMapToOrigRow(i) = B_LID;
1709 targetMapToImportRow(i) = LO_INVALID;
1711 LO I_LID = Irowmap_local.getLocalElement(aidx);
1712 targetMapToOrigRow(i) = LO_INVALID;
1713 targetMapToImportRow(i) = I_LID;
1719 KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::mult_A_B_newmatrix_kernel_wrapper(Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
1724 template <
class Scalar,
1726 class GlobalOrdinal,
1728 void mult_A_B_newmatrix(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1729 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1730 Teuchos::RCP<BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& C) {
1731 using Teuchos::Array;
1732 using Teuchos::ArrayRCP;
1733 using Teuchos::ArrayView;
1734 using Teuchos::null;
1739 typedef LocalOrdinal LO;
1740 typedef GlobalOrdinal GO;
1742 typedef Import<LO, GO, NO> import_type;
1743 typedef Map<LO, GO, NO> map_type;
1744 typedef BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> block_crs_matrix_type;
1745 typedef typename block_crs_matrix_type::crs_graph_type graph_t;
1748 typedef typename map_type::local_map_type local_map_type;
1749 typedef typename block_crs_matrix_type::local_matrix_device_type KBSR;
1750 typedef typename KBSR::device_type device_t;
1751 typedef typename KBSR::StaticCrsGraphType static_graph_t;
1752 typedef typename static_graph_t::row_map_type::non_const_type lno_view_t;
1753 typedef typename static_graph_t::entries_type::non_const_type lno_nnz_view_t;
1754 typedef typename KBSR::values_type::non_const_type scalar_view_t;
1755 typedef typename NO::execution_space execution_space;
1756 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1757 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1759 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1762 RCP<const import_type> Cimport;
1763 RCP<const map_type> Ccolmap;
1764 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1765 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1766 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1767 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1768 local_map_type Irowmap_local;
1769 if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1770 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1771 local_map_type Icolmap_local;
1772 if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1779 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
1781 if (Bview.importMatrix.is_null()) {
1784 Ccolmap = Bview.colMap;
1785 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getLocalNumElements());
1787 Kokkos::parallel_for(
1788 "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1789 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1790 KOKKOS_LAMBDA(
const LO i) {
1802 if (!Bimport.is_null() && !Iimport.is_null()) {
1803 Cimport = Bimport->setUnion(*Iimport);
1804 }
else if (!Bimport.is_null() && Iimport.is_null()) {
1805 Cimport = Bimport->setUnion();
1806 }
else if (Bimport.is_null() && !Iimport.is_null()) {
1807 Cimport = Iimport->setUnion();
1809 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1811 Ccolmap = Cimport->getTargetMap();
1818 Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
1819 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1820 Kokkos::parallel_for(
1821 "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",
1822 range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()),
1823 KOKKOS_LAMBDA(
const LO i) {
1824 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1826 Kokkos::parallel_for(
1827 "Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",
1828 range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()),
1829 KOKKOS_LAMBDA(
const LO i) {
1830 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1850 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),
1851 Aview.colMap->getLocalNumElements());
1852 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),
1853 Aview.colMap->getLocalNumElements());
1855 Kokkos::parallel_for(
1856 "Tpetra::mult_A_B_newmatrix::construct_tables",
1857 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1),
1858 KOKKOS_LAMBDA(
const LO i) {
1859 GO aidx = Acolmap_local.getGlobalElement(i);
1860 LO B_LID = Browmap_local.getLocalElement(aidx);
1861 if (B_LID != LO_INVALID) {
1862 targetMapToOrigRow(i) = B_LID;
1863 targetMapToImportRow(i) = LO_INVALID;
1865 LO I_LID = Irowmap_local.getLocalElement(aidx);
1866 targetMapToOrigRow(i) = LO_INVALID;
1867 targetMapToImportRow(i) = I_LID;
1872 using KernelHandle =
1873 KokkosKernels::Experimental::KokkosKernelsHandle<
typename lno_view_t::const_value_type,
1874 typename lno_nnz_view_t::const_value_type,
1875 typename scalar_view_t::const_value_type,
1876 typename device_t::execution_space,
1877 typename device_t::memory_space,
1878 typename device_t::memory_space>;
1879 int team_work_size = 16;
1880 std::string myalg(
"SPGEMM_KK_MEMORY");
1881 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
1884 kh.create_spgemm_handle(alg_enum);
1885 kh.set_team_work_size(team_work_size);
1888 const KBSR Amat = Aview.origMatrix->getLocalMatrixDevice();
1889 const KBSR Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview,
1890 targetMapToOrigRow, targetMapToImportRow,
1891 Bcol2Ccol, Icol2Ccol,
1892 Ccolmap.getConst()->getLocalNumElements());
1894 RCP<graph_t> graphC;
1895 typename KBSR::values_type values;
1900 KokkosSparse::block_spgemm_symbolic(kh, Amat,
false, Bmerged,
false, Cmat);
1901 KokkosSparse::block_spgemm_numeric(kh, Amat,
false, Bmerged,
false, Cmat);
1902 kh.destroy_spgemm_handle();
1905 graphC = rcp(
new graph_t(Cmat.graph, Aview.origMatrix->getRowMap(), Ccolmap.getConst()));
1906 values = Cmat.values;
1908 C = rcp(
new block_crs_matrix_type(*graphC, values, Aview.blocksize));
1913 template <
class Scalar,
1915 class GlobalOrdinal,
1917 class LocalOrdinalViewType>
1918 void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1919 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1920 const LocalOrdinalViewType& targetMapToOrigRow,
1921 const LocalOrdinalViewType& targetMapToImportRow,
1922 const LocalOrdinalViewType& Bcol2Ccol,
1923 const LocalOrdinalViewType& Icol2Ccol,
1924 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1925 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> Cimport,
1926 const std::string& label,
1927 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1928 using Teuchos::Array;
1929 using Teuchos::ArrayRCP;
1930 using Teuchos::ArrayView;
1937 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
1938 typedef typename KCRS::StaticCrsGraphType graph_t;
1939 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
1940 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1941 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1942 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1945 typedef LocalOrdinal LO;
1946 typedef GlobalOrdinal GO;
1948 typedef Map<LO, GO, NO> map_type;
1949 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1950 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1951 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1954 RCP<const map_type> Ccolmap = C.getColMap();
1955 size_t m = Aview.origMatrix->getLocalNumRows();
1956 size_t n = Ccolmap->getLocalNumElements();
1957 size_t b_max_nnz_per_row = Bview.origMatrix->getLocalMaxNumRowEntries();
1960 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
1961 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
1963 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
1964 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
1965 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
1967 c_lno_view_t Irowptr;
1968 lno_nnz_view_t Icolind;
1969 scalar_view_t Ivals;
1970 if (!Bview.importMatrix.is_null()) {
1971 auto lclB = Bview.importMatrix->getLocalMatrixHost();
1972 Irowptr = lclB.graph.row_map;
1973 Icolind = lclB.graph.entries;
1974 Ivals = lclB.values;
1975 b_max_nnz_per_row = std::max(b_max_nnz_per_row, Bview.importMatrix->getLocalMaxNumRowEntries());
1985 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
1986 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"), m + 1);
1987 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"), CSR_alloc);
1988 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"), CSR_alloc);
1998 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
1999 std::vector<size_t> c_status(n, ST_INVALID);
2009 size_t CSR_ip = 0, OLD_ip = 0;
2010 for (
size_t i = 0; i < m; i++) {
2013 Crowptr[i] = CSR_ip;
2016 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
2017 LO Aik = Acolind[k];
2018 const SC Aval = Avals[k];
2019 if (Aval == SC_ZERO)
2022 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2029 size_t Bk =
static_cast<size_t>(targetMapToOrigRow[Aik]);
2032 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
2033 LO Bkj = Bcolind[j];
2034 LO Cij = Bcol2Ccol[Bkj];
2036 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2038 c_status[Cij] = CSR_ip;
2039 Ccolind[CSR_ip] = Cij;
2040 Cvals[CSR_ip] = Aval * Bvals[j];
2044 Cvals[c_status[Cij]] += Aval * Bvals[j];
2055 size_t Ik =
static_cast<size_t>(targetMapToImportRow[Aik]);
2056 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
2057 LO Ikj = Icolind[j];
2058 LO Cij = Icol2Ccol[Ikj];
2060 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2062 c_status[Cij] = CSR_ip;
2063 Ccolind[CSR_ip] = Cij;
2064 Cvals[CSR_ip] = Aval * Ivals[j];
2067 Cvals[c_status[Cij]] += Aval * Ivals[j];
2074 if (i + 1 < m && CSR_ip + std::min(n, (Arowptr[i + 2] - Arowptr[i + 1]) * b_max_nnz_per_row) > CSR_alloc) {
2076 Kokkos::resize(Ccolind, CSR_alloc);
2077 Kokkos::resize(Cvals, CSR_alloc);
2082 Crowptr[m] = CSR_ip;
2085 Kokkos::resize(Ccolind, CSR_ip);
2086 Kokkos::resize(Cvals, CSR_ip);
2092 if (params.is_null() || params->get(
"sort entries",
true))
2093 Import_Util::sortCrsEntries(Crowptr, Ccolind, Cvals);
2094 C.setAllValues(Crowptr, Ccolind, Cvals);
2107 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2108 labelList->set(
"Timer Label", label);
2109 if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants",
true));
2110 RCP<const Export<LO, GO, NO>> dummyExport;
2111 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
2116 template <
class Scalar,
2118 class GlobalOrdinal,
2120 void mult_A_B_reuse(
2121 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2122 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2123 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2124 const std::string& label,
2125 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2126 using Teuchos::Array;
2127 using Teuchos::ArrayRCP;
2128 using Teuchos::ArrayView;
2133 typedef LocalOrdinal LO;
2134 typedef GlobalOrdinal GO;
2136 typedef Import<LO, GO, NO> import_type;
2137 typedef Map<LO, GO, NO> map_type;
2140 typedef typename map_type::local_map_type local_map_type;
2142 typedef typename KCRS::StaticCrsGraphType graph_t;
2143 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2144 typedef typename NO::execution_space execution_space;
2145 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2146 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2151 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2154 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2155 RCP<const map_type> Ccolmap = C.getColMap();
2156 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2157 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2158 local_map_type Irowmap_local;
2159 if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2160 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2161 local_map_type Icolmap_local;
2162 if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2163 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2166 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
2170 Kokkos::parallel_for(
2171 range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2172 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2175 if (!Bview.importMatrix.is_null()) {
2176 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2177 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
2179 Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
2180 Kokkos::parallel_for(
2181 range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2182 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2188 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
2189 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
2190 Kokkos::parallel_for(
2191 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
2192 GO aidx = Acolmap_local.getGlobalElement(i);
2193 LO B_LID = Browmap_local.getLocalElement(aidx);
2194 if (B_LID != LO_INVALID) {
2195 targetMapToOrigRow(i) = B_LID;
2196 targetMapToImportRow(i) = LO_INVALID;
2198 LO I_LID = Irowmap_local.getLocalElement(aidx);
2199 targetMapToOrigRow(i) = LO_INVALID;
2200 targetMapToImportRow(i) = I_LID;
2206 KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::mult_A_B_reuse_kernel_wrapper(Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
2210 template <
class Scalar,
2212 class GlobalOrdinal,
2214 class LocalOrdinalViewType>
2215 void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2216 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2217 const LocalOrdinalViewType& targetMapToOrigRow,
2218 const LocalOrdinalViewType& targetMapToImportRow,
2219 const LocalOrdinalViewType& Bcol2Ccol,
2220 const LocalOrdinalViewType& Icol2Ccol,
2221 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2222 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> ,
2223 const std::string& label,
2224 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2229 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
2230 typedef typename KCRS::StaticCrsGraphType graph_t;
2231 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2232 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2233 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2236 typedef LocalOrdinal LO;
2237 typedef GlobalOrdinal GO;
2239 typedef Map<LO, GO, NO> map_type;
2240 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2241 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2242 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2248 RCP<const map_type> Ccolmap = C.getColMap();
2249 size_t m = Aview.origMatrix->getLocalNumRows();
2250 size_t n = Ccolmap->getLocalNumElements();
2253 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
2254 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
2255 const KCRS& Cmat = C.getLocalMatrixHost();
2257 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2258 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2259 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2260 scalar_view_t Cvals = Cmat.values;
2262 c_lno_view_t Irowptr;
2263 lno_nnz_view_t Icolind;
2264 scalar_view_t Ivals;
2265 if (!Bview.importMatrix.is_null()) {
2266 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2267 Irowptr = lclB.graph.row_map;
2268 Icolind = lclB.graph.entries;
2269 Ivals = lclB.values;
2281 std::vector<size_t> c_status(n, ST_INVALID);
2284 size_t CSR_ip = 0, OLD_ip = 0;
2285 for (
size_t i = 0; i < m; i++) {
2288 OLD_ip = Crowptr[i];
2289 CSR_ip = Crowptr[i + 1];
2290 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2291 c_status[Ccolind[k]] = k;
2297 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
2298 LO Aik = Acolind[k];
2299 const SC Aval = Avals[k];
2300 if (Aval == SC_ZERO)
2303 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2305 size_t Bk =
static_cast<size_t>(targetMapToOrigRow[Aik]);
2307 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
2308 LO Bkj = Bcolind[j];
2309 LO Cij = Bcol2Ccol[Bkj];
2311 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2312 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph "
2313 <<
"(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2315 Cvals[c_status[Cij]] += Aval * Bvals[j];
2320 size_t Ik =
static_cast<size_t>(targetMapToImportRow[Aik]);
2321 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
2322 LO Ikj = Icolind[j];
2323 LO Cij = Icol2Ccol[Ikj];
2325 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2326 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph "
2327 <<
"(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2329 Cvals[c_status[Cij]] += Aval * Ivals[j];
2337 C.fillComplete(C.getDomainMap(), C.getRangeMap());
2343 template <
class Scalar,
2345 class GlobalOrdinal,
2347 void jacobi_A_B_newmatrix(
2349 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
2350 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2351 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2352 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2353 const std::string& label,
2354 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2355 using Teuchos::Array;
2356 using Teuchos::ArrayRCP;
2357 using Teuchos::ArrayView;
2361 typedef LocalOrdinal LO;
2362 typedef GlobalOrdinal GO;
2365 typedef Import<LO, GO, NO> import_type;
2366 typedef Map<LO, GO, NO> map_type;
2367 typedef typename map_type::local_map_type local_map_type;
2371 typedef typename KCRS::StaticCrsGraphType graph_t;
2372 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2373 typedef typename NO::execution_space execution_space;
2374 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2375 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2379 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2382 RCP<const import_type> Cimport;
2383 RCP<const map_type> Ccolmap;
2384 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
2385 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
2386 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2387 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2388 local_map_type Irowmap_local;
2389 if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2390 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2391 local_map_type Icolmap_local;
2392 if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2399 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
2401 if (Bview.importMatrix.is_null()) {
2404 Ccolmap = Bview.colMap;
2408 Kokkos::RangePolicy<execution_space, LO> range(0, static_cast<LO>(Bview.colMap->getLocalNumElements()));
2409 Kokkos::parallel_for(
2410 range, KOKKOS_LAMBDA(
const size_t i) {
2411 Bcol2Ccol(i) =
static_cast<LO
>(i);
2422 if (!Bimport.is_null() && !Iimport.is_null()) {
2423 Cimport = Bimport->setUnion(*Iimport);
2424 Ccolmap = Cimport->getTargetMap();
2426 }
else if (!Bimport.is_null() && Iimport.is_null()) {
2427 Cimport = Bimport->setUnion();
2429 }
else if (Bimport.is_null() && !Iimport.is_null()) {
2430 Cimport = Iimport->setUnion();
2433 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
2435 Ccolmap = Cimport->getTargetMap();
2437 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2438 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2445 Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
2446 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2447 Kokkos::parallel_for(
2448 range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2449 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2451 Kokkos::parallel_for(
2452 range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2453 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2461 C.replaceColMap(Ccolmap);
2479 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
2480 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
2481 Kokkos::parallel_for(
2482 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
2483 GO aidx = Acolmap_local.getGlobalElement(i);
2484 LO B_LID = Browmap_local.getLocalElement(aidx);
2485 if (B_LID != LO_INVALID) {
2486 targetMapToOrigRow(i) = B_LID;
2487 targetMapToImportRow(i) = LO_INVALID;
2489 LO I_LID = Irowmap_local.getLocalElement(aidx);
2490 targetMapToOrigRow(i) = LO_INVALID;
2491 targetMapToImportRow(i) = I_LID;
2497 KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::jacobi_A_B_newmatrix_kernel_wrapper(omega, Dinv, Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
2504 template <
class Scalar,
2506 class GlobalOrdinal,
2508 class LocalOrdinalViewType>
2509 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
2510 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
2511 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2512 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2513 const LocalOrdinalViewType& targetMapToOrigRow,
2514 const LocalOrdinalViewType& targetMapToImportRow,
2515 const LocalOrdinalViewType& Bcol2Ccol,
2516 const LocalOrdinalViewType& Icol2Ccol,
2517 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2518 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> Cimport,
2519 const std::string& label,
2520 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2523 using Teuchos::Array;
2524 using Teuchos::ArrayRCP;
2525 using Teuchos::ArrayView;
2530 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
2531 typedef typename KCRS::StaticCrsGraphType graph_t;
2532 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2533 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2534 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2535 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2538 typedef typename scalar_view_t::memory_space scalar_memory_space;
2541 typedef LocalOrdinal LO;
2542 typedef GlobalOrdinal GO;
2545 typedef Map<LO, GO, NO> map_type;
2546 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2547 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2550 RCP<const map_type> Ccolmap = C.getColMap();
2551 size_t m = Aview.origMatrix->getLocalNumRows();
2552 size_t n = Ccolmap->getLocalNumElements();
2553 size_t b_max_nnz_per_row = Bview.origMatrix->getLocalMaxNumRowEntries();
2556 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
2557 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
2559 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2560 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2561 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2563 c_lno_view_t Irowptr;
2564 lno_nnz_view_t Icolind;
2565 scalar_view_t Ivals;
2566 if (!Bview.importMatrix.is_null()) {
2567 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2568 Irowptr = lclB.graph.row_map;
2569 Icolind = lclB.graph.entries;
2570 Ivals = lclB.values;
2571 b_max_nnz_per_row = std::max(b_max_nnz_per_row, Bview.importMatrix->getLocalMaxNumRowEntries());
2576 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
2584 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2585 Array<size_t> c_status(n, ST_INVALID);
2594 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2595 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"), m + 1);
2596 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"), CSR_alloc);
2597 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"), CSR_alloc);
2598 size_t CSR_ip = 0, OLD_ip = 0;
2600 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2614 for (
size_t i = 0; i < m; i++) {
2617 Crowptr[i] = CSR_ip;
2618 SC minusOmegaDval = -omega * Dvals(i, 0);
2621 for (
size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
2622 Scalar Bval = Bvals[j];
2623 if (Bval == SC_ZERO)
2625 LO Bij = Bcolind[j];
2626 LO Cij = Bcol2Ccol[Bij];
2629 c_status[Cij] = CSR_ip;
2630 Ccolind[CSR_ip] = Cij;
2631 Cvals[CSR_ip] = Bvals[j];
2636 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
2637 LO Aik = Acolind[k];
2638 const SC Aval = Avals[k];
2639 if (Aval == SC_ZERO)
2642 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2644 size_t Bk =
static_cast<size_t>(targetMapToOrigRow[Aik]);
2646 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
2647 LO Bkj = Bcolind[j];
2648 LO Cij = Bcol2Ccol[Bkj];
2650 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2652 c_status[Cij] = CSR_ip;
2653 Ccolind[CSR_ip] = Cij;
2654 Cvals[CSR_ip] = minusOmegaDval * Aval * Bvals[j];
2658 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
2664 size_t Ik =
static_cast<size_t>(targetMapToImportRow[Aik]);
2665 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
2666 LO Ikj = Icolind[j];
2667 LO Cij = Icol2Ccol[Ikj];
2669 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2671 c_status[Cij] = CSR_ip;
2672 Ccolind[CSR_ip] = Cij;
2673 Cvals[CSR_ip] = minusOmegaDval * Aval * Ivals[j];
2676 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
2683 if (i + 1 < m && CSR_ip + std::min(n, (Arowptr[i + 2] - Arowptr[i + 1] + 1) * b_max_nnz_per_row) > CSR_alloc) {
2685 Kokkos::resize(Ccolind, CSR_alloc);
2686 Kokkos::resize(Cvals, CSR_alloc);
2690 Crowptr[m] = CSR_ip;
2693 Kokkos::resize(Ccolind, CSR_ip);
2694 Kokkos::resize(Cvals, CSR_ip);
2703 C.replaceColMap(Ccolmap);
2710 if (params.is_null() || params->get(
"sort entries",
true))
2711 Import_Util::sortCrsEntries(Crowptr, Ccolind, Cvals);
2712 C.setAllValues(Crowptr, Ccolind, Cvals);
2725 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2726 labelList->set(
"Timer Label", label);
2727 if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants",
true));
2728 RCP<const Export<LO, GO, NO>> dummyExport;
2729 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
2735 template <
class Scalar,
2737 class GlobalOrdinal,
2739 void jacobi_A_B_reuse(
2741 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
2742 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2743 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2744 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2745 const std::string& label,
2746 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2747 using Teuchos::Array;
2748 using Teuchos::ArrayRCP;
2749 using Teuchos::ArrayView;
2753 typedef LocalOrdinal LO;
2754 typedef GlobalOrdinal GO;
2757 typedef Import<LO, GO, NO> import_type;
2758 typedef Map<LO, GO, NO> map_type;
2761 typedef typename map_type::local_map_type local_map_type;
2763 typedef typename KCRS::StaticCrsGraphType graph_t;
2764 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2765 typedef typename NO::execution_space execution_space;
2766 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2767 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2769 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2770 lo_view_t Bcol2Ccol, Icol2Ccol;
2771 lo_view_t targetMapToOrigRow;
2772 lo_view_t targetMapToImportRow;
2776 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2779 RCP<const map_type> Ccolmap = C.getColMap();
2780 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2781 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2782 local_map_type Irowmap_local;
2783 if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2784 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2785 local_map_type Icolmap_local;
2786 if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2787 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2790 Bcol2Ccol = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements());
2794 Kokkos::parallel_for(
2795 range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2796 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2799 if (!Bview.importMatrix.is_null()) {
2800 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2801 std::runtime_error,
"Tpetra::Jacobi: Import setUnion messed with the DomainMap in an unfortunate way");
2803 Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
2804 Kokkos::parallel_for(
2805 range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2806 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2812 targetMapToOrigRow = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
2813 targetMapToImportRow = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
2814 Kokkos::parallel_for(
2815 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
2816 GO aidx = Acolmap_local.getGlobalElement(i);
2817 LO B_LID = Browmap_local.getLocalElement(aidx);
2818 if (B_LID != LO_INVALID) {
2819 targetMapToOrigRow(i) = B_LID;
2820 targetMapToImportRow(i) = LO_INVALID;
2822 LO I_LID = Irowmap_local.getLocalElement(aidx);
2823 targetMapToOrigRow(i) = LO_INVALID;
2824 targetMapToImportRow(i) = I_LID;
2831 KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::jacobi_A_B_reuse_kernel_wrapper(omega, Dinv, Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
2835 template <
class Scalar,
2837 class GlobalOrdinal,
2839 class LocalOrdinalViewType>
2840 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
2841 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
2842 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2843 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2844 const LocalOrdinalViewType& targetMapToOrigRow,
2845 const LocalOrdinalViewType& targetMapToImportRow,
2846 const LocalOrdinalViewType& Bcol2Ccol,
2847 const LocalOrdinalViewType& Icol2Ccol,
2848 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2849 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> ,
2850 const std::string& label,
2851 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2859 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
2860 typedef typename KCRS::StaticCrsGraphType graph_t;
2861 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2862 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2863 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2864 typedef typename scalar_view_t::memory_space scalar_memory_space;
2867 typedef LocalOrdinal LO;
2868 typedef GlobalOrdinal GO;
2870 typedef Map<LO, GO, NO> map_type;
2871 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2872 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2873 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2876 RCP<const map_type> Ccolmap = C.getColMap();
2877 size_t m = Aview.origMatrix->getLocalNumRows();
2878 size_t n = Ccolmap->getLocalNumElements();
2881 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
2882 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
2883 const KCRS& Cmat = C.getLocalMatrixHost();
2885 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2886 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2887 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2888 scalar_view_t Cvals = Cmat.values;
2890 c_lno_view_t Irowptr;
2891 lno_nnz_view_t Icolind;
2892 scalar_view_t Ivals;
2893 if (!Bview.importMatrix.is_null()) {
2894 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2895 Irowptr = lclB.graph.row_map;
2896 Icolind = lclB.graph.entries;
2897 Ivals = lclB.values;
2902 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
2909 std::vector<size_t> c_status(n, ST_INVALID);
2912 size_t CSR_ip = 0, OLD_ip = 0;
2913 for (
size_t i = 0; i < m; i++) {
2916 OLD_ip = Crowptr[i];
2917 CSR_ip = Crowptr[i + 1];
2918 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2919 c_status[Ccolind[k]] = k;
2925 SC minusOmegaDval = -omega * Dvals(i, 0);
2928 for (
size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
2929 Scalar Bval = Bvals[j];
2930 if (Bval == SC_ZERO)
2932 LO Bij = Bcolind[j];
2933 LO Cij = Bcol2Ccol[Bij];
2935 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2936 std::runtime_error,
"Trying to insert a new entry into a static graph");
2938 Cvals[c_status[Cij]] = Bvals[j];
2942 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
2943 LO Aik = Acolind[k];
2944 const SC Aval = Avals[k];
2945 if (Aval == SC_ZERO)
2948 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2950 size_t Bk =
static_cast<size_t>(targetMapToOrigRow[Aik]);
2952 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
2953 LO Bkj = Bcolind[j];
2954 LO Cij = Bcol2Ccol[Bkj];
2956 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2957 std::runtime_error,
"Trying to insert a new entry into a static graph");
2959 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
2964 size_t Ik =
static_cast<size_t>(targetMapToImportRow[Aik]);
2965 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
2966 LO Ikj = Icolind[j];
2967 LO Cij = Icol2Ccol[Ikj];
2969 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2970 std::runtime_error,
"Trying to insert a new entry into a static graph");
2972 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
2980 C.fillComplete(C.getDomainMap(), C.getRangeMap());
2985 template <
class Scalar,
2987 class GlobalOrdinal,
2989 void import_and_extract_views(
2990 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
2991 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node>> targetMap,
2992 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2993 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> prototypeImporter,
2994 bool userAssertsThereAreNoRemotes,
2995 const std::string& label,
2996 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2997 using Teuchos::Array;
2998 using Teuchos::ArrayView;
2999 using Teuchos::null;
3004 typedef LocalOrdinal LO;
3005 typedef GlobalOrdinal GO;
3008 typedef Map<LO, GO, NO> map_type;
3009 typedef Import<LO, GO, NO> import_type;
3010 typedef CrsMatrix<SC, LO, GO, NO> crs_matrix_type;
3020 Aview.deleteContents();
3022 Aview.origMatrix = rcp(&A,
false);
3024 Aview.origMatrix->getApplyHelper();
3025 Aview.origRowMap = A.getRowMap();
3026 Aview.rowMap = targetMap;
3027 Aview.colMap = A.getColMap();
3028 Aview.domainMap = A.getDomainMap();
3029 Aview.importColMap = null;
3030 RCP<const map_type> rowMap = A.getRowMap();
3031 const int numProcs = rowMap->getComm()->getSize();
3034 if (userAssertsThereAreNoRemotes || numProcs < 2)
3037 RCP<const import_type> importer;
3038 if (params != null && params->isParameter(
"importer")) {
3039 importer = params->get<RCP<const import_type>>(
"importer");
3047 RCP<const map_type> remoteRowMap;
3048 size_t numRemote = 0;
3050 if (!prototypeImporter.is_null() &&
3051 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3052 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3056 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3057 numRemote = prototypeImporter->getNumRemoteIDs();
3059 Array<GO> remoteRows(numRemote);
3060 for (
size_t i = 0; i < numRemote; i++)
3061 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3063 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3064 rowMap->getIndexBase(), rowMap->getComm()));
3067 }
else if (prototypeImporter.is_null()) {
3071 ArrayView<const GO> rows = targetMap->getLocalElementList();
3072 size_t numRows = targetMap->getLocalNumElements();
3074 Array<GO> remoteRows(numRows);
3075 for (
size_t i = 0; i < numRows; ++i) {
3076 const LO mlid = rowMap->getLocalElement(rows[i]);
3078 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3079 remoteRows[numRemote++] = rows[i];
3081 remoteRows.resize(numRemote);
3082 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3083 rowMap->getIndexBase(), rowMap->getComm()));
3092 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3093 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3106 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote));
3108 if (globalMaxNumRemote > 0) {
3114 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3116 importer = rcp(
new import_type(rowMap, remoteRowMap));
3118 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
3122 params->set(
"importer", importer);
3125 if (importer != null) {
3130 Teuchos::ParameterList labelList;
3131 labelList.set(
"Timer Label", label);
3132 auto& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
3135 bool overrideAllreduce =
false;
3138 Teuchos::ParameterList params_sublist;
3139 if (!params.is_null()) {
3140 labelList.set(
"compute global constants", params->get(
"compute global constants",
false));
3141 params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
3142 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
3143 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
3144 if (mm_optimization_core_count2 < mm_optimization_core_count) mm_optimization_core_count = mm_optimization_core_count2;
3145 isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
3146 overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
3148 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
3149 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
3150 labelList_subList.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
3152 Aview.importMatrix = Tpetra::importAndFillCompleteCrsMatrix<crs_matrix_type>(rcpFromRef(A), *importer,
3153 A.getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
3155 Aview.importMatrix->getApplyHelper();
3161 sprintf(str,
"import_matrix.%d.dat",count);
3166 #ifdef HAVE_TPETRA_MMM_STATISTICS
3167 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
3174 Aview.importColMap = Aview.importMatrix->getColMap();
3180 template <
class Scalar,
3182 class GlobalOrdinal,
3184 void import_and_extract_views(
3185 const BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& M,
3186 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node>> targetMap,
3187 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview,
3188 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> prototypeImporter,
3189 bool userAssertsThereAreNoRemotes) {
3190 using Teuchos::Array;
3191 using Teuchos::ArrayView;
3192 using Teuchos::null;
3197 typedef LocalOrdinal LO;
3198 typedef GlobalOrdinal GO;
3201 typedef Map<LO, GO, NO> map_type;
3202 typedef Import<LO, GO, NO> import_type;
3203 typedef BlockCrsMatrix<SC, LO, GO, NO> blockcrs_matrix_type;
3211 Mview.deleteContents();
3213 Mview.origMatrix = rcp(&M,
false);
3215 Mview.origMatrix->getApplyHelper();
3216 Mview.origRowMap = M.getRowMap();
3217 Mview.rowMap = targetMap;
3218 Mview.colMap = M.getColMap();
3219 Mview.importColMap = null;
3220 RCP<const map_type> rowMap = M.getRowMap();
3221 const int numProcs = rowMap->getComm()->getSize();
3224 if (userAssertsThereAreNoRemotes || numProcs < 2)
return;
3228 RCP<const map_type> remoteRowMap;
3229 size_t numRemote = 0;
3231 if (!prototypeImporter.is_null() &&
3232 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3233 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3235 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3236 numRemote = prototypeImporter->getNumRemoteIDs();
3238 Array<GO> remoteRows(numRemote);
3239 for (
size_t i = 0; i < numRemote; i++)
3240 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3242 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3243 rowMap->getIndexBase(), rowMap->getComm()));
3246 }
else if (prototypeImporter.is_null()) {
3248 ArrayView<const GO> rows = targetMap->getLocalElementList();
3249 size_t numRows = targetMap->getLocalNumElements();
3251 Array<GO> remoteRows(numRows);
3252 for (
size_t i = 0; i < numRows; ++i) {
3253 const LO mlid = rowMap->getLocalElement(rows[i]);
3255 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3256 remoteRows[numRemote++] = rows[i];
3258 remoteRows.resize(numRemote);
3259 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3260 rowMap->getIndexBase(), rowMap->getComm()));
3269 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3270 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3278 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote));
3280 RCP<const import_type> importer;
3282 if (globalMaxNumRemote > 0) {
3285 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3287 importer = rcp(
new import_type(rowMap, remoteRowMap));
3289 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match M.getRowMap()!");
3292 if (importer != null) {
3295 Mview.importMatrix = Tpetra::importAndFillCompleteBlockCrsMatrix<blockcrs_matrix_type>(rcpFromRef(M), *importer);
3297 Mview.importMatrix->getApplyHelper();
3300 Mview.importColMap = Mview.importMatrix->getColMap();
3306 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3308 merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3309 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3310 const LocalOrdinalViewType& Acol2Brow,
3311 const LocalOrdinalViewType& Acol2Irow,
3312 const LocalOrdinalViewType& Bcol2Ccol,
3313 const LocalOrdinalViewType& Icol2Ccol,
3314 const size_t mergedNodeNumCols) {
3317 typedef typename KCRS::StaticCrsGraphType graph_t;
3318 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3319 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3320 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3322 const KCRS& Ak = Aview.origMatrix->getLocalMatrixDevice();
3323 const KCRS& Bk = Bview.origMatrix->getLocalMatrixDevice();
3326 if (!Bview.importMatrix.is_null() || (Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3330 RCP<const KCRS> Ik_;
3331 if (!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KCRS>(Bview.importMatrix->getLocalMatrixDevice());
3332 const KCRS* Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3334 if (Ik != 0) Iks = *Ik;
3335 size_t merge_numrows = Ak.numCols();
3337 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3339 const LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3342 typedef typename Node::execution_space execution_space;
3343 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3344 Kokkos::parallel_scan(
3345 "Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type(0, merge_numrows),
3346 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3347 if (
final) Mrowptr(i) = update;
3350 if (Acol2Brow(i) != LO_INVALID)
3351 ct = Bk.graph.row_map(Acol2Brow(i) + 1) - Bk.graph.row_map(Acol2Brow(i));
3353 ct = Iks.graph.row_map(Acol2Irow(i) + 1) - Iks.graph.row_map(Acol2Irow(i));
3356 if (
final && i + 1 == merge_numrows)
3357 Mrowptr(i + 1) = update;
3361 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr, merge_numrows);
3362 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"), merge_nnz);
3363 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"), merge_nnz);
3366 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3367 Kokkos::parallel_for(
3368 "Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type(0, merge_numrows), KOKKOS_LAMBDA(
const size_t i) {
3369 if (Acol2Brow(i) != LO_INVALID) {
3370 size_t row = Acol2Brow(i);
3371 size_t start = Bk.graph.row_map(row);
3372 for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
3373 Mvalues(j) = Bk.values(j - Mrowptr(i) + start);
3374 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j - Mrowptr(i) + start));
3377 size_t row = Acol2Irow(i);
3378 size_t start = Iks.graph.row_map(row);
3379 for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
3380 Mvalues(j) = Iks.values(j - Mrowptr(i) + start);
3381 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j - Mrowptr(i) + start));
3386 KCRS newmat(
"CrsMatrix", merge_numrows, mergedNodeNumCols, merge_nnz, Mvalues, Mrowptr, Mcolind);
3396 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3397 const typename Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
3398 merge_matrices(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3399 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3400 const LocalOrdinalViewType& Acol2Brow,
3401 const LocalOrdinalViewType& Acol2Irow,
3402 const LocalOrdinalViewType& Bcol2Ccol,
3403 const LocalOrdinalViewType& Icol2Ccol,
3404 const size_t mergedNodeNumCols) {
3406 typedef typename Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type KBCRS;
3407 typedef typename KBCRS::StaticCrsGraphType graph_t;
3408 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3409 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3410 typedef typename KBCRS::values_type::non_const_type scalar_view_t;
3413 const KBCRS& Ak = Aview.origMatrix->getLocalMatrixDevice();
3414 const KBCRS& Bk = Bview.origMatrix->getLocalMatrixDevice();
3417 if (!Bview.importMatrix.is_null() ||
3418 (Bview.importMatrix.is_null() &&
3419 (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3423 RCP<const KBCRS> Ik_;
3424 if (!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KBCRS>(Bview.importMatrix->getLocalMatrixDevice());
3425 const KBCRS* Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3427 if (Ik != 0) Iks = *Ik;
3428 size_t merge_numrows = Ak.numCols();
3431 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3433 const LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3436 typedef typename Node::execution_space execution_space;
3437 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3438 Kokkos::parallel_scan(
3439 "Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type(0, merge_numrows),
3440 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3441 if (
final) Mrowptr(i) = update;
3444 if (Acol2Brow(i) != LO_INVALID)
3445 ct = Bk.graph.row_map(Acol2Brow(i) + 1) - Bk.graph.row_map(Acol2Brow(i));
3447 ct = Iks.graph.row_map(Acol2Irow(i) + 1) - Iks.graph.row_map(Acol2Irow(i));
3450 if (
final && i + 1 == merge_numrows)
3451 Mrowptr(i + 1) = update;
3455 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr, merge_numrows);
3456 const int blocksize = Ak.blockDim();
3457 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"), merge_nnz);
3458 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"), merge_nnz * blocksize * blocksize);
3461 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3462 Kokkos::parallel_for(
3463 "Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type(0, merge_numrows), KOKKOS_LAMBDA(
const size_t i) {
3464 if (Acol2Brow(i) != LO_INVALID) {
3465 size_t row = Acol2Brow(i);
3466 size_t start = Bk.graph.row_map(row);
3467 for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
3468 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j - Mrowptr(i) + start));
3470 for (
int b = 0; b < blocksize * blocksize; ++b) {
3471 const int val_indx = j * blocksize * blocksize + b;
3472 const int b_val_indx = (j - Mrowptr(i) +
start) * blocksize * blocksize + b;
3473 Mvalues(val_indx) = Bk.values(b_val_indx);
3477 size_t row = Acol2Irow(i);
3478 size_t start = Iks.graph.row_map(row);
3479 for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
3480 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j - Mrowptr(i) + start));
3482 for (
int b = 0; b < blocksize * blocksize; ++b) {
3483 const int val_indx = j * blocksize * blocksize + b;
3484 const int b_val_indx = (j - Mrowptr(i) +
start) * blocksize * blocksize + b;
3485 Mvalues(val_indx) = Iks.values(b_val_indx);
3492 KBCRS newmat(
"CrsMatrix", merge_numrows, mergedNodeNumCols, merge_nnz, Mvalues, Mrowptr, Mcolind, blocksize);
3501 template <
typename SC,
typename LO,
typename GO,
typename NO>
3502 void AddKernels<SC, LO, GO, NO>::
3504 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
3505 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
3506 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
3507 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3508 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
3509 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
3510 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
3511 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3513 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3514 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3515 typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds) {
3517 using Teuchos::TimeMonitor;
3518 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
3519 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
3520 auto nrows = Arowptrs.extent(0) - 1;
3521 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3522 typename AddKern::KKH handle;
3523 handle.create_spadd_handle(
true);
3524 auto addHandle = handle.get_spadd_handle();
3528 KokkosSparse::spadd_symbolic(&handle,
3529 nrows, numGlobalCols,
3530 Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
3532 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3533 Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3537 KokkosSparse::spadd_numeric(&handle,
3538 nrows, numGlobalCols,
3539 Arowptrs, Acolinds, Avals, scalarA,
3540 Browptrs, Bcolinds, Bvals, scalarB,
3541 Crowptrs, Ccolinds, Cvals);
3544 template <
typename SC,
typename LO,
typename GO,
typename NO>
3545 void AddKernels<SC, LO, GO, NO>::
3547 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
3548 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
3549 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
3550 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3551 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
3552 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
3553 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
3554 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3556 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3557 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3558 typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds) {
3560 using Teuchos::TimeMonitor;
3561 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
3562 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
3563 auto nrows = Arowptrs.extent(0) - 1;
3564 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3565 typedef MMdetails::AddKernels<SC, LO, GO, NO> AddKern;
3566 typename AddKern::KKH handle;
3567 handle.create_spadd_handle(
false);
3568 auto addHandle = handle.get_spadd_handle();
3571 KokkosSparse::spadd_symbolic(&handle,
3572 nrows, numGlobalCols,
3573 Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
3575 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3576 Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3579 KokkosSparse::spadd_numeric(&handle,
3580 nrows, numGlobalCols,
3581 Arowptrs, Acolinds, Avals, scalarA,
3582 Browptrs, Bcolinds, Bvals, scalarB,
3583 Crowptrs, Ccolinds, Cvals);
3586 template <
typename GO,
3587 typename LocalIndicesType,
3588 typename GlobalIndicesType,
3589 typename ColMapType>
3590 struct ConvertLocalToGlobalFunctor {
3591 ConvertLocalToGlobalFunctor(
3592 const LocalIndicesType& colindsOrig_,
3593 const GlobalIndicesType& colindsConverted_,
3594 const ColMapType& colmap_)
3595 : colindsOrig(colindsOrig_)
3596 , colindsConverted(colindsConverted_)
3597 , colmap(colmap_) {}
3598 KOKKOS_INLINE_FUNCTION
void
3599 operator()(
const GO i)
const {
3600 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
3602 LocalIndicesType colindsOrig;
3603 GlobalIndicesType colindsConverted;
3607 template <
typename SC,
typename LO,
typename GO,
typename NO>
3608 void MMdetails::AddKernels<SC, LO, GO, NO>::
3609 convertToGlobalAndAdd(
3610 const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS& A,
3611 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3612 const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS& B,
3613 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3614 const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& AcolMap,
3615 const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& BcolMap,
3616 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3617 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3618 typename MMdetails::AddKernels<SC, LO, GO, NO>::global_col_inds_array& Ccolinds) {
3620 using Teuchos::TimeMonitor;
3623 using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GO, impl_scalar_type,
3624 typename NO::execution_space,
typename NO::memory_space,
typename NO::memory_space>;
3626 const values_array Avals = A.values;
3627 const values_array Bvals = B.values;
3628 const col_inds_array Acolinds = A.graph.entries;
3629 const col_inds_array Bcolinds = B.graph.entries;
3630 auto Arowptrs = A.graph.row_map;
3631 auto Browptrs = B.graph.row_map;
3632 global_col_inds_array AcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"A colinds (converted)"), Acolinds.extent(0));
3633 global_col_inds_array BcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"B colinds (converted)"), Bcolinds.extent(0));
3637 ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertA(Acolinds, AcolindsConverted, AcolMap);
3638 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsA", range_type(0, Acolinds.extent(0)), convertA);
3639 ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertB(Bcolinds, BcolindsConverted, BcolMap);
3640 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsB", range_type(0, Bcolinds.extent(0)), convertB);
3642 handle.create_spadd_handle(
false);
3643 auto addHandle = handle.get_spadd_handle();
3646 auto nrows = Arowptrs.extent(0) - 1;
3647 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3648 KokkosSparse::spadd_symbolic(&handle,
3650 Arowptrs, AcolindsConverted, Browptrs, BcolindsConverted, Crowptrs);
3651 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3652 Ccolinds = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3656 KokkosSparse::spadd_numeric(&handle,
3658 Arowptrs, AcolindsConverted, Avals, scalarA,
3659 Browptrs, BcolindsConverted, Bvals, scalarB,
3660 Crowptrs, Ccolinds, Cvals);
3675 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR, LO, GO, NODE) \
3676 template void MatrixMatrix::Multiply( \
3677 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3679 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3681 CrsMatrix<SCALAR, LO, GO, NODE>& C, \
3682 bool call_FillComplete_on_result, \
3683 const std::string& label, \
3684 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3686 template void MatrixMatrix::Multiply( \
3687 const Teuchos::RCP<const BlockCrsMatrix<SCALAR, LO, GO, NODE>>& A, \
3689 const Teuchos::RCP<const BlockCrsMatrix<SCALAR, LO, GO, NODE>>& B, \
3691 Teuchos::RCP<BlockCrsMatrix<SCALAR, LO, GO, NODE>>& C, \
3692 const std::string& label); \
3694 template void MatrixMatrix::Jacobi( \
3696 const Vector<SCALAR, LO, GO, NODE>& Dinv, \
3697 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3698 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3699 CrsMatrix<SCALAR, LO, GO, NODE>& C, \
3700 bool call_FillComplete_on_result, \
3701 const std::string& label, \
3702 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3704 template void MatrixMatrix::Add( \
3705 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3708 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3711 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>& C); \
3713 template void MatrixMatrix::Add( \
3714 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3717 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3720 const Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>& C); \
3722 template void MatrixMatrix::Add( \
3723 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3726 CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3729 template Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>> \
3730 MatrixMatrix::add<SCALAR, LO, GO, NODE>(const SCALAR& alpha, \
3731 const bool transposeA, \
3732 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3733 const SCALAR& beta, \
3734 const bool transposeB, \
3735 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3736 const Teuchos::RCP<const Map<LO, GO, NODE>>& domainMap, \
3737 const Teuchos::RCP<const Map<LO, GO, NODE>>& rangeMap, \
3738 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3741 MatrixMatrix::add<SCALAR, LO, GO, NODE>(const SCALAR& alpha, \
3742 const bool transposeA, \
3743 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3744 const SCALAR& beta, \
3745 const bool transposeB, \
3746 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3747 CrsMatrix<SCALAR, LO, GO, NODE>& C, \
3748 const Teuchos::RCP<const Map<LO, GO, NODE>>& domainMap, \
3749 const Teuchos::RCP<const Map<LO, GO, NODE>>& rangeMap, \
3750 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3752 template struct MMdetails::AddKernels<SCALAR, LO, GO, NODE>; \
3754 template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const CrsMatrix<SCALAR, LO, GO, NODE>& M, \
3755 Teuchos::RCP<const Map<LO, GO, NODE>> targetMap, \
3756 CrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview, \
3757 Teuchos::RCP<const Import<LO, GO, NODE>> prototypeImporter, \
3758 bool userAssertsThereAreNoRemotes, \
3759 const std::string& label, \
3760 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3762 template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const BlockCrsMatrix<SCALAR, LO, GO, NODE>& M, \
3763 Teuchos::RCP<const Map<LO, GO, NODE>> targetMap, \
3764 BlockCrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview, \
3765 Teuchos::RCP<const Import<LO, GO, NODE>> prototypeImporter, \
3766 bool userAssertsThereAreNoRemotes);
3769 #endif // TPETRA_MATRIXMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
bool isFillActive() const
Whether the matrix is not fill complete.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Construct and (optionally) redistribute the explicitly stored transpose of a CrsMatrix.
Struct that holds views of the contents of a BlockCrsMatrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
static void writeSparseFile(const std::string &filename, const sparse_matrix_type &matrix, const std::string &matrixName, const std::string &matrixDescription, const bool debug=false)
Print the sparse matrix in Matrix Market format, with comments.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
Utility functions for packing and unpacking sparse matrix entries.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
void start()
Start the deep_copy counter.
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
A parallel distribution of indices over processes.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
A distributed dense vector.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Matrix Market file readers and writers for Tpetra objects.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void removeCrsMatrixZeros(CrsMatrixType &matrix, typename Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitudeType const &threshold=Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitude(Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::zero()))
Remove zero entries from a matrix.
Forward declaration of some Tpetra Matrix Matrix objects.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.