10 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_DEF_HPP
13 #include "KokkosSparse_Utils.hpp"
14 #include "Tpetra_ConfigDefs.hpp"
16 #include "Teuchos_VerboseObject.hpp"
17 #include "Teuchos_Array.hpp"
19 #include "Tpetra_CrsMatrix.hpp"
20 #include "Tpetra_BlockCrsMatrix.hpp"
22 #include "Tpetra_RowMatrixTransposer.hpp"
25 #include "Tpetra_Details_makeColMap.hpp"
26 #include "Tpetra_ConfigDefs.hpp"
27 #include "Tpetra_Map.hpp"
28 #include "Tpetra_Export.hpp"
34 #include <type_traits>
35 #include "Teuchos_FancyOStream.hpp"
37 #include "TpetraExt_MatrixMatrix_ExtraKernels_def.hpp"
40 #include "KokkosSparse_spgemm.hpp"
41 #include "KokkosSparse_spadd.hpp"
42 #include "Kokkos_Bitset.hpp"
56 #include "TpetraExt_MatrixMatrix_OpenMP.hpp"
57 #include "TpetraExt_MatrixMatrix_Cuda.hpp"
58 #include "TpetraExt_MatrixMatrix_HIP.hpp"
59 #include "TpetraExt_MatrixMatrix_SYCL.hpp"
63 namespace MatrixMatrix{
71 template <
class Scalar,
81 bool call_FillComplete_on_result,
82 const std::string& label,
83 const Teuchos::RCP<Teuchos::ParameterList>& params)
89 typedef LocalOrdinal LO;
90 typedef GlobalOrdinal GO;
100 const std::string prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
105 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
106 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
111 RCP<const crs_matrix_type> Aprime = null;
115 RCP<const crs_matrix_type> Bprime = null;
125 const bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
127 bool use_optimized_ATB =
false;
128 if (transposeA && !transposeB && call_FillComplete_on_result && newFlag)
129 use_optimized_ATB =
true;
131 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later.
132 use_optimized_ATB =
false;
135 using Teuchos::ParameterList;
136 RCP<ParameterList> transposeParams (
new ParameterList);
137 transposeParams->set (
"sort",
true);
139 if (!use_optimized_ATB && transposeA) {
140 transposer_type transposer (rcpFromRef (A));
141 Aprime = transposer.createTranspose (transposeParams);
144 Aprime = rcpFromRef(A);
148 transposer_type transposer (rcpFromRef (B));
149 Bprime = transposer.createTranspose (transposeParams);
152 Bprime = rcpFromRef(B);
162 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
163 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
164 "must match for matrix-matrix product. op(A) is "
165 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
171 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
172 prefix <<
"ERROR, dimensions of result C must "
174 <<
" rows, should have at least " << Aouter << std::endl);
186 int numProcs = A.
getComm()->getSize();
190 crs_matrix_struct_type Aview;
191 crs_matrix_struct_type Bview;
193 RCP<const map_type> targetMap_A = Aprime->getRowMap();
194 RCP<const map_type> targetMap_B = Bprime->getRowMap();
202 if (!use_optimized_ATB) {
203 RCP<const import_type> dummyImporter;
204 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label, params);
210 targetMap_B = Aprime->getColMap();
213 if (!use_optimized_ATB)
214 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(), Aprime->getGraph()->getImporter().is_null(), label, params);
222 if (use_optimized_ATB) {
223 MMdetails::mult_AT_B_newmatrix(A, B, C, label,params);
225 }
else if (call_FillComplete_on_result && newFlag) {
226 MMdetails::mult_A_B_newmatrix(Aview, Bview, C, label,params);
228 }
else if (call_FillComplete_on_result) {
229 MMdetails::mult_A_B_reuse(Aview, Bview, C, label,params);
235 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
237 MMdetails::mult_A_B(Aview, Bview, crsmat, label,params);
248 C.
fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
256 template <
class Scalar,
266 const std::string& label)
272 typedef LocalOrdinal LO;
273 typedef GlobalOrdinal GO;
279 std::string prefix = std::string(
"TpetraExt ") + label + std::string(
": ");
281 TEUCHOS_TEST_FOR_EXCEPTION(transposeA==
true, std::runtime_error, prefix <<
"Matrix A cannot be transposed.");
282 TEUCHOS_TEST_FOR_EXCEPTION(transposeB==
true, std::runtime_error, prefix <<
"Matrix B cannot be transposed.");
294 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
295 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
296 "must match for matrix-matrix product. op(A) is "
297 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
301 int numProcs = A->getComm()->getSize();
303 const LO blocksize = A->getBlockSize();
304 TEUCHOS_TEST_FOR_EXCEPTION(blocksize != B->getBlockSize(), std::runtime_error,
305 prefix <<
"ERROR, Blocksizes do not match. A.blocksize = " <<
306 blocksize <<
", B.blocksize = " << B->getBlockSize() );
310 blockcrs_matrix_struct_type Aview(blocksize);
311 blockcrs_matrix_struct_type Bview(blocksize);
313 RCP<const map_type> targetMap_A = A->getRowMap();
314 RCP<const map_type> targetMap_B = B->getRowMap();
317 RCP<const import_type> dummyImporter;
318 MMdetails::import_and_extract_views(*A, targetMap_A, Aview, dummyImporter,
true);
323 targetMap_B = A->getColMap();
326 MMdetails::import_and_extract_views(*B, targetMap_B, Bview, A->getGraph()->getImporter(),
327 A->getGraph()->getImporter().is_null());
330 MMdetails::mult_A_B_newmatrix(Aview, Bview, C);
333 template <
class Scalar,
342 bool call_FillComplete_on_result,
343 const std::string& label,
344 const Teuchos::RCP<Teuchos::ParameterList>& params)
349 typedef LocalOrdinal LO;
350 typedef GlobalOrdinal GO;
360 const std::string prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
365 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
366 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
368 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
369 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
378 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
379 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
380 "must match for matrix-matrix product. op(A) is "
381 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
387 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
388 prefix <<
"ERROR, dimensions of result C must "
390 <<
" rows, should have at least "<< Aouter << std::endl);
402 int numProcs = A.
getComm()->getSize();
406 crs_matrix_struct_type Aview;
407 crs_matrix_struct_type Bview;
409 RCP<const map_type> targetMap_A = Aprime->getRowMap();
410 RCP<const map_type> targetMap_B = Bprime->getRowMap();
416 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
417 if(!params.is_null()) {
418 importParams1->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
419 int mm_optimization_core_count=0;
420 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
422 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
423 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
424 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
425 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
426 auto & ip1slist = importParams1->sublist(
"matrixmatrix: kernel params",
false);
427 ip1slist.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
428 ip1slist.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
429 ip1slist.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
433 RCP<const import_type> dummyImporter;
434 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label,importParams1);
439 targetMap_B = Aprime->getColMap();
444 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
445 if(!params.is_null()) {
446 importParams2->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
448 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
450 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
451 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
452 auto & ip2slist = importParams2->sublist(
"matrixmatrix: kernel params",
false);
453 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
454 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
455 ip2slist.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
456 ip2slist.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
457 ip2slist.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
460 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(), Aprime->getGraph()->getImporter().is_null(), label,importParams2);
466 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
469 bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
471 if (call_FillComplete_on_result && newFlag) {
472 MMdetails::jacobi_A_B_newmatrix(omega, Dinv, Aview, Bview, C, label, params);
474 }
else if (call_FillComplete_on_result) {
475 MMdetails::jacobi_A_B_reuse(omega, Dinv, Aview, Bview, C, label, params);
478 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"jacobi_A_B_general not implemented");
481 if(!params.is_null()) {
482 bool removeZeroEntries = params->get(
"remove zeros",
false);
483 if (removeZeroEntries) {
484 typedef Teuchos::ScalarTraits<Scalar> STS;
485 typename STS::magnitudeType threshold = params->get(
"remove zeros threshold", STS::magnitude(STS::zero()));
492 template <
class Scalar,
503 using Teuchos::Array;
507 typedef LocalOrdinal LO;
508 typedef GlobalOrdinal GO;
513 const std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
515 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
516 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. "
517 "(Result matrix B is not required to be isFillComplete()).");
518 TEUCHOS_TEST_FOR_EXCEPTION(B.
isFillComplete() , std::runtime_error,
519 prefix <<
"ERROR, input matrix B must not be fill complete!");
520 TEUCHOS_TEST_FOR_EXCEPTION(B.
isStaticGraph() , std::runtime_error,
521 prefix <<
"ERROR, input matrix B must not have static graph!");
523 prefix <<
"ERROR, input matrix B must not be locally indexed!");
525 using Teuchos::ParameterList;
526 RCP<ParameterList> transposeParams (
new ParameterList);
527 transposeParams->set (
"sort",
false);
529 RCP<const crs_matrix_type> Aprime = null;
531 transposer_type transposer (rcpFromRef (A));
532 Aprime = transposer.createTranspose (transposeParams);
535 Aprime = rcpFromRef(A);
543 if (scalarB != Teuchos::ScalarTraits<SC>::one())
547 if (scalarA != Teuchos::ScalarTraits<SC>::zero()) {
548 for (LO i = 0; (size_t)i < numMyRows; ++i) {
549 row = B.
getRowMap()->getGlobalElement(i);
550 Aprime->getGlobalRowCopy(row, a_inds, a_vals, a_numEntries);
552 if (scalarA != Teuchos::ScalarTraits<SC>::one()) {
553 for (
size_t j = 0; j < a_numEntries; ++j)
554 a_vals[j] *= scalarA;
556 B.
insertGlobalValues(row, a_numEntries, reinterpret_cast<Scalar *>(a_vals.data()), a_inds.data());
561 template <
class Scalar,
565 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
567 const bool transposeA,
570 const bool transposeB,
574 const Teuchos::RCP<Teuchos::ParameterList>& params)
577 using Teuchos::rcpFromRef;
579 using Teuchos::ParameterList;
581 if(!params.is_null())
583 TEUCHOS_TEST_FOR_EXCEPTION(
584 params->isParameter(
"Call fillComplete") && !params->get<
bool>(
"Call fillComplete"),
585 std::invalid_argument,
586 "Tpetra::MatrixMatrix::add(): this version of add() always calls fillComplete\n"
587 "on the result, but you explicitly set 'Call fillComplete' = false in the parameter list. Don't set this explicitly.");
588 params->set(
"Call fillComplete",
true);
592 RCP<const crs_matrix_type> Brcp = rcpFromRef(B);
599 TEUCHOS_TEST_FOR_EXCEPTION
600 (! A.
isFillComplete () || ! Brcp->isFillComplete (), std::invalid_argument,
601 "TpetraExt::MatrixMatrix::add(): A and B must both be fill complete.");
602 RCP<crs_matrix_type> C = rcp(
new crs_matrix_type(Brcp->getRowMap(), 0));
604 add(alpha, transposeA, A, beta,
false, *Brcp, *C, domainMap, rangeMap, params);
612 template<
class LO,
class GO,
class LOView,
class GOView,
class LocalMap>
613 struct ConvertGlobalToLocalFunctor
615 ConvertGlobalToLocalFunctor(LOView& lids_,
const GOView& gids_,
const LocalMap localColMap_)
616 : lids(lids_), gids(gids_), localColMap(localColMap_)
619 KOKKOS_FUNCTION
void operator() (
const GO i)
const
621 lids(i) = localColMap.getLocalElement(gids(i));
626 const LocalMap localColMap;
629 template <
class Scalar,
635 const bool transposeA,
638 const bool transposeB,
643 const Teuchos::RCP<Teuchos::ParameterList>& params)
647 using Teuchos::rcpFromRef;
648 using Teuchos::rcp_implicit_cast;
649 using Teuchos::rcp_dynamic_cast;
650 using Teuchos::TimeMonitor;
652 using LO = LocalOrdinal;
653 using GO = GlobalOrdinal;
661 using exec_space =
typename crs_graph_type::execution_space;
662 using AddKern = MMdetails::AddKernels<SC,LO,GO,NO>;
663 const char* prefix_mmm =
"TpetraExt::MatrixMatrix::add: ";
664 constexpr
bool debug =
false;
669 std::ostringstream os;
670 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
671 <<
"TpetraExt::MatrixMatrix::add" << std::endl;
672 std::cerr << os.str ();
675 TEUCHOS_TEST_FOR_EXCEPTION
677 prefix_mmm <<
"C must be a 'new' matrix (neither locally nor globally indexed).");
678 TEUCHOS_TEST_FOR_EXCEPTION
680 prefix_mmm <<
"A and B must both be fill complete.");
681 #ifdef HAVE_TPETRA_DEBUG
684 const bool domainMapsSame =
685 (! transposeA && ! transposeB &&
687 (! transposeA && transposeB &&
689 ( transposeA && ! transposeB &&
691 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
692 prefix_mmm <<
"The domain Maps of Op(A) and Op(B) are not the same.");
694 const bool rangeMapsSame =
695 (! transposeA && ! transposeB &&
697 (! transposeA && transposeB &&
699 ( transposeA && ! transposeB &&
701 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
702 prefix_mmm <<
"The range Maps of Op(A) and Op(B) are not the same.");
704 #endif // HAVE_TPETRA_DEBUG
706 using Teuchos::ParameterList;
708 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
710 transposer_type transposer (Aprime);
711 Aprime = transposer.createTranspose ();
714 #ifdef HAVE_TPETRA_DEBUG
715 TEUCHOS_TEST_FOR_EXCEPTION
716 (Aprime.is_null (), std::logic_error,
717 prefix_mmm <<
"Failed to compute Op(A). "
718 "Please report this bug to the Tpetra developers.");
719 #endif // HAVE_TPETRA_DEBUG
722 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
725 std::ostringstream os;
726 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
727 <<
"Form explicit xpose of B" << std::endl;
728 std::cerr << os.str ();
730 transposer_type transposer (Bprime);
731 Bprime = transposer.createTranspose ();
733 #ifdef HAVE_TPETRA_DEBUG
734 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
735 prefix_mmm <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
736 TEUCHOS_TEST_FOR_EXCEPTION(
737 !Aprime->isFillComplete () || !Bprime->isFillComplete (), std::invalid_argument,
738 prefix_mmm <<
"Aprime and Bprime must both be fill complete. "
739 "Please report this bug to the Tpetra developers.");
740 #endif // HAVE_TPETRA_DEBUG
741 RCP<const map_type> CDomainMap = domainMap;
742 RCP<const map_type> CRangeMap = rangeMap;
743 if(CDomainMap.is_null())
745 CDomainMap = Bprime->getDomainMap();
747 if(CRangeMap.is_null())
749 CRangeMap = Bprime->getRangeMap();
751 assert(!(CDomainMap.is_null()));
752 assert(!(CRangeMap.is_null()));
753 typedef typename AddKern::values_array values_array;
754 typedef typename AddKern::row_ptrs_array row_ptrs_array;
755 typedef typename AddKern::col_inds_array col_inds_array;
756 bool AGraphSorted = Aprime->getCrsGraph()->isSorted();
757 bool BGraphSorted = Bprime->getCrsGraph()->isSorted();
759 row_ptrs_array rowptrs;
760 col_inds_array colinds;
764 if(!(Aprime->getRowMap()->isSameAs(*(Bprime->getRowMap()))))
767 auto import = rcp(
new import_type(Aprime->getRowMap(), Bprime->getRowMap()));
772 Aprime = importAndFillCompleteCrsMatrix<crs_matrix_type>(Aprime, *
import, Bprime->getDomainMap(), Bprime->getRangeMap());
774 bool matchingColMaps = Aprime->getColMap()->isSameAs(*(Bprime->getColMap()));
775 bool sorted = AGraphSorted && BGraphSorted;
776 RCP<const import_type> Cimport = Teuchos::null;
777 RCP<export_type> Cexport = Teuchos::null;
778 bool doFillComplete =
true;
779 if(Teuchos::nonnull(params) && params->isParameter(
"Call fillComplete"))
781 doFillComplete = params->get<
bool>(
"Call fillComplete");
783 auto Alocal = Aprime->getLocalMatrixDevice();
784 auto Blocal = Bprime->getLocalMatrixDevice();
785 LO numLocalRows = Alocal.numRows();
786 if(numLocalRows == 0)
793 rowptrs = row_ptrs_array(
"C rowptrs", 0);
795 auto Acolmap = Aprime->getColMap();
796 auto Bcolmap = Bprime->getColMap();
799 using global_col_inds_array =
typename AddKern::global_col_inds_array;
803 auto AlocalColmap = Acolmap->getLocalMap();
804 auto BlocalColmap = Bcolmap->getLocalMap();
805 global_col_inds_array globalColinds;
807 std::ostringstream os;
808 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
809 <<
"Call AddKern::convertToGlobalAndAdd(...)" << std::endl;
810 std::cerr << os.str ();
812 AddKern::convertToGlobalAndAdd(
813 Alocal, alpha, Blocal, beta, AlocalColmap, BlocalColmap,
814 vals, rowptrs, globalColinds);
816 std::ostringstream os;
817 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
818 <<
"Finished AddKern::convertToGlobalAndAdd(...)" << std::endl;
819 std::cerr << os.str ();
823 RCP<const map_type> CcolMap;
824 Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>
825 (CcolMap, CDomainMap, globalColinds);
827 col_inds_array localColinds(
"C colinds", globalColinds.extent(0));
828 Kokkos::parallel_for(Kokkos::RangePolicy<exec_space>(0, globalColinds.extent(0)),
829 ConvertGlobalToLocalFunctor<LocalOrdinal, GlobalOrdinal,
830 col_inds_array, global_col_inds_array,
831 typename map_type::local_map_type>
832 (localColinds, globalColinds, CcolMap->getLocalMap()));
833 KokkosSparse::sort_crs_matrix<exec_space, row_ptrs_array, col_inds_array, values_array>(rowptrs, localColinds, vals);
842 auto Avals = Alocal.values;
843 auto Bvals = Blocal.values;
844 auto Arowptrs = Alocal.graph.row_map;
845 auto Browptrs = Blocal.graph.row_map;
846 auto Acolinds = Alocal.graph.entries;
847 auto Bcolinds = Blocal.graph.entries;
853 std::ostringstream os;
854 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
855 <<
"Call AddKern::addSorted(...)" << std::endl;
856 std::cerr << os.str ();
858 AddKern::addSorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, Aprime->getGlobalNumCols(), vals, rowptrs, colinds);
866 std::ostringstream os;
867 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
868 <<
"Call AddKern::addUnsorted(...)" << std::endl;
869 std::cerr << os.str ();
871 AddKern::addUnsorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, Aprime->getGlobalNumCols(), vals, rowptrs, colinds);
874 RCP<const map_type> Ccolmap = Bcolmap;
880 if(!CDomainMap->isSameAs(*Ccolmap))
883 std::ostringstream os;
884 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
885 <<
"Create Cimport" << std::endl;
886 std::cerr << os.str ();
888 Cimport = rcp(
new import_type(CDomainMap, Ccolmap));
893 std::ostringstream os;
894 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
895 <<
"Create Cexport" << std::endl;
896 std::cerr << os.str ();
898 Cexport = rcp(
new export_type(C.
getRowMap(), CRangeMap));
902 std::ostringstream os;
903 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
904 <<
"Call C->expertStaticFillComplete(...)" << std::endl;
905 std::cerr << os.str ();
914 template <
class Scalar,
927 using Teuchos::Array;
928 using Teuchos::ArrayRCP;
929 using Teuchos::ArrayView;
932 using Teuchos::rcp_dynamic_cast;
933 using Teuchos::rcpFromRef;
934 using Teuchos::tuple;
937 typedef Teuchos::ScalarTraits<Scalar> STS;
945 std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
947 TEUCHOS_TEST_FOR_EXCEPTION(
949 prefix <<
"A and B must both be fill complete before calling this function.");
953 prefix <<
"C is null (must be allocated), but A.haveGlobalConstants() is false. "
954 "Please report this bug to the Tpetra developers.");
956 prefix <<
"C is null (must be allocated), but B.haveGlobalConstants() is false. "
957 "Please report this bug to the Tpetra developers.");
960 #ifdef HAVE_TPETRA_DEBUG
962 const bool domainMapsSame =
966 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
967 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
969 const bool rangeMapsSame =
973 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
974 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
976 #endif // HAVE_TPETRA_DEBUG
978 using Teuchos::ParameterList;
979 RCP<ParameterList> transposeParams (
new ParameterList);
980 transposeParams->set (
"sort",
false);
983 RCP<const crs_matrix_type> Aprime;
985 transposer_type theTransposer (rcpFromRef (A));
986 Aprime = theTransposer.createTranspose (transposeParams);
989 Aprime = rcpFromRef (A);
992 #ifdef HAVE_TPETRA_DEBUG
993 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
994 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
995 #endif // HAVE_TPETRA_DEBUG
998 RCP<const crs_matrix_type> Bprime;
1000 transposer_type theTransposer (rcpFromRef (B));
1001 Bprime = theTransposer.createTranspose (transposeParams);
1004 Bprime = rcpFromRef (B);
1007 #ifdef HAVE_TPETRA_DEBUG
1008 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
1009 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
1010 #endif // HAVE_TPETRA_DEBUG
1012 bool CwasFillComplete =
false;
1015 if (! C.is_null ()) {
1016 CwasFillComplete = C->isFillComplete();
1017 if(CwasFillComplete)
1019 C->setAllToScalar (STS::zero ());
1029 if(Aprime->getRowMap()->isSameAs(*Bprime->getRowMap())) {
1030 LocalOrdinal numLocalRows = Aprime->getLocalNumRows();
1031 Array<size_t> CmaxEntriesPerRow(numLocalRows);
1032 for(LocalOrdinal i = 0; i < numLocalRows; i++) {
1033 CmaxEntriesPerRow[i] = Aprime->getNumEntriesInLocalRow(i) + Bprime->getNumEntriesInLocalRow(i);
1035 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), CmaxEntriesPerRow()));
1039 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), Aprime->getGlobalMaxNumRowEntries() + Bprime->getGlobalMaxNumRowEntries()));
1043 #ifdef HAVE_TPETRA_DEBUG
1044 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
1045 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
1046 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
1047 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
1048 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
1049 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
1050 #endif // HAVE_TPETRA_DEBUG
1052 Array<RCP<const crs_matrix_type> > Mat =
1053 tuple<RCP<const crs_matrix_type> > (Aprime, Bprime);
1054 Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB);
1057 for (
int k = 0; k < 2; ++k) {
1058 typename crs_matrix_type::nonconst_global_inds_host_view_type Indices;
1059 typename crs_matrix_type::nonconst_values_host_view_type Values;
1067 #ifdef HAVE_TPETRA_DEBUG
1068 TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error,
1069 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1070 #endif // HAVE_TPETRA_DEBUG
1071 RCP<const map_type> curRowMap = Mat[k]->getRowMap ();
1072 #ifdef HAVE_TPETRA_DEBUG
1073 TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error,
1074 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1075 #endif // HAVE_TPETRA_DEBUG
1077 const size_t localNumRows = Mat[k]->getLocalNumRows ();
1078 for (
size_t i = 0; i < localNumRows; ++i) {
1079 const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i);
1080 size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow);
1081 if (numEntries > 0) {
1082 if(numEntries > Indices.extent(0)) {
1083 Kokkos::resize(Indices, numEntries);
1084 Kokkos::resize(Values, numEntries);
1086 Mat[k]->getGlobalRowCopy (globalRow, Indices, Values, numEntries);
1088 if (scalar[k] != STS::one ()) {
1089 for (
size_t j = 0; j < numEntries; ++j) {
1090 Values[j] *= scalar[k];
1094 if (CwasFillComplete) {
1095 size_t result = C->sumIntoGlobalValues (globalRow, numEntries,
1096 reinterpret_cast<Scalar *>(Values.data()), Indices.data());
1097 TEUCHOS_TEST_FOR_EXCEPTION(result != numEntries, std::logic_error,
1098 prefix <<
"sumIntoGlobalValues failed to add entries from A or B into C.");
1100 C->insertGlobalValues (globalRow, numEntries,
1101 reinterpret_cast<Scalar *>(Values.data()), Indices.data());
1106 if(CwasFillComplete) {
1107 C->fillComplete(C->getDomainMap (),
1114 template <
class Scalar,
1116 class GlobalOrdinal,
1127 std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
1129 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::invalid_argument,
1130 prefix <<
"C must not be null");
1132 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > C_ = C;
1133 Add(A, transposeA, scalarA, B, transposeB, scalarB, C_);
1138 namespace MMdetails{
1190 template<
class Scalar,
1192 class GlobalOrdinal,
1194 void mult_AT_B_newmatrix(
1195 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1196 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
1197 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1198 const std::string & label,
1199 const Teuchos::RCP<Teuchos::ParameterList>& params)
1204 typedef LocalOrdinal LO;
1205 typedef GlobalOrdinal GO;
1207 typedef CrsMatrixStruct<SC,LO,GO,NO> crs_matrix_struct_type;
1208 typedef RowMatrixTransposer<SC,LO,GO,NO> transposer_type;
1216 transposer_type transposer (rcpFromRef (A), label + std::string(
"XP: "));
1218 using Teuchos::ParameterList;
1219 RCP<ParameterList> transposeParams (
new ParameterList);
1220 transposeParams->set (
"sort",
true);
1221 if(! params.is_null ()) {
1222 transposeParams->set (
"compute global constants",
1223 params->get (
"compute global constants: temporaries",
1226 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Atrans =
1227 transposer.createTransposeLocal (transposeParams);
1235 crs_matrix_struct_type Aview;
1236 crs_matrix_struct_type Bview;
1237 RCP<const Import<LO, GO, NO> > dummyImporter;
1240 RCP<Teuchos::ParameterList> importParams1 (
new ParameterList);
1241 if (! params.is_null ()) {
1242 importParams1->set (
"compute global constants",
1243 params->get (
"compute global constants: temporaries",
1245 auto slist = params->sublist (
"matrixmatrix: kernel params",
false);
1246 bool isMM = slist.get (
"isMatrixMatrix_TransferAndFillComplete",
false);
1247 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1248 int mm_optimization_core_count =
1250 mm_optimization_core_count =
1251 slist.get (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1252 int mm_optimization_core_count2 =
1253 params->get (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1254 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1255 mm_optimization_core_count = mm_optimization_core_count2;
1257 auto & sip1 = importParams1->sublist (
"matrixmatrix: kernel params",
false);
1258 sip1.set (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1259 sip1.set (
"isMatrixMatrix_TransferAndFillComplete", isMM);
1260 sip1.set (
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1263 MMdetails::import_and_extract_views (*Atrans, Atrans->getRowMap (),
1264 Aview, dummyImporter,
true,
1265 label, importParams1);
1267 RCP<ParameterList> importParams2 (
new ParameterList);
1268 if (! params.is_null ()) {
1269 importParams2->set (
"compute global constants",
1270 params->get (
"compute global constants: temporaries",
1272 auto slist = params->sublist (
"matrixmatrix: kernel params",
false);
1273 bool isMM = slist.get (
"isMatrixMatrix_TransferAndFillComplete",
false);
1274 bool overrideAllreduce = slist.get (
"MM_TAFC_OverrideAllreduceCheck",
false);
1275 int mm_optimization_core_count =
1277 mm_optimization_core_count =
1278 slist.get (
"MM_TAFC_OptimizationCoreCount",
1279 mm_optimization_core_count);
1280 int mm_optimization_core_count2 =
1281 params->get (
"MM_TAFC_OptimizationCoreCount",
1282 mm_optimization_core_count);
1283 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1284 mm_optimization_core_count = mm_optimization_core_count2;
1286 auto & sip2 = importParams2->sublist (
"matrixmatrix: kernel params",
false);
1287 sip2.set (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1288 sip2.set (
"isMatrixMatrix_TransferAndFillComplete", isMM);
1289 sip2.set (
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1292 if(B.getRowMap()->isSameAs(*Atrans->getColMap())){
1293 MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,
true, label,importParams2);
1296 MMdetails::import_and_extract_views(B, Atrans->getColMap(), Bview, dummyImporter,
false, label,importParams2);
1301 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Ctemp;
1304 bool needs_final_export = ! Atrans->getGraph ()->getExporter ().is_null();
1305 if (needs_final_export) {
1309 Ctemp = rcp (&C,
false);
1312 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label,params);
1319 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Crcp (&C,
false);
1321 if (needs_final_export) {
1322 ParameterList labelList;
1323 labelList.set(
"Timer Label", label);
1324 if(!params.is_null()) {
1325 ParameterList& params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
1326 ParameterList& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
1328 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1329 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1330 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
1331 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count,
"Core Count above which the optimized neighbor discovery is used");
1332 bool isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1333 bool overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1335 labelList_subList.set (
"isMatrixMatrix_TransferAndFillComplete", isMM,
1336 "This parameter should be set to true only for MatrixMatrix operations: the optimization in Epetra that was ported to Tpetra does _not_ take into account the possibility that for any given source PID, a particular GID may not exist on the target PID: i.e. a transfer operation. A fix for this general case is in development.");
1337 labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
1338 labelList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
1340 Ctemp->exportAndFillComplete (Crcp,
1341 *Ctemp->getGraph ()->getExporter (),
1344 rcp (&labelList,
false));
1346 #ifdef HAVE_TPETRA_MMM_STATISTICS
1347 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label+std::string(
" AT_B MMM"));
1353 template<
class Scalar,
1355 class GlobalOrdinal,
1358 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1359 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1360 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1361 const std::string& ,
1362 const Teuchos::RCP<Teuchos::ParameterList>& )
1364 using Teuchos::Array;
1365 using Teuchos::ArrayRCP;
1366 using Teuchos::ArrayView;
1367 using Teuchos::OrdinalTraits;
1368 using Teuchos::null;
1370 typedef Teuchos::ScalarTraits<Scalar> STS;
1372 LocalOrdinal C_firstCol = Bview.colMap->getMinLocalIndex();
1373 LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
1375 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1376 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1378 ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getLocalElementList();
1379 ArrayView<const GlobalOrdinal> bcols_import = null;
1380 if (Bview.importColMap != null) {
1381 C_firstCol_import = Bview.importColMap->getMinLocalIndex();
1382 C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
1384 bcols_import = Bview.importColMap->getLocalElementList();
1387 size_t C_numCols = C_lastCol - C_firstCol +
1388 OrdinalTraits<LocalOrdinal>::one();
1389 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1390 OrdinalTraits<LocalOrdinal>::one();
1392 if (C_numCols_import > C_numCols)
1393 C_numCols = C_numCols_import;
1395 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1396 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1397 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1399 Array<Scalar> C_row_i = dwork;
1400 Array<GlobalOrdinal> C_cols = iwork;
1401 Array<size_t> c_index = iwork2;
1402 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1403 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1405 size_t C_row_i_length, j, k, last_index;
1408 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1409 Array<LocalOrdinal> Acol2Brow(Aview.colMap->getLocalNumElements(),LO_INVALID);
1410 Array<LocalOrdinal> Acol2Irow(Aview.colMap->getLocalNumElements(),LO_INVALID);
1411 if(Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())){
1413 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1414 Aview.colMap->getMaxLocalIndex(); i++)
1419 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1420 Aview.colMap->getMaxLocalIndex(); i++) {
1421 GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
1422 LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
1423 if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1424 else Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
1434 auto Arowptr = Aview.origMatrix->getLocalRowPtrsHost();
1435 auto Acolind = Aview.origMatrix->getLocalIndicesHost();
1436 auto Avals = Aview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1437 auto Browptr = Bview.origMatrix->getLocalRowPtrsHost();
1438 auto Bcolind = Bview.origMatrix->getLocalIndicesHost();
1439 auto Bvals = Bview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1440 decltype(Browptr) Irowptr;
1441 decltype(Bcolind) Icolind;
1442 decltype(Bvals) Ivals;
1443 if(!Bview.importMatrix.is_null()) {
1444 Irowptr = Bview.importMatrix->getLocalRowPtrsHost();
1445 Icolind = Bview.importMatrix->getLocalIndicesHost();
1446 Ivals = Bview.importMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1449 bool C_filled = C.isFillComplete();
1451 for (
size_t i = 0; i < C_numCols; i++)
1452 c_index[i] = OrdinalTraits<size_t>::invalid();
1455 size_t Arows = Aview.rowMap->getLocalNumElements();
1456 for(
size_t i=0; i<Arows; ++i) {
1460 GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
1466 C_row_i_length = OrdinalTraits<size_t>::zero();
1468 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1469 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1470 const Scalar Aval = Avals[k];
1471 if (Aval == STS::zero())
1474 if (Ak == LO_INVALID)
1477 for (j = Browptr[Ak]; j < Browptr[Ak+1]; ++j) {
1478 LocalOrdinal col = Bcolind[j];
1481 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1484 C_row_i[C_row_i_length] = Aval*Bvals[j];
1485 C_cols[C_row_i_length] = col;
1486 c_index[col] = C_row_i_length;
1491 C_row_i[c_index[col]] += Aval *
static_cast<Scalar
>(Bvals[j]);
1496 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1497 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1498 C_cols[ii] = bcols[C_cols[ii]];
1499 combined_index[ii] = C_cols[ii];
1500 combined_values[ii] = C_row_i[ii];
1502 last_index = C_row_i_length;
1508 C_row_i_length = OrdinalTraits<size_t>::zero();
1510 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1511 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1512 const Scalar Aval = Avals[k];
1513 if (Aval == STS::zero())
1516 if (Ak!=LO_INVALID)
continue;
1518 Ak = Acol2Irow[Acolind[k]];
1519 for (j = Irowptr[Ak]; j < Irowptr[Ak+1]; ++j) {
1520 LocalOrdinal col = Icolind[j];
1523 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1526 C_row_i[C_row_i_length] = Aval*Ivals[j];
1527 C_cols[C_row_i_length] = col;
1528 c_index[col] = C_row_i_length;
1534 C_row_i[c_index[col]] += Aval *
static_cast<Scalar
>(Ivals[j]);
1539 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1540 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1541 C_cols[ii] = bcols_import[C_cols[ii]];
1542 combined_index[last_index] = C_cols[ii];
1543 combined_values[last_index] = C_row_i[ii];
1550 C.sumIntoGlobalValues(
1552 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1553 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1555 C.insertGlobalValues(
1557 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1558 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1564 template<
class Scalar,
1566 class GlobalOrdinal,
1568 void setMaxNumEntriesPerRow(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) {
1569 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal> >::size_type local_length_size;
1570 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1572 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1573 Mview.maxNumRowEntries = Mview.indices[0].size();
1575 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1576 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1577 Mview.maxNumRowEntries = Mview.indices[i].size();
1582 template<
class CrsMatrixType>
1583 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1585 size_t Aest = 100, Best=100;
1586 if (A.getLocalNumEntries() >= A.getLocalNumRows())
1587 Aest = (A.getLocalNumRows() > 0) ? A.getLocalNumEntries()/A.getLocalNumRows() : 100;
1588 if (B.getLocalNumEntries() >= B.getLocalNumRows())
1589 Best = (B.getLocalNumRows() > 0) ? B.getLocalNumEntries()/B.getLocalNumRows() : 100;
1591 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1592 nnzperrow *= nnzperrow;
1594 return (
size_t)(A.getLocalNumRows()*nnzperrow*0.75 + 100);
1604 template<
class Scalar,
1606 class GlobalOrdinal,
1608 void mult_A_B_newmatrix(
1609 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1610 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1611 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1612 const std::string& label,
1613 const Teuchos::RCP<Teuchos::ParameterList>& params)
1615 using Teuchos::Array;
1616 using Teuchos::ArrayRCP;
1617 using Teuchos::ArrayView;
1622 typedef LocalOrdinal LO;
1623 typedef GlobalOrdinal GO;
1625 typedef Import<LO,GO,NO> import_type;
1626 typedef Map<LO,GO,NO> map_type;
1629 typedef typename map_type::local_map_type local_map_type;
1631 typedef typename KCRS::StaticCrsGraphType graph_t;
1632 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1633 typedef typename NO::execution_space execution_space;
1634 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1635 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1639 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1642 RCP<const import_type> Cimport;
1643 RCP<const map_type> Ccolmap;
1644 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1645 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
1646 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1647 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1648 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1649 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1650 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1651 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1658 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
1660 if (Bview.importMatrix.is_null()) {
1663 Ccolmap = Bview.colMap;
1664 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getLocalNumElements());
1666 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1667 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1668 KOKKOS_LAMBDA(
const LO i) {
1681 if (!Bimport.is_null() && !Iimport.is_null()) {
1682 Cimport = Bimport->setUnion(*Iimport);
1684 else if (!Bimport.is_null() && Iimport.is_null()) {
1685 Cimport = Bimport->setUnion();
1687 else if (Bimport.is_null() && !Iimport.is_null()) {
1688 Cimport = Iimport->setUnion();
1691 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1693 Ccolmap = Cimport->getTargetMap();
1698 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
1699 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1706 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
1707 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1708 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
1709 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1711 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
1712 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1721 C.replaceColMap(Ccolmap);
1739 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
1740 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
1742 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::construct_tables",range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
1743 GO aidx = Acolmap_local.getGlobalElement(i);
1744 LO B_LID = Browmap_local.getLocalElement(aidx);
1745 if (B_LID != LO_INVALID) {
1746 targetMapToOrigRow(i) = B_LID;
1747 targetMapToImportRow(i) = LO_INVALID;
1749 LO I_LID = Irowmap_local.getLocalElement(aidx);
1750 targetMapToOrigRow(i) = LO_INVALID;
1751 targetMapToImportRow(i) = I_LID;
1758 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_newmatrix_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
1764 template<
class Scalar,
1766 class GlobalOrdinal,
1768 void mult_A_B_newmatrix(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1769 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1770 Teuchos::RCP<BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &C)
1772 using Teuchos::null;
1773 using Teuchos::Array;
1774 using Teuchos::ArrayRCP;
1775 using Teuchos::ArrayView;
1780 typedef LocalOrdinal LO;
1781 typedef GlobalOrdinal GO;
1783 typedef Import<LO,GO,NO> import_type;
1784 typedef Map<LO,GO,NO> map_type;
1785 typedef BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> block_crs_matrix_type;
1786 typedef typename block_crs_matrix_type::crs_graph_type graph_t;
1789 typedef typename map_type::local_map_type local_map_type;
1790 typedef typename block_crs_matrix_type::local_matrix_device_type KBSR;
1791 typedef typename KBSR::device_type device_t;
1792 typedef typename KBSR::StaticCrsGraphType static_graph_t;
1793 typedef typename static_graph_t::row_map_type::non_const_type lno_view_t;
1794 typedef typename static_graph_t::entries_type::non_const_type lno_nnz_view_t;
1795 typedef typename KBSR::values_type::non_const_type scalar_view_t;
1796 typedef typename NO::execution_space execution_space;
1797 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1798 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1800 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1803 RCP<const import_type> Cimport;
1804 RCP<const map_type> Ccolmap;
1805 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1806 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
1807 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1808 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1809 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1810 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1811 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1812 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1819 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
1821 if (Bview.importMatrix.is_null()) {
1824 Ccolmap = Bview.colMap;
1825 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getLocalNumElements());
1827 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1828 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1829 KOKKOS_LAMBDA(
const LO i) {
1842 if (!Bimport.is_null() && !Iimport.is_null()) {
1843 Cimport = Bimport->setUnion(*Iimport);
1845 else if (!Bimport.is_null() && Iimport.is_null()) {
1846 Cimport = Bimport->setUnion();
1848 else if (Bimport.is_null() && !Iimport.is_null()) {
1849 Cimport = Iimport->setUnion();
1852 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1854 Ccolmap = Cimport->getTargetMap();
1861 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
1862 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1863 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",
1864 range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),
1865 KOKKOS_LAMBDA(
const LO i) {
1866 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1868 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",
1869 range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),
1870 KOKKOS_LAMBDA(
const LO i) {
1871 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1891 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),
1892 Aview.colMap->getLocalNumElements());
1893 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),
1894 Aview.colMap->getLocalNumElements());
1896 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::construct_tables",
1897 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),
1898 KOKKOS_LAMBDA(
const LO i) {
1899 GO aidx = Acolmap_local.getGlobalElement(i);
1900 LO B_LID = Browmap_local.getLocalElement(aidx);
1901 if (B_LID != LO_INVALID) {
1902 targetMapToOrigRow(i) = B_LID;
1903 targetMapToImportRow(i) = LO_INVALID;
1905 LO I_LID = Irowmap_local.getLocalElement(aidx);
1906 targetMapToOrigRow(i) = LO_INVALID;
1907 targetMapToImportRow(i) = I_LID;
1912 using KernelHandle =
1913 KokkosKernels::Experimental::KokkosKernelsHandle<
typename lno_view_t::const_value_type,
1914 typename lno_nnz_view_t::const_value_type,
1915 typename scalar_view_t::const_value_type,
1916 typename device_t::execution_space,
1917 typename device_t::memory_space,
1918 typename device_t::memory_space>;
1919 int team_work_size = 16;
1920 std::string myalg(
"SPGEMM_KK_MEMORY");
1921 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
1924 kh.create_spgemm_handle(alg_enum);
1925 kh.set_team_work_size(team_work_size);
1928 const KBSR Amat = Aview.origMatrix->getLocalMatrixDevice();
1929 const KBSR Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,
1930 targetMapToOrigRow,targetMapToImportRow,
1931 Bcol2Ccol,Icol2Ccol,
1932 Ccolmap.getConst()->getLocalNumElements());
1934 RCP<graph_t> graphC;
1935 typename KBSR::values_type values;
1940 KokkosSparse::block_spgemm_symbolic(kh, Amat,
false, Bmerged,
false, Cmat);
1941 KokkosSparse::block_spgemm_numeric (kh, Amat,
false, Bmerged,
false, Cmat);
1942 kh.destroy_spgemm_handle();
1945 graphC = rcp(
new graph_t(Cmat.graph, Aview.origMatrix->getRowMap(), Ccolmap.getConst()));
1946 values = Cmat.values;
1948 C = rcp (
new block_crs_matrix_type (*graphC, values, Aview.blocksize));
1954 template<
class Scalar,
1956 class GlobalOrdinal,
1958 class LocalOrdinalViewType>
1959 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1960 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1961 const LocalOrdinalViewType & targetMapToOrigRow,
1962 const LocalOrdinalViewType & targetMapToImportRow,
1963 const LocalOrdinalViewType & Bcol2Ccol,
1964 const LocalOrdinalViewType & Icol2Ccol,
1965 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1966 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
1967 const std::string& label,
1968 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1970 using Teuchos::Array;
1971 using Teuchos::ArrayRCP;
1972 using Teuchos::ArrayView;
1979 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
1980 typedef typename KCRS::StaticCrsGraphType graph_t;
1981 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
1982 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1983 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1984 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1987 typedef LocalOrdinal LO;
1988 typedef GlobalOrdinal GO;
1990 typedef Map<LO,GO,NO> map_type;
1991 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1992 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1993 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1996 RCP<const map_type> Ccolmap = C.getColMap();
1997 size_t m = Aview.origMatrix->getLocalNumRows();
1998 size_t n = Ccolmap->getLocalNumElements();
1999 size_t b_max_nnz_per_row = Bview.origMatrix->getLocalMaxNumRowEntries();
2002 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2003 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2005 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2006 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2007 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2009 c_lno_view_t Irowptr;
2010 lno_nnz_view_t Icolind;
2011 scalar_view_t Ivals;
2012 if(!Bview.importMatrix.is_null()) {
2013 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2014 Irowptr = lclB.graph.row_map;
2015 Icolind = lclB.graph.entries;
2016 Ivals = lclB.values;
2017 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getLocalMaxNumRowEntries());
2027 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2028 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
2029 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
2030 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
2040 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2041 std::vector<size_t> c_status(n, ST_INVALID);
2051 size_t CSR_ip = 0, OLD_ip = 0;
2052 for (
size_t i = 0; i < m; i++) {
2055 Crowptr[i] = CSR_ip;
2058 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2059 LO Aik = Acolind[k];
2060 const SC Aval = Avals[k];
2061 if (Aval == SC_ZERO)
2064 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2071 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2074 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2075 LO Bkj = Bcolind[j];
2076 LO Cij = Bcol2Ccol[Bkj];
2078 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2080 c_status[Cij] = CSR_ip;
2081 Ccolind[CSR_ip] = Cij;
2082 Cvals[CSR_ip] = Aval*Bvals[j];
2086 Cvals[c_status[Cij]] += Aval*Bvals[j];
2097 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2098 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2099 LO Ikj = Icolind[j];
2100 LO Cij = Icol2Ccol[Ikj];
2102 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip){
2104 c_status[Cij] = CSR_ip;
2105 Ccolind[CSR_ip] = Cij;
2106 Cvals[CSR_ip] = Aval*Ivals[j];
2109 Cvals[c_status[Cij]] += Aval*Ivals[j];
2116 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1])*b_max_nnz_per_row) > CSR_alloc) {
2118 Kokkos::resize(Ccolind,CSR_alloc);
2119 Kokkos::resize(Cvals,CSR_alloc);
2124 Crowptr[m] = CSR_ip;
2127 Kokkos::resize(Ccolind,CSR_ip);
2128 Kokkos::resize(Cvals,CSR_ip);
2134 if (params.is_null() || params->get(
"sort entries",
true))
2135 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2136 C.setAllValues(Crowptr,Ccolind, Cvals);
2152 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2153 labelList->set(
"Timer Label",label);
2154 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2155 RCP<const Export<LO,GO,NO> > dummyExport;
2156 C.expertStaticFillComplete(Bview. origMatrix->getDomainMap(), Aview. origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2162 template<
class Scalar,
2164 class GlobalOrdinal,
2166 void mult_A_B_reuse(
2167 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2168 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2169 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2170 const std::string& label,
2171 const Teuchos::RCP<Teuchos::ParameterList>& params)
2173 using Teuchos::Array;
2174 using Teuchos::ArrayRCP;
2175 using Teuchos::ArrayView;
2180 typedef LocalOrdinal LO;
2181 typedef GlobalOrdinal GO;
2183 typedef Import<LO,GO,NO> import_type;
2184 typedef Map<LO,GO,NO> map_type;
2187 typedef typename map_type::local_map_type local_map_type;
2189 typedef typename KCRS::StaticCrsGraphType graph_t;
2190 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2191 typedef typename NO::execution_space execution_space;
2192 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2193 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2198 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2201 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2202 RCP<const map_type> Ccolmap = C.getColMap();
2203 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2204 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2205 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2206 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2207 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2208 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2211 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
2215 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2216 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2219 if (!Bview.importMatrix.is_null()) {
2220 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2221 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
2223 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
2224 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2225 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2231 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
2232 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
2233 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2234 GO aidx = Acolmap_local.getGlobalElement(i);
2235 LO B_LID = Browmap_local.getLocalElement(aidx);
2236 if (B_LID != LO_INVALID) {
2237 targetMapToOrigRow(i) = B_LID;
2238 targetMapToImportRow(i) = LO_INVALID;
2240 LO I_LID = Irowmap_local.getLocalElement(aidx);
2241 targetMapToOrigRow(i) = LO_INVALID;
2242 targetMapToImportRow(i) = I_LID;
2249 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_reuse_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2253 template<
class Scalar,
2255 class GlobalOrdinal,
2257 class LocalOrdinalViewType>
2258 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2259 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2260 const LocalOrdinalViewType & targetMapToOrigRow,
2261 const LocalOrdinalViewType & targetMapToImportRow,
2262 const LocalOrdinalViewType & Bcol2Ccol,
2263 const LocalOrdinalViewType & Icol2Ccol,
2264 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2265 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > ,
2266 const std::string& label,
2267 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2273 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
2274 typedef typename KCRS::StaticCrsGraphType graph_t;
2275 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2276 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2277 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2280 typedef LocalOrdinal LO;
2281 typedef GlobalOrdinal GO;
2283 typedef Map<LO,GO,NO> map_type;
2284 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2285 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2286 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2292 RCP<const map_type> Ccolmap = C.getColMap();
2293 size_t m = Aview.origMatrix->getLocalNumRows();
2294 size_t n = Ccolmap->getLocalNumElements();
2297 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2298 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2299 const KCRS & Cmat = C.getLocalMatrixHost();
2301 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2302 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2303 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2304 scalar_view_t Cvals = Cmat.values;
2306 c_lno_view_t Irowptr;
2307 lno_nnz_view_t Icolind;
2308 scalar_view_t Ivals;
2309 if(!Bview.importMatrix.is_null()) {
2310 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2311 Irowptr = lclB.graph.row_map;
2312 Icolind = lclB.graph.entries;
2313 Ivals = lclB.values;
2325 std::vector<size_t> c_status(n, ST_INVALID);
2328 size_t CSR_ip = 0, OLD_ip = 0;
2329 for (
size_t i = 0; i < m; i++) {
2332 OLD_ip = Crowptr[i];
2333 CSR_ip = Crowptr[i+1];
2334 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2335 c_status[Ccolind[k]] = k;
2341 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2342 LO Aik = Acolind[k];
2343 const SC Aval = Avals[k];
2344 if (Aval == SC_ZERO)
2347 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2349 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2351 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2352 LO Bkj = Bcolind[j];
2353 LO Cij = Bcol2Ccol[Bkj];
2355 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2356 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2357 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2359 Cvals[c_status[Cij]] += Aval * Bvals[j];
2364 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2365 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2366 LO Ikj = Icolind[j];
2367 LO Cij = Icol2Ccol[Ikj];
2369 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2370 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2371 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2373 Cvals[c_status[Cij]] += Aval * Ivals[j];
2381 C.fillComplete(C.getDomainMap(), C.getRangeMap());
2388 template<
class Scalar,
2390 class GlobalOrdinal,
2392 void jacobi_A_B_newmatrix(
2394 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2395 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2396 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2397 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2398 const std::string& label,
2399 const Teuchos::RCP<Teuchos::ParameterList>& params)
2401 using Teuchos::Array;
2402 using Teuchos::ArrayRCP;
2403 using Teuchos::ArrayView;
2407 typedef LocalOrdinal LO;
2408 typedef GlobalOrdinal GO;
2411 typedef Import<LO,GO,NO> import_type;
2412 typedef Map<LO,GO,NO> map_type;
2413 typedef typename map_type::local_map_type local_map_type;
2417 typedef typename KCRS::StaticCrsGraphType graph_t;
2418 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2419 typedef typename NO::execution_space execution_space;
2420 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2421 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2425 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2428 RCP<const import_type> Cimport;
2429 RCP<const map_type> Ccolmap;
2430 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
2431 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
2432 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
2433 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2434 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2435 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2436 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2437 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2444 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
2446 if (Bview.importMatrix.is_null()) {
2449 Ccolmap = Bview.colMap;
2453 Kokkos::RangePolicy<execution_space, LO> range (0, static_cast<LO> (Bview.colMap->getLocalNumElements ()));
2454 Kokkos::parallel_for (range, KOKKOS_LAMBDA (
const size_t i) {
2455 Bcol2Ccol(i) =
static_cast<LO
> (i);
2466 if (!Bimport.is_null() && !Iimport.is_null()){
2467 Cimport = Bimport->setUnion(*Iimport);
2468 Ccolmap = Cimport->getTargetMap();
2470 }
else if (!Bimport.is_null() && Iimport.is_null()) {
2471 Cimport = Bimport->setUnion();
2473 }
else if(Bimport.is_null() && !Iimport.is_null()) {
2474 Cimport = Iimport->setUnion();
2477 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
2479 Ccolmap = Cimport->getTargetMap();
2481 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2482 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2489 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
2490 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2491 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2492 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2494 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2495 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2504 C.replaceColMap(Ccolmap);
2522 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
2523 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
2524 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2525 GO aidx = Acolmap_local.getGlobalElement(i);
2526 LO B_LID = Browmap_local.getLocalElement(aidx);
2527 if (B_LID != LO_INVALID) {
2528 targetMapToOrigRow(i) = B_LID;
2529 targetMapToImportRow(i) = LO_INVALID;
2531 LO I_LID = Irowmap_local.getLocalElement(aidx);
2532 targetMapToOrigRow(i) = LO_INVALID;
2533 targetMapToImportRow(i) = I_LID;
2540 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_newmatrix_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2549 template<
class Scalar,
2551 class GlobalOrdinal,
2553 class LocalOrdinalViewType>
2554 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
2555 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & Dinv,
2556 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2557 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2558 const LocalOrdinalViewType & targetMapToOrigRow,
2559 const LocalOrdinalViewType & targetMapToImportRow,
2560 const LocalOrdinalViewType & Bcol2Ccol,
2561 const LocalOrdinalViewType & Icol2Ccol,
2562 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2563 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2564 const std::string& label,
2565 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2569 using Teuchos::Array;
2570 using Teuchos::ArrayRCP;
2571 using Teuchos::ArrayView;
2576 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
2577 typedef typename KCRS::StaticCrsGraphType graph_t;
2578 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2579 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2580 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2581 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2584 typedef typename scalar_view_t::memory_space scalar_memory_space;
2587 typedef LocalOrdinal LO;
2588 typedef GlobalOrdinal GO;
2591 typedef Map<LO,GO,NO> map_type;
2592 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2593 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2596 RCP<const map_type> Ccolmap = C.getColMap();
2597 size_t m = Aview.origMatrix->getLocalNumRows();
2598 size_t n = Ccolmap->getLocalNumElements();
2599 size_t b_max_nnz_per_row = Bview.origMatrix->getLocalMaxNumRowEntries();
2602 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2603 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2605 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2606 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2607 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2609 c_lno_view_t Irowptr;
2610 lno_nnz_view_t Icolind;
2611 scalar_view_t Ivals;
2612 if(!Bview.importMatrix.is_null()) {
2613 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2614 Irowptr = lclB.graph.row_map;
2615 Icolind = lclB.graph.entries;
2616 Ivals = lclB.values;
2617 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getLocalMaxNumRowEntries());
2622 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
2630 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2631 Array<size_t> c_status(n, ST_INVALID);
2640 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2641 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
2642 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
2643 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
2644 size_t CSR_ip = 0, OLD_ip = 0;
2646 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2660 for (
size_t i = 0; i < m; i++) {
2663 Crowptr[i] = CSR_ip;
2664 SC minusOmegaDval = -omega*Dvals(i,0);
2667 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2668 Scalar Bval = Bvals[j];
2669 if (Bval == SC_ZERO)
2671 LO Bij = Bcolind[j];
2672 LO Cij = Bcol2Ccol[Bij];
2675 c_status[Cij] = CSR_ip;
2676 Ccolind[CSR_ip] = Cij;
2677 Cvals[CSR_ip] = Bvals[j];
2682 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2683 LO Aik = Acolind[k];
2684 const SC Aval = Avals[k];
2685 if (Aval == SC_ZERO)
2688 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2690 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2692 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2693 LO Bkj = Bcolind[j];
2694 LO Cij = Bcol2Ccol[Bkj];
2696 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2698 c_status[Cij] = CSR_ip;
2699 Ccolind[CSR_ip] = Cij;
2700 Cvals[CSR_ip] = minusOmegaDval* Aval * Bvals[j];
2704 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Bvals[j];
2710 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2711 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2712 LO Ikj = Icolind[j];
2713 LO Cij = Icol2Ccol[Ikj];
2715 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2717 c_status[Cij] = CSR_ip;
2718 Ccolind[CSR_ip] = Cij;
2719 Cvals[CSR_ip] = minusOmegaDval* Aval * Ivals[j];
2722 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Ivals[j];
2729 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1]+1)*b_max_nnz_per_row) > CSR_alloc) {
2731 Kokkos::resize(Ccolind,CSR_alloc);
2732 Kokkos::resize(Cvals,CSR_alloc);
2736 Crowptr[m] = CSR_ip;
2739 Kokkos::resize(Ccolind,CSR_ip);
2740 Kokkos::resize(Cvals,CSR_ip);
2749 C.replaceColMap(Ccolmap);
2756 if (params.is_null() || params->get(
"sort entries",
true))
2757 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2758 C.setAllValues(Crowptr,Ccolind, Cvals);
2771 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2772 labelList->set(
"Timer Label",label);
2773 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2774 RCP<const Export<LO,GO,NO> > dummyExport;
2775 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2783 template<
class Scalar,
2785 class GlobalOrdinal,
2787 void jacobi_A_B_reuse(
2789 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2790 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2791 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2792 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2793 const std::string& label,
2794 const Teuchos::RCP<Teuchos::ParameterList>& params)
2796 using Teuchos::Array;
2797 using Teuchos::ArrayRCP;
2798 using Teuchos::ArrayView;
2802 typedef LocalOrdinal LO;
2803 typedef GlobalOrdinal GO;
2806 typedef Import<LO,GO,NO> import_type;
2807 typedef Map<LO,GO,NO> map_type;
2810 typedef typename map_type::local_map_type local_map_type;
2812 typedef typename KCRS::StaticCrsGraphType graph_t;
2813 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2814 typedef typename NO::execution_space execution_space;
2815 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2816 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2818 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2819 lo_view_t Bcol2Ccol, Icol2Ccol;
2820 lo_view_t targetMapToOrigRow;
2821 lo_view_t targetMapToImportRow;
2825 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2828 RCP<const map_type> Ccolmap = C.getColMap();
2829 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2830 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2831 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2832 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2833 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2834 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2837 Bcol2Ccol = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements());
2841 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2842 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2845 if (!Bview.importMatrix.is_null()) {
2846 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2847 std::runtime_error,
"Tpetra::Jacobi: Import setUnion messed with the DomainMap in an unfortunate way");
2849 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
2850 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2851 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2857 targetMapToOrigRow = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
2858 targetMapToImportRow = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
2859 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2860 GO aidx = Acolmap_local.getGlobalElement(i);
2861 LO B_LID = Browmap_local.getLocalElement(aidx);
2862 if (B_LID != LO_INVALID) {
2863 targetMapToOrigRow(i) = B_LID;
2864 targetMapToImportRow(i) = LO_INVALID;
2866 LO I_LID = Irowmap_local.getLocalElement(aidx);
2867 targetMapToOrigRow(i) = LO_INVALID;
2868 targetMapToImportRow(i) = I_LID;
2877 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_reuse_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2883 template<
class Scalar,
2885 class GlobalOrdinal,
2887 class LocalOrdinalViewType>
2888 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
2889 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2890 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2891 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2892 const LocalOrdinalViewType & targetMapToOrigRow,
2893 const LocalOrdinalViewType & targetMapToImportRow,
2894 const LocalOrdinalViewType & Bcol2Ccol,
2895 const LocalOrdinalViewType & Icol2Ccol,
2896 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2897 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > ,
2898 const std::string& label,
2899 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2909 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
2910 typedef typename KCRS::StaticCrsGraphType graph_t;
2911 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2912 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2913 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2914 typedef typename scalar_view_t::memory_space scalar_memory_space;
2917 typedef LocalOrdinal LO;
2918 typedef GlobalOrdinal GO;
2920 typedef Map<LO,GO,NO> map_type;
2921 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2922 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2923 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2926 RCP<const map_type> Ccolmap = C.getColMap();
2927 size_t m = Aview.origMatrix->getLocalNumRows();
2928 size_t n = Ccolmap->getLocalNumElements();
2931 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2932 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2933 const KCRS & Cmat = C.getLocalMatrixHost();
2935 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2936 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2937 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2938 scalar_view_t Cvals = Cmat.values;
2940 c_lno_view_t Irowptr;
2941 lno_nnz_view_t Icolind;
2942 scalar_view_t Ivals;
2943 if(!Bview.importMatrix.is_null()) {
2944 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2945 Irowptr = lclB.graph.row_map;
2946 Icolind = lclB.graph.entries;
2947 Ivals = lclB.values;
2952 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
2960 std::vector<size_t> c_status(n, ST_INVALID);
2963 size_t CSR_ip = 0, OLD_ip = 0;
2964 for (
size_t i = 0; i < m; i++) {
2968 OLD_ip = Crowptr[i];
2969 CSR_ip = Crowptr[i+1];
2970 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2971 c_status[Ccolind[k]] = k;
2977 SC minusOmegaDval = -omega*Dvals(i,0);
2980 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2981 Scalar Bval = Bvals[j];
2982 if (Bval == SC_ZERO)
2984 LO Bij = Bcolind[j];
2985 LO Cij = Bcol2Ccol[Bij];
2987 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2988 std::runtime_error,
"Trying to insert a new entry into a static graph");
2990 Cvals[c_status[Cij]] = Bvals[j];
2994 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2995 LO Aik = Acolind[k];
2996 const SC Aval = Avals[k];
2997 if (Aval == SC_ZERO)
3000 if (targetMapToOrigRow[Aik] != LO_INVALID) {
3002 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
3004 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
3005 LO Bkj = Bcolind[j];
3006 LO Cij = Bcol2Ccol[Bkj];
3008 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3009 std::runtime_error,
"Trying to insert a new entry into a static graph");
3011 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
3016 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
3017 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
3018 LO Ikj = Icolind[j];
3019 LO Cij = Icol2Ccol[Ikj];
3021 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3022 std::runtime_error,
"Trying to insert a new entry into a static graph");
3024 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
3033 C.fillComplete(C.getDomainMap(), C.getRangeMap());
3041 template<
class Scalar,
3043 class GlobalOrdinal,
3045 void import_and_extract_views(
3046 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
3047 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
3048 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3049 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
3050 bool userAssertsThereAreNoRemotes,
3051 const std::string& label,
3052 const Teuchos::RCP<Teuchos::ParameterList>& params)
3054 using Teuchos::Array;
3055 using Teuchos::ArrayView;
3058 using Teuchos::null;
3061 typedef LocalOrdinal LO;
3062 typedef GlobalOrdinal GO;
3065 typedef Map<LO,GO,NO> map_type;
3066 typedef Import<LO,GO,NO> import_type;
3067 typedef CrsMatrix<SC,LO,GO,NO> crs_matrix_type;
3078 Aview.deleteContents();
3080 Aview.origMatrix = rcp(&A,
false);
3082 Aview.origMatrix->getApplyHelper();
3083 Aview.origRowMap = A.getRowMap();
3084 Aview.rowMap = targetMap;
3085 Aview.colMap = A.getColMap();
3086 Aview.domainMap = A.getDomainMap();
3087 Aview.importColMap = null;
3088 RCP<const map_type> rowMap = A.getRowMap();
3089 const int numProcs = rowMap->getComm()->getSize();
3092 if (userAssertsThereAreNoRemotes || numProcs < 2)
3095 RCP<const import_type> importer;
3096 if (params != null && params->isParameter(
"importer")) {
3097 importer = params->get<RCP<const import_type> >(
"importer");
3104 RCP<const map_type> remoteRowMap;
3105 size_t numRemote = 0;
3107 if (!prototypeImporter.is_null() &&
3108 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3109 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3113 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3114 numRemote = prototypeImporter->getNumRemoteIDs();
3116 Array<GO> remoteRows(numRemote);
3117 for (
size_t i = 0; i < numRemote; i++)
3118 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3120 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3121 rowMap->getIndexBase(), rowMap->getComm()));
3124 }
else if (prototypeImporter.is_null()) {
3128 ArrayView<const GO> rows = targetMap->getLocalElementList();
3129 size_t numRows = targetMap->getLocalNumElements();
3131 Array<GO> remoteRows(numRows);
3132 for(
size_t i = 0; i < numRows; ++i) {
3133 const LO mlid = rowMap->getLocalElement(rows[i]);
3135 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3136 remoteRows[numRemote++] = rows[i];
3138 remoteRows.resize(numRemote);
3139 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3140 rowMap->getIndexBase(), rowMap->getComm()));
3149 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3150 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3162 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
3164 if (globalMaxNumRemote > 0) {
3169 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3171 importer = rcp(
new import_type(rowMap, remoteRowMap));
3173 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
3177 params->set(
"importer", importer);
3180 if (importer != null) {
3184 Teuchos::ParameterList labelList;
3185 labelList.set(
"Timer Label", label);
3186 auto & labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
3189 bool overrideAllreduce =
false;
3192 Teuchos::ParameterList params_sublist;
3193 if(!params.is_null()) {
3194 labelList.set(
"compute global constants", params->get(
"compute global constants",
false));
3195 params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
3196 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3197 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3198 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
3199 isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
3200 overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
3202 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
3203 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3204 labelList_subList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
3206 Aview.importMatrix = Tpetra::importAndFillCompleteCrsMatrix<crs_matrix_type>(rcpFromRef(A), *importer,
3207 A.getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
3209 Aview.importMatrix->getApplyHelper();
3215 sprintf(str,
"import_matrix.%d.dat",count);
3220 #ifdef HAVE_TPETRA_MMM_STATISTICS
3221 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
3228 Aview.importColMap = Aview.importMatrix->getColMap();
3235 template<
class Scalar,
3237 class GlobalOrdinal,
3239 void import_and_extract_views(
3240 const BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& M,
3241 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
3242 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview,
3243 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
3244 bool userAssertsThereAreNoRemotes)
3246 using Teuchos::Array;
3247 using Teuchos::ArrayView;
3250 using Teuchos::null;
3253 typedef LocalOrdinal LO;
3254 typedef GlobalOrdinal GO;
3257 typedef Map<LO,GO,NO> map_type;
3258 typedef Import<LO,GO,NO> import_type;
3259 typedef BlockCrsMatrix<SC,LO,GO,NO> blockcrs_matrix_type;
3267 Mview.deleteContents();
3269 Mview.origMatrix = rcp(&M,
false);
3271 Mview.origMatrix->getApplyHelper();
3272 Mview.origRowMap = M.getRowMap();
3273 Mview.rowMap = targetMap;
3274 Mview.colMap = M.getColMap();
3275 Mview.importColMap = null;
3276 RCP<const map_type> rowMap = M.getRowMap();
3277 const int numProcs = rowMap->getComm()->getSize();
3280 if (userAssertsThereAreNoRemotes || numProcs < 2)
return;
3284 RCP<const map_type> remoteRowMap;
3285 size_t numRemote = 0;
3287 if (!prototypeImporter.is_null() &&
3288 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3289 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3292 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3293 numRemote = prototypeImporter->getNumRemoteIDs();
3295 Array<GO> remoteRows(numRemote);
3296 for (
size_t i = 0; i < numRemote; i++)
3297 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3299 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3300 rowMap->getIndexBase(), rowMap->getComm()));
3303 }
else if (prototypeImporter.is_null()) {
3306 ArrayView<const GO> rows = targetMap->getLocalElementList();
3307 size_t numRows = targetMap->getLocalNumElements();
3309 Array<GO> remoteRows(numRows);
3310 for(
size_t i = 0; i < numRows; ++i) {
3311 const LO mlid = rowMap->getLocalElement(rows[i]);
3313 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3314 remoteRows[numRemote++] = rows[i];
3316 remoteRows.resize(numRemote);
3317 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3318 rowMap->getIndexBase(), rowMap->getComm()));
3327 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3328 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3336 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
3338 RCP<const import_type> importer;
3340 if (globalMaxNumRemote > 0) {
3343 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3345 importer = rcp(
new import_type(rowMap, remoteRowMap));
3347 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match M.getRowMap()!");
3350 if (importer != null) {
3353 Mview.importMatrix = Tpetra::importAndFillCompleteBlockCrsMatrix<blockcrs_matrix_type>(rcpFromRef(M), *importer);
3355 Mview.importMatrix->getApplyHelper();
3358 Mview.importColMap = Mview.importMatrix->getColMap();
3364 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3366 merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3367 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3368 const LocalOrdinalViewType & Acol2Brow,
3369 const LocalOrdinalViewType & Acol2Irow,
3370 const LocalOrdinalViewType & Bcol2Ccol,
3371 const LocalOrdinalViewType & Icol2Ccol,
3372 const size_t mergedNodeNumCols) {
3376 typedef typename KCRS::StaticCrsGraphType graph_t;
3377 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3378 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3379 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3381 const KCRS & Ak = Aview.origMatrix->getLocalMatrixDevice();
3382 const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
3385 if(!Bview.importMatrix.is_null() || (Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3389 RCP<const KCRS> Ik_;
3390 if(!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KCRS>(Bview.importMatrix->getLocalMatrixDevice());
3391 const KCRS * Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3393 if(Ik!=0) Iks = *Ik;
3394 size_t merge_numrows = Ak.numCols();
3396 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3398 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3401 typedef typename Node::execution_space execution_space;
3402 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3403 Kokkos::parallel_scan (
"Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
3404 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3405 if(
final) Mrowptr(i) = update;
3408 if(Acol2Brow(i)!=LO_INVALID)
3409 ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
3411 ct = Iks.graph.row_map(Acol2Irow(i)+1) - Iks.graph.row_map(Acol2Irow(i));
3414 if(
final && i+1==merge_numrows)
3415 Mrowptr(i+1)=update;
3419 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr,merge_numrows);
3420 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"),merge_nnz);
3421 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"),merge_nnz);
3424 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3425 Kokkos::parallel_for (
"Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),KOKKOS_LAMBDA(
const size_t i) {
3426 if(Acol2Brow(i)!=LO_INVALID) {
3427 size_t row = Acol2Brow(i);
3428 size_t start = Bk.graph.row_map(row);
3429 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3430 Mvalues(j) = Bk.values(j-Mrowptr(i)+start);
3431 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
3435 size_t row = Acol2Irow(i);
3436 size_t start = Iks.graph.row_map(row);
3437 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3438 Mvalues(j) = Iks.values(j-Mrowptr(i)+start);
3439 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j-Mrowptr(i)+start));
3444 KCRS newmat(
"CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind);
3455 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3456 const typename Tpetra::BlockCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_device_type
3457 merge_matrices(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3458 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3459 const LocalOrdinalViewType & Acol2Brow,
3460 const LocalOrdinalViewType & Acol2Irow,
3461 const LocalOrdinalViewType & Bcol2Ccol,
3462 const LocalOrdinalViewType & Icol2Ccol,
3463 const size_t mergedNodeNumCols)
3466 typedef typename Tpetra::BlockCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_device_type KBCRS;
3467 typedef typename KBCRS::StaticCrsGraphType graph_t;
3468 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3469 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3470 typedef typename KBCRS::values_type::non_const_type scalar_view_t;
3473 const KBCRS & Ak = Aview.origMatrix->getLocalMatrixDevice();
3474 const KBCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
3477 if(!Bview.importMatrix.is_null() ||
3478 (Bview.importMatrix.is_null() &&
3479 (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3484 RCP<const KBCRS> Ik_;
3485 if(!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KBCRS>(Bview.importMatrix->getLocalMatrixDevice());
3486 const KBCRS * Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3488 if(Ik!=0) Iks = *Ik;
3489 size_t merge_numrows = Ak.numCols();
3492 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3494 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3497 typedef typename Node::execution_space execution_space;
3498 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3499 Kokkos::parallel_scan (
"Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
3500 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3501 if(
final) Mrowptr(i) = update;
3504 if(Acol2Brow(i)!=LO_INVALID)
3505 ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
3507 ct = Iks.graph.row_map(Acol2Irow(i)+1) - Iks.graph.row_map(Acol2Irow(i));
3510 if(
final && i+1==merge_numrows)
3511 Mrowptr(i+1)=update;
3515 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr,merge_numrows);
3516 const int blocksize = Ak.blockDim();
3517 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"),merge_nnz);
3518 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"),merge_nnz*blocksize*blocksize);
3521 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3522 Kokkos::parallel_for (
"Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),KOKKOS_LAMBDA(
const size_t i) {
3523 if(Acol2Brow(i)!=LO_INVALID) {
3524 size_t row = Acol2Brow(i);
3525 size_t start = Bk.graph.row_map(row);
3526 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3527 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
3529 for (
int b=0; b<blocksize*blocksize; ++b) {
3530 const int val_indx = j*blocksize*blocksize + b;
3531 const int b_val_indx = (j-Mrowptr(i)+
start)*blocksize*blocksize + b;
3532 Mvalues(val_indx) = Bk.values(b_val_indx);
3537 size_t row = Acol2Irow(i);
3538 size_t start = Iks.graph.row_map(row);
3539 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3540 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j-Mrowptr(i)+start));
3542 for (
int b=0; b<blocksize*blocksize; ++b) {
3543 const int val_indx = j*blocksize*blocksize + b;
3544 const int b_val_indx = (j-Mrowptr(i)+
start)*blocksize*blocksize + b;
3545 Mvalues(val_indx) = Iks.values(b_val_indx);
3552 KBCRS newmat(
"CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind, blocksize);
3562 template<
typename SC,
typename LO,
typename GO,
typename NO>
3563 void AddKernels<SC, LO, GO, NO>::
3565 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
3566 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
3567 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
3568 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3569 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
3570 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
3571 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
3572 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3574 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3575 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3576 typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
3578 using Teuchos::TimeMonitor;
3580 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
3581 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
3582 auto nrows = Arowptrs.extent(0) - 1;
3583 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3584 typename AddKern::KKH handle;
3585 handle.create_spadd_handle(
true);
3586 auto addHandle = handle.get_spadd_handle();
3590 KokkosSparse::spadd_symbolic
3592 nrows, numGlobalCols,
3593 Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
3595 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3596 Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3599 KokkosSparse::spadd_numeric(&handle,
3600 nrows, numGlobalCols,
3601 Arowptrs, Acolinds, Avals, scalarA,
3602 Browptrs, Bcolinds, Bvals, scalarB,
3603 Crowptrs, Ccolinds, Cvals);
3606 template<
typename SC,
typename LO,
typename GO,
typename NO>
3607 void AddKernels<SC, LO, GO, NO>::
3609 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
3610 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
3611 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
3612 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3613 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
3614 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
3615 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
3616 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3618 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3619 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3620 typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
3622 using Teuchos::TimeMonitor;
3624 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
3625 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
3626 auto nrows = Arowptrs.extent(0) - 1;
3627 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3628 typedef MMdetails::AddKernels<SC, LO, GO, NO> AddKern;
3629 typename AddKern::KKH handle;
3630 handle.create_spadd_handle(
false);
3631 auto addHandle = handle.get_spadd_handle();
3634 KokkosSparse::spadd_symbolic
3636 nrows, numGlobalCols,
3637 Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
3639 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3640 Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3642 KokkosSparse::spadd_numeric(&handle,
3643 nrows, numGlobalCols,
3644 Arowptrs, Acolinds, Avals, scalarA,
3645 Browptrs, Bcolinds, Bvals, scalarB,
3646 Crowptrs, Ccolinds, Cvals);
3649 template<
typename GO,
3650 typename LocalIndicesType,
3651 typename GlobalIndicesType,
3652 typename ColMapType>
3653 struct ConvertLocalToGlobalFunctor
3655 ConvertLocalToGlobalFunctor(
3656 const LocalIndicesType& colindsOrig_,
3657 const GlobalIndicesType& colindsConverted_,
3658 const ColMapType& colmap_) :
3659 colindsOrig (colindsOrig_),
3660 colindsConverted (colindsConverted_),
3663 KOKKOS_INLINE_FUNCTION
void
3664 operator() (
const GO i)
const
3666 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
3668 LocalIndicesType colindsOrig;
3669 GlobalIndicesType colindsConverted;
3673 template<
typename SC,
typename LO,
typename GO,
typename NO>
3674 void MMdetails::AddKernels<SC, LO, GO, NO>::
3675 convertToGlobalAndAdd(
3676 const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS& A,
3677 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3678 const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS& B,
3679 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3680 const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& AcolMap,
3681 const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& BcolMap,
3682 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3683 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3684 typename MMdetails::AddKernels<SC, LO, GO, NO>::global_col_inds_array& Ccolinds)
3686 using Teuchos::TimeMonitor;
3690 using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GO, impl_scalar_type,
3691 typename NO::execution_space,
typename NO::memory_space,
typename NO::memory_space>;
3693 const values_array Avals = A.values;
3694 const values_array Bvals = B.values;
3695 const col_inds_array Acolinds = A.graph.entries;
3696 const col_inds_array Bcolinds = B.graph.entries;
3697 auto Arowptrs = A.graph.row_map;
3698 auto Browptrs = B.graph.row_map;
3699 global_col_inds_array AcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"A colinds (converted)"), Acolinds.extent(0));
3700 global_col_inds_array BcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"B colinds (converted)"), Bcolinds.extent(0));
3704 ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertA(Acolinds, AcolindsConverted, AcolMap);
3705 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsA", range_type(0, Acolinds.extent(0)), convertA);
3706 ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertB(Bcolinds, BcolindsConverted, BcolMap);
3707 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsB", range_type(0, Bcolinds.extent(0)), convertB);
3709 handle.create_spadd_handle(
false);
3710 auto addHandle = handle.get_spadd_handle();
3713 auto nrows = Arowptrs.extent(0) - 1;
3714 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3715 KokkosSparse::spadd_symbolic
3718 Arowptrs, AcolindsConverted, Browptrs, BcolindsConverted, Crowptrs);
3719 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3720 Ccolinds = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3724 KokkosSparse::spadd_numeric(&handle,
3726 Arowptrs, AcolindsConverted, Avals, scalarA,
3727 Browptrs, BcolindsConverted, Bvals, scalarB,
3728 Crowptrs, Ccolinds, Cvals);
3744 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
3746 void MatrixMatrix::Multiply( \
3747 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3749 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3751 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3752 bool call_FillComplete_on_result, \
3753 const std::string & label, \
3754 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3757 void MatrixMatrix::Multiply( \
3758 const Teuchos::RCP<const BlockCrsMatrix< SCALAR , LO , GO , NODE > >& A, \
3760 const Teuchos::RCP<const BlockCrsMatrix< SCALAR , LO , GO , NODE > >& B, \
3762 Teuchos::RCP<BlockCrsMatrix< SCALAR , LO , GO , NODE > >& C, \
3763 const std::string & label); \
3766 void MatrixMatrix::Jacobi( \
3768 const Vector< SCALAR, LO, GO, NODE > & Dinv, \
3769 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3770 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3771 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3772 bool call_FillComplete_on_result, \
3773 const std::string & label, \
3774 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3777 void MatrixMatrix::Add( \
3778 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3781 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3784 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > >& C); \
3787 void MatrixMatrix::Add( \
3788 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3791 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3794 const Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > >& C); \
3797 void MatrixMatrix::Add( \
3798 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3801 CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3805 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \
3806 MatrixMatrix::add<SCALAR, LO, GO, NODE> \
3807 (const SCALAR & alpha, \
3808 const bool transposeA, \
3809 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3810 const SCALAR & beta, \
3811 const bool transposeB, \
3812 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3813 const Teuchos::RCP<const Map<LO, GO, NODE> >& domainMap, \
3814 const Teuchos::RCP<const Map<LO, GO, NODE> >& rangeMap, \
3815 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3819 MatrixMatrix::add< SCALAR , LO, GO , NODE > \
3820 (const SCALAR & alpha, \
3821 const bool transposeA, \
3822 const CrsMatrix< SCALAR , LO, GO , NODE >& A, \
3823 const SCALAR& beta, \
3824 const bool transposeB, \
3825 const CrsMatrix< SCALAR , LO, GO , NODE >& B, \
3826 CrsMatrix< SCALAR , LO, GO , NODE >& C, \
3827 const Teuchos::RCP<const Map<LO, GO , NODE > >& domainMap, \
3828 const Teuchos::RCP<const Map<LO, GO , NODE > >& rangeMap, \
3829 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3831 template struct MMdetails::AddKernels<SCALAR, LO, GO, NODE>; \
3833 template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const CrsMatrix<SCALAR, LO, GO, NODE>& M, \
3834 Teuchos::RCP<const Map<LO, GO, NODE> > targetMap, \
3835 CrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview, \
3836 Teuchos::RCP<const Import<LO,GO, NODE> > prototypeImporter, \
3837 bool userAssertsThereAreNoRemotes, \
3838 const std::string& label, \
3839 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3841 template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const BlockCrsMatrix<SCALAR, LO, GO, NODE>& M, \
3842 Teuchos::RCP<const Map<LO, GO, NODE> > targetMap, \
3843 BlockCrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview, \
3844 Teuchos::RCP<const Import<LO,GO, NODE> > prototypeImporter, \
3845 bool userAssertsThereAreNoRemotes);
3848 #endif // TPETRA_MATRIXMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
bool isFillActive() const
Whether the matrix is not fill complete.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Construct and (optionally) redistribute the explicitly stored transpose of a CrsMatrix.
Struct that holds views of the contents of a BlockCrsMatrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
static void writeSparseFile(const std::string &filename, const sparse_matrix_type &matrix, const std::string &matrixName, const std::string &matrixDescription, const bool debug=false)
Print the sparse matrix in Matrix Market format, with comments.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
Utility functions for packing and unpacking sparse matrix entries.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
void start()
Start the deep_copy counter.
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
Teuchos::RCP< crs_matrix_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute and return the transpose of the matrix given to the constructor.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
A parallel distribution of indices over processes.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
A distributed dense vector.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Matrix Market file readers and writers for Tpetra objects.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
void setAllValues(const typename local_graph_device_type::row_map_type &ptr, const typename local_graph_device_type::entries_type::non_const_type &ind, const typename local_matrix_device_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void removeCrsMatrixZeros(CrsMatrixType &matrix, typename Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitudeType const &threshold=Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitude(Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::zero()))
Remove zero entries from a matrix.
Forward declaration of some Tpetra Matrix Matrix objects.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.