10 #ifndef TPETRA_CRSMATRIX_DEF_HPP
11 #define TPETRA_CRSMATRIX_DEF_HPP
23 #include "Tpetra_RowMatrix.hpp"
24 #include "Tpetra_LocalCrsMatrixOperator.hpp"
25 #include "Tpetra_computeRowAndColumnOneNorms.hpp"
34 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
42 #include "Tpetra_Details_packCrsMatrix.hpp"
43 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
45 #include "Teuchos_FancyOStream.hpp"
46 #include "Teuchos_RCP.hpp"
47 #include "Teuchos_DataAccess.hpp"
48 #include "Teuchos_SerialDenseMatrix.hpp"
49 #include "KokkosBlas1_scal.hpp"
50 #include "KokkosSparse_getDiagCopy.hpp"
51 #include "KokkosSparse_spmv.hpp"
// Atomic read-modify-write helper: evaluates f(assume, inputVal) and tries to
// store the result at `dest` with Kokkos::atomic_compare_exchange, passing
// `assume` as the expected current value. The visible loop tail repeats while
// the value returned by the exchange differs from `assume`.
// NOTE(review): the rest of the signature and the loop head are not visible
// in this excerpt; what the function returns cannot be confirmed from here.
64 template <
class T,
class BinaryFunction>
65 T atomic_binary_function_update(T*
const dest,
78 T newVal = f(assume, inputVal);
79 oldVal = Kokkos::atomic_compare_exchange(dest, assume, newVal);
80 }
while (assume != oldVal);
// Returns the larger of the magnitudes of x and y, using
// Teuchos::ScalarTraits<Scalar>::magnitude for both operands.
// NOTE(review): the enclosing declaration (name, parameter list) is not
// visible in this excerpt.
100 template <
class Scalar>
104 typedef Teuchos::ScalarTraits<Scalar> STS;
105 return std::max(STS::magnitude(x), STS::magnitude(y));
// Constructor taking a row Map, a maximum number of entries per row, and an
// optional parameter list.
// A new crs_graph_type is created from (rowMap, maxNumEntriesPerRow, ...).
// If that construction throws a std::exception, the catch block raises
// std::runtime_error whose message starts with the text below.
// At the end, staticGraph_ is set to the same graph as myGraph_.
// NOTE(review): the assignment to myGraph_ is not visible in this excerpt.
114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
115 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
116 CrsMatrix(
const Teuchos::RCP<const map_type>& rowMap,
117 size_t maxNumEntriesPerRow,
118 const Teuchos::RCP<Teuchos::ParameterList>& params)
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages.
120 const char tfecfFuncName[] =
121 "CrsMatrix(RCP<const Map>, size_t "
122 "[, RCP<ParameterList>]): ";
123 Teuchos::RCP<crs_graph_type> graph;
125 graph = Teuchos::rcp(
new crs_graph_type(rowMap, maxNumEntriesPerRow,
127 }
catch (std::exception& e) {
128 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
129 "CrsGraph constructor (RCP<const Map>, "
130 "size_t [, RCP<ParameterList>]) threw an exception: "
137 staticGraph_ = myGraph_;
// Constructor overload whose visible parameters are a per-row entry-count
// array (numEntPerRowToAlloc) and an optional parameter list; per the message
// prefix below, the leading argument is a row Map.
// A std::exception thrown while building the graph is reported as
// std::runtime_error. staticGraph_ is then set to the newly built graph.
// NOTE(review): the graph construction statement itself is not visible here.
142 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
145 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
146 const Teuchos::RCP<Teuchos::ParameterList>& params)
148 const char tfecfFuncName[] =
149 "CrsMatrix(RCP<const Map>, "
150 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
151 Teuchos::RCP<crs_graph_type> graph;
156 }
catch (std::exception& e) {
157 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
158 "CrsGraph constructor "
159 "(RCP<const Map>, ArrayView<const size_t>"
160 "[, RCP<ParameterList>]) threw an exception: "
167 staticGraph_ = graph;
// Constructor overload with a column Map, a maximum entry count per row, and
// an optional parameter list (the message prefix indicates a row Map comes
// first).
// Visible behavior:
//  - raises std::logic_error if staticGraph_ or myGraph_ is already non-null
//    when the body starts (internal sanity checks);
//  - a std::exception thrown while building the graph is reported as
//    std::runtime_error;
//  - finally staticGraph_ is set to the same graph as myGraph_.
// NOTE(review): the graph construction and the myGraph_ assignment are not
// visible in this excerpt.
172 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
175 const Teuchos::RCP<const map_type>& colMap,
176 const size_t maxNumEntPerRow,
177 const Teuchos::RCP<Teuchos::ParameterList>& params)
179 const char tfecfFuncName[] =
180 "CrsMatrix(RCP<const Map>, "
181 "RCP<const Map>, size_t[, RCP<ParameterList>]): ";
// Appended to messages that indicate an internal (developer-side) error.
182 const char suffix[] =
183 " Please report this bug to the Tpetra developers.";
186 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!staticGraph_.is_null(), std::logic_error,
187 "staticGraph_ is not null at the beginning of the constructor."
189 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!myGraph_.is_null(), std::logic_error,
190 "myGraph_ is not null at the beginning of the constructor."
192 Teuchos::RCP<crs_graph_type> graph;
197 }
catch (std::exception& e) {
198 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
199 "CrsGraph constructor (RCP<const Map>, "
200 "RCP<const Map>, size_t[, RCP<ParameterList>]) threw an "
208 staticGraph_ = myGraph_;
// Constructor overload with a column Map, a per-row entry-count array, and an
// optional parameter list (the message prefix indicates a row Map comes
// first).
// A std::exception thrown while building the graph is reported as
// std::runtime_error. staticGraph_ is then set to the newly built graph.
// NOTE(review): the graph construction statement is not visible here.
213 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
216 const Teuchos::RCP<const map_type>& colMap,
217 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
218 const Teuchos::RCP<Teuchos::ParameterList>& params)
220 const char tfecfFuncName[] =
221 "CrsMatrix(RCP<const Map>, RCP<const Map>, "
222 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
223 Teuchos::RCP<crs_graph_type> graph;
228 }
catch (std::exception& e) {
229 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
230 "CrsGraph constructor (RCP<const Map>, "
231 "RCP<const Map>, ArrayView<const size_t>[, "
232 "RCP<ParameterList>]) threw an exception: "
239 staticGraph_ = graph;
// Constructor from an existing const graph. The ParameterList parameter is
// unnamed, so it is not used by this overload.
// Visible behavior:
//  - staticGraph_ is initialized from `graph`; storageStatus_ is set to
//    Details::STORAGE_1D_PACKED;
//  - raises std::runtime_error if `graph` is null or not fill complete;
//  - sizes a values_type allocation labelled "Tpetra::CrsMatrix::values" by
//    graph->lclIndsPacked_wdv.extent(0);
//  - valuesUnpacked_wdv is assigned from valuesPacked_wdv;
//  - progress messages ("Start", "Allocate values", "Done") are written to
//    std::cerr.
// NOTE(review): the conditions guarding the std::cerr output and the
// assignment of valuesPacked_wdv are not visible in this excerpt.
244 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
246 CrsMatrix(
const Teuchos::RCP<const crs_graph_type>& graph,
247 const Teuchos::RCP<Teuchos::ParameterList>& )
249 , staticGraph_(graph)
250 , storageStatus_(Details::STORAGE_1D_PACKED) {
252 typedef typename local_matrix_device_type::values_type values_type;
253 const char tfecfFuncName[] =
254 "CrsMatrix(RCP<const CrsGraph>[, "
255 "RCP<ParameterList>]): ";
258 std::unique_ptr<std::string> prefix;
260 prefix = this->createPrefix(
"CrsMatrix",
"CrsMatrix(graph,params)");
261 std::ostringstream os;
262 os << *prefix <<
"Start" << endl;
263 std::cerr << os.str();
// Input validation: the graph must exist and be fill complete.
266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(graph.is_null(), std::runtime_error,
"Input graph is null.");
267 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!graph->isFillComplete(), std::runtime_error,
269 "is not fill complete. You must call fillComplete on the "
270 "graph before using it to construct a CrsMatrix. Note that "
271 "calling resumeFill on the graph makes it not fill complete, "
272 "even if you had previously called fillComplete. In that "
273 "case, you must call fillComplete on the graph again.");
// Number of values to allocate equals the extent of the graph's packed
// local-index storage.
281 const size_t numEnt = graph->lclIndsPacked_wdv.extent(0);
283 std::ostringstream os;
284 os << *prefix <<
"Allocate values: " << numEnt << endl;
285 std::cerr << os.str();
288 values_type val(
"Tpetra::CrsMatrix::values", numEnt);
290 valuesUnpacked_wdv = valuesPacked_wdv;
295 std::ostringstream os;
296 os << *prefix <<
"Done" << endl;
297 std::cerr << os.str();
// Constructor overload that takes a graph and reads its values and storage
// status from another object named `matrix` (its declaration is not visible
// in this excerpt).
// Visible behavior:
//  - staticGraph_ is initialized from `graph`, storageStatus_ from
//    matrix.storageStatus_;
//  - raises std::runtime_error if `graph` is null or not fill complete;
//  - valuesPacked_wdv / valuesUnpacked_wdv are built from the corresponding
//    members of `matrix` with arguments (0, N), where N is the extent of the
//    graph's packed / unpacked local-index storage.
// NOTE(review): the message prefix below mentions
// "local_matrix_device_type::values_type", but this overload reads from
// `matrix`; the text looks copied from a sibling overload. Confirm.
301 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
304 const Teuchos::RCP<const crs_graph_type>& graph,
305 const Teuchos::RCP<Teuchos::ParameterList>& params)
307 , staticGraph_(graph)
308 , storageStatus_(matrix.storageStatus_) {
309 const char tfecfFuncName[] =
310 "CrsMatrix(RCP<const CrsGraph>, "
311 "local_matrix_device_type::values_type, "
312 "[,RCP<ParameterList>]): ";
313 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(graph.is_null(), std::runtime_error,
"Input graph is null.");
314 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!graph->isFillComplete(), std::runtime_error,
316 "is not fill complete. You must call fillComplete on the "
317 "graph before using it to construct a CrsMatrix. Note that "
318 "calling resumeFill on the graph makes it not fill complete, "
319 "even if you had previously called fillComplete. In that "
320 "case, you must call fillComplete on the graph again.");
// Packed values: length taken from the graph's packed local indices.
322 size_t numValuesPacked = graph->lclIndsPacked_wdv.extent(0);
323 valuesPacked_wdv =
values_wdv_type(matrix.valuesPacked_wdv, 0, numValuesPacked);
// Unpacked values: length taken from the graph's unpacked local indices.
325 size_t numValuesUnpacked = graph->lclIndsUnpacked_wdv.extent(0);
326 valuesUnpacked_wdv =
values_wdv_type(matrix.valuesUnpacked_wdv, 0, numValuesUnpacked);
// Constructor from an existing const graph and a caller-supplied values view.
// The ParameterList parameter is unnamed, so it is not used by this overload.
// Visible behavior:
//  - staticGraph_ is initialized from `graph`; storageStatus_ is set to
//    Details::STORAGE_1D_PACKED;
//  - raises std::runtime_error if `graph` is null or not fill complete;
//  - valuesUnpacked_wdv is assigned from valuesPacked_wdv.
// NOTE(review): how `values` is stored into valuesPacked_wdv is not visible
// in this excerpt.
331 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
333 CrsMatrix(
const Teuchos::RCP<const crs_graph_type>& graph,
334 const typename local_matrix_device_type::values_type& values,
335 const Teuchos::RCP<Teuchos::ParameterList>& )
337 , staticGraph_(graph)
338 , storageStatus_(Details::STORAGE_1D_PACKED) {
339 const char tfecfFuncName[] =
340 "CrsMatrix(RCP<const CrsGraph>, "
341 "local_matrix_device_type::values_type, "
342 "[,RCP<ParameterList>]): ";
343 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(graph.is_null(), std::runtime_error,
"Input graph is null.");
344 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!graph->isFillComplete(), std::runtime_error,
346 "is not fill complete. You must call fillComplete on the "
347 "graph before using it to construct a CrsMatrix. Note that "
348 "calling resumeFill on the graph makes it not fill complete, "
349 "even if you had previously called fillComplete. In that "
350 "case, you must call fillComplete on the graph again.");
359 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor from maps plus raw CRS arrays given as Kokkos views
// (rowPointers, columnIndices, values) and an optional parameter list.
// Visible behavior:
//  - storageStatus_ is set to Details::STORAGE_1D_PACKED;
//  - raises std::invalid_argument if values and columnIndices differ in
//    length;
//  - when `debug` is true and rowPointers is non-empty, the last rowPointers
//    entry (read via getEntryOnHost) must equal both array lengths, otherwise
//    std::invalid_argument;
//  - a crs_graph_type is built from (rowMap, colMap, rowPointers,
//    columnIndices, params); a std::exception from that is reported as
//    std::runtime_error;
//  - the resulting local graph's extents are checked against the inputs;
//  - staticGraph_ is set to the new graph and valuesUnpacked_wdv is assigned
//    from valuesPacked_wdv;
//  - "Start"/"Done" progress messages are written to std::cerr.
// NOTE(review): the definitions of `debug`/`verbose`, the exception types of
// the local-graph checks, and the valuesPacked_wdv assignment are not visible
// in this excerpt.
370 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
373 const Teuchos::RCP<const map_type>& colMap,
374 const typename local_graph_device_type::row_map_type& rowPointers,
375 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
376 const typename local_matrix_device_type::values_type& values,
377 const Teuchos::RCP<Teuchos::ParameterList>& params)
379 , storageStatus_(Details::STORAGE_1D_PACKED) {
380 using Details::getEntryOnHost;
383 const char tfecfFuncName[] =
384 "Tpetra::CrsMatrix(RCP<const Map>, "
385 "RCP<const Map>, ptr, ind, val[, params]): ";
386 const char suffix[] =
387 ". Please report this bug to the Tpetra developers.";
391 std::unique_ptr<std::string> prefix;
393 prefix = this->createPrefix(
394 "CrsMatrix",
"CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
395 std::ostringstream os;
396 os << *prefix <<
"Start" << endl;
397 std::cerr << os.str();
// Values and column indices must have the same length.
404 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(values.extent(0) != columnIndices.extent(0),
405 std::invalid_argument,
"values.extent(0)=" << values.extent(0) <<
" != columnIndices.extent(0) = " << columnIndices.extent(0) <<
".");
// Debug-only check: the final row offset must match both array lengths.
406 if (debug && rowPointers.extent(0) != 0) {
407 const size_t numEnt =
408 getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
409 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numEnt !=
size_t(columnIndices.extent(0)) ||
410 numEnt !=
size_t(values.extent(0)),
411 std::invalid_argument,
412 "Last entry of rowPointers says that "
415 << (numEnt != 1 ?
"ies" :
"y") <<
", but the dimensions of "
416 "columnIndices and values don't match this. "
417 "columnIndices.extent(0)="
418 << columnIndices.extent(0)
419 <<
" and values.extent(0)=" << values.extent(0) <<
".");
422 RCP<crs_graph_type> graph;
424 graph = Teuchos::rcp(
new crs_graph_type(rowMap, colMap, rowPointers,
425 columnIndices, params));
426 }
catch (std::exception& e) {
427 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
428 "CrsGraph constructor (RCP<const Map>, "
429 "RCP<const Map>, ptr, ind[, params]) threw an exception: "
// Sanity checks on the graph that was just built.
437 auto lclGraph = graph->getLocalGraphDevice();
438 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lclGraph.row_map.extent(0) != rowPointers.extent(0) ||
439 lclGraph.entries.extent(0) != columnIndices.extent(0),
441 "CrsGraph's constructor (rowMap, colMap, ptr, "
442 "ind[, params]) did not set the local graph correctly."
444 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lclGraph.entries.extent(0) != values.extent(0),
446 "CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
447 "params]) did not set the local graph correctly. "
448 "lclGraph.entries.extent(0) = "
449 << lclGraph.entries.extent(0)
450 <<
" != values.extent(0) = " << values.extent(0) << suffix);
456 staticGraph_ = graph;
466 valuesUnpacked_wdv = valuesPacked_wdv;
475 std::ostringstream os;
476 os << *prefix <<
"Done" << endl;
477 std::cerr << os.str();
// Constructor from maps plus raw CRS arrays given as Teuchos::ArrayRCP
// (ptr, ind, val) and an optional parameter list.
// Visible behavior:
//  - storageStatus_ is set to Details::STORAGE_1D_PACKED;
//  - a std::exception thrown while building the graph is reported as
//    std::runtime_error;
//  - staticGraph_ is set to the new graph, whose local graph extents are
//    checked against ptr.size() and ind.size();
//  - the values are obtained through getKokkosViewDeepCopy<device_type> on
//    `val` reinterpreted as IST;
//  - valuesUnpacked_wdv is assigned from valuesPacked_wdv.
// NOTE(review): the graph construction statement, the definition of IST, and
// the target of the getKokkosViewDeepCopy result are not visible here.
481 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
484 const Teuchos::RCP<const map_type>& colMap,
485 const Teuchos::ArrayRCP<size_t>& ptr,
486 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
487 const Teuchos::ArrayRCP<Scalar>& val,
488 const Teuchos::RCP<Teuchos::ParameterList>& params)
490 , storageStatus_(Details::STORAGE_1D_PACKED) {
491 using Kokkos::Compat::getKokkosViewDeepCopy;
492 using Teuchos::av_reinterpret_cast;
494 using values_type =
typename local_matrix_device_type::values_type;
496 const char tfecfFuncName[] =
497 "Tpetra::CrsMatrix(RCP<const Map>, "
498 "RCP<const Map>, ptr, ind, val[, params]): ";
500 RCP<crs_graph_type> graph;
504 }
catch (std::exception& e) {
505 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
506 "CrsGraph constructor (RCP<const Map>, "
507 "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
508 "RCP<ParameterList>]) threw an exception: "
515 staticGraph_ = graph;
// Sanity check: the graph's local arrays must match the input array sizes.
528 auto lclGraph = staticGraph_->getLocalGraphDevice();
529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
size_t(lclGraph.row_map.extent(0)) !=
size_t(ptr.size()) ||
530 size_t(lclGraph.entries.extent(0)) !=
size_t(ind.size()),
532 "CrsGraph's constructor (rowMap, colMap, "
533 "ptr, ind[, params]) did not set the local graph correctly. "
534 "Please report this bug to the Tpetra developers.");
537 getKokkosViewDeepCopy<device_type>(av_reinterpret_cast<IST>(val()));
539 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor from a row Map, a column Map and a local matrix (per the
// message prefix below), producing a fill-complete matrix.
// Visible behavior:
//  - storageStatus_ is set to Details::STORAGE_1D_PACKED and fillComplete_ to
//    true in the initializer list;
//  - a graph is built using lclMatrix.graph and params; a std::exception from
//    that is reported as std::runtime_error;
//  - raises std::logic_error if the new graph is not fill complete;
//  - staticGraph_ is set to the new graph; valuesUnpacked_wdv is assigned
//    from valuesPacked_wdv;
//  - final sanity checks raise std::logic_error if isFillActive() is true or
//    isFillComplete() is false.
// NOTE(review): the full graph construction call and the valuesPacked_wdv
// assignment are not visible in this excerpt.
549 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
552 const Teuchos::RCP<const map_type>& colMap,
554 const Teuchos::RCP<Teuchos::ParameterList>& params)
556 , storageStatus_(Details::STORAGE_1D_PACKED)
557 , fillComplete_(true) {
558 const char tfecfFuncName[] =
559 "Tpetra::CrsMatrix(RCP<const Map>, "
560 "RCP<const Map>, local_matrix_device_type[, RCP<ParameterList>]): ";
561 const char suffix[] =
562 " Please report this bug to the Tpetra developers.";
564 Teuchos::RCP<crs_graph_type> graph;
567 lclMatrix.graph, params));
568 }
catch (std::exception& e) {
569 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
570 "CrsGraph constructor (RCP<const Map>, "
571 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) threw an "
575 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!graph->isFillComplete(), std::logic_error,
576 "CrsGraph constructor (RCP"
577 "<const Map>, RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) "
578 "did not produce a fill-complete graph. Please report this bug to the "
579 "Tpetra developers.");
584 staticGraph_ = graph;
587 valuesUnpacked_wdv = valuesPacked_wdv;
// Post-conditions: the matrix must be fill complete and not fill active.
589 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
isFillActive(), std::logic_error,
590 "At the end of a CrsMatrix constructor that should produce "
591 "a fillComplete matrix, isFillActive() is true."
593 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!
isFillComplete(), std::logic_error,
595 "CrsMatrix constructor that should produce a fillComplete "
596 "matrix, isFillComplete() is false."
// Constructor from a local matrix plus row, column, domain and range Maps,
// producing a fill-complete matrix.
// Visible behavior:
//  - storageStatus_ is set to Details::STORAGE_1D_PACKED and fillComplete_ to
//    true in the initializer list;
//  - a crs_graph_type is built from (lclMatrix.graph, rowMap, colMap,
//    domainMap, rangeMap, params); a std::exception from that is reported as
//    std::runtime_error;
//  - raises std::logic_error if the new graph is not fill complete;
//  - staticGraph_ is set to the new graph; valuesUnpacked_wdv is assigned
//    from valuesPacked_wdv;
//  - final sanity checks raise std::logic_error if isFillActive() is true or
//    isFillComplete() is false.
// NOTE(review): the lclMatrix parameter declaration and the valuesPacked_wdv
// assignment are not visible in this excerpt.
601 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
604 const Teuchos::RCP<const map_type>& rowMap,
605 const Teuchos::RCP<const map_type>& colMap,
606 const Teuchos::RCP<const map_type>& domainMap,
607 const Teuchos::RCP<const map_type>& rangeMap,
608 const Teuchos::RCP<Teuchos::ParameterList>& params)
610 , storageStatus_(Details::STORAGE_1D_PACKED)
611 , fillComplete_(true) {
612 const char tfecfFuncName[] =
613 "Tpetra::CrsMatrix(RCP<const Map>, "
614 "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
615 "local_matrix_device_type[, RCP<ParameterList>]): ";
616 const char suffix[] =
617 " Please report this bug to the Tpetra developers.";
619 Teuchos::RCP<crs_graph_type> graph;
621 graph = Teuchos::rcp(
new crs_graph_type(lclMatrix.graph, rowMap, colMap,
622 domainMap, rangeMap, params));
623 }
catch (std::exception& e) {
624 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
625 "CrsGraph constructor (RCP<const Map>, "
626 "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_device_type[, "
627 "RCP<ParameterList>]) threw an exception: "
630 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!graph->isFillComplete(), std::logic_error,
632 "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
633 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) did "
634 "not produce a fillComplete graph."
640 staticGraph_ = graph;
643 valuesUnpacked_wdv = valuesPacked_wdv;
// Post-conditions: the matrix must be fill complete and not fill active.
645 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
isFillActive(), std::logic_error,
646 "At the end of a CrsMatrix constructor that should produce "
647 "a fillComplete matrix, isFillActive() is true."
649 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!
isFillComplete(), std::logic_error,
651 "CrsMatrix constructor that should produce a fillComplete "
652 "matrix, isFillComplete() is false."
// Constructor from a local matrix plus row, column, domain and range Maps and
// caller-supplied Import/Export objects, producing a fill-complete matrix.
// Visible behavior:
//  - storageStatus_ is set to Details::STORAGE_1D_PACKED and fillComplete_ to
//    true in the initializer list;
//  - a graph is built using (…, domainMap, rangeMap, importer, …); a
//    std::exception from that is reported as std::runtime_error;
//  - raises std::logic_error if the new graph is not fill complete;
//  - staticGraph_ is set to the new graph; valuesUnpacked_wdv is assigned
//    from valuesPacked_wdv;
//  - final sanity checks raise std::logic_error if isFillActive() is true or
//    isFillComplete() is false.
// NOTE(review): the full graph construction call and the valuesPacked_wdv
// assignment are not visible in this excerpt.
657 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
660 const Teuchos::RCP<const map_type>& rowMap,
661 const Teuchos::RCP<const map_type>& colMap,
662 const Teuchos::RCP<const map_type>& domainMap,
663 const Teuchos::RCP<const map_type>& rangeMap,
664 const Teuchos::RCP<const import_type>& importer,
665 const Teuchos::RCP<const export_type>& exporter,
666 const Teuchos::RCP<Teuchos::ParameterList>& params)
668 , storageStatus_(Details::STORAGE_1D_PACKED)
669 , fillComplete_(true) {
671 const char tfecfFuncName[] =
673 "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
674 const char suffix[] =
675 " Please report this bug to the Tpetra developers.";
677 Teuchos::RCP<crs_graph_type> graph;
680 domainMap, rangeMap, importer,
682 }
catch (std::exception& e) {
683 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
684 "CrsGraph constructor "
685 "(local_graph_device_type, Map, Map, Map, Map, Import, Export, "
689 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!graph->isFillComplete(), std::logic_error,
691 "constructor (local_graph_device_type, Map, Map, Map, Map, Import, "
692 "Export, params) did not produce a fill-complete graph. "
693 "Please report this bug to the Tpetra developers.");
698 staticGraph_ = graph;
701 valuesUnpacked_wdv = valuesPacked_wdv;
// Post-conditions: the matrix must be fill complete and not fill active.
703 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
isFillActive(), std::logic_error,
704 "At the end of a CrsMatrix constructor that should produce "
705 "a fillComplete matrix, isFillActive() is true."
707 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!
isFillComplete(), std::logic_error,
709 "CrsMatrix constructor that should produce a fillComplete "
710 "matrix, isFillComplete() is false."
// Constructor from another CrsMatrix (`source`) with a Teuchos::DataAccess
// selector (`copyOrView`).
// Visible behavior:
//  - staticGraph_ is initialized from source.getCrsGraph(), storageStatus_
//    from source.storageStatus_;
//  - raises std::invalid_argument if `source` is not fill complete;
//  - Teuchos::Copy branch: allocates a values_type named "val" without
//    initializing it, and valuesUnpacked_wdv is assigned from
//    valuesPacked_wdv;
//  - Teuchos::View branch: body not visible in this excerpt;
//  - any other value of copyOrView raises std::invalid_argument.
// NOTE(review): the message on the fill-complete check says "Source graph",
// but the condition tests source.isFillComplete(); confirm intended wording.
715 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
718 const Teuchos::DataAccess copyOrView)
720 , staticGraph_(source.getCrsGraph())
721 , storageStatus_(source.storageStatus_) {
722 const char tfecfFuncName[] =
724 "const CrsMatrix&, const Teuchos::DataAccess): ";
725 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!source.
isFillComplete(), std::invalid_argument,
726 "Source graph must be fillComplete().");
728 if (copyOrView == Teuchos::Copy) {
729 using values_type =
typename local_matrix_device_type::values_type;
731 using Kokkos::view_alloc;
732 using Kokkos::WithoutInitializing;
// Allocation skips initialization (WithoutInitializing).
733 values_type newvals(view_alloc(
"val", WithoutInitializing),
738 valuesUnpacked_wdv = valuesPacked_wdv;
740 }
else if (copyOrView == Teuchos::View) {
745 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::invalid_argument,
746 "Second argument 'copyOrView' "
747 "has an invalid value "
748 << copyOrView <<
". Valid values "
749 "include Teuchos::Copy = "
750 << Teuchos::Copy <<
" and "
752 << Teuchos::View <<
".");
// Exchanges state between this matrix and `crs_matrix` member by member with
// std::swap. Members visibly swapped here: importMV_, exportMV_,
// staticGraph_, myGraph_, valuesPacked_wdv, valuesUnpacked_wdv, nonlocals_.
// NOTE(review): the function signature and some statements between the
// visible lines are not shown in this excerpt, so the list may be incomplete.
757 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
760 std::swap(crs_matrix.
importMV_, this->importMV_);
761 std::swap(crs_matrix.
exportMV_, this->exportMV_);
762 std::swap(crs_matrix.staticGraph_, this->staticGraph_);
763 std::swap(crs_matrix.myGraph_, this->myGraph_);
764 std::swap(crs_matrix.valuesPacked_wdv, this->valuesPacked_wdv);
765 std::swap(crs_matrix.valuesUnpacked_wdv, this->valuesUnpacked_wdv);
768 std::swap(crs_matrix.
nonlocals_, this->nonlocals_);
// Returns the communicator reported by the matrix's graph
// (forwards to getCrsGraphRef().getComm()).
771 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
772 Teuchos::RCP<const Teuchos::Comm<int>>
775 return getCrsGraphRef().getComm();
// Returns the current value of the fillComplete_ flag.
778 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
781 return fillComplete_;
// Returns the logical negation of the fillComplete_ flag.
784 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
787 return !fillComplete_;
// Forwards to the graph: returns getCrsGraphRef().isStorageOptimized().
790 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
793 return this->getCrsGraphRef().isStorageOptimized();
// Forwards to the graph: returns getCrsGraphRef().isLocallyIndexed().
796 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
799 return getCrsGraphRef().isLocallyIndexed();
// Forwards to the graph: returns getCrsGraphRef().isGloballyIndexed().
802 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
805 return getCrsGraphRef().isGloballyIndexed();
// Forwards to the graph: returns getCrsGraphRef().hasColMap().
808 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
811 return getCrsGraphRef().hasColMap();
// Forwards to the graph: returns getCrsGraphRef().getGlobalNumEntries().
814 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
818 return getCrsGraphRef().getGlobalNumEntries();
// Forwards to the graph: returns getCrsGraphRef().getLocalNumEntries().
821 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
825 return getCrsGraphRef().getLocalNumEntries();
// Forwards to the graph: returns getCrsGraphRef().getGlobalNumRows().
828 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
832 return getCrsGraphRef().getGlobalNumRows();
// Forwards to the graph: returns getCrsGraphRef().getGlobalNumCols().
835 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
839 return getCrsGraphRef().getGlobalNumCols();
// Forwards to the graph: returns getCrsGraphRef().getLocalNumRows().
842 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
846 return getCrsGraphRef().getLocalNumRows();
// Forwards to the graph: returns getCrsGraphRef().getLocalNumCols().
849 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
853 return getCrsGraphRef().getLocalNumCols();
// Forwards to the graph: returns
// getCrsGraphRef().getNumEntriesInGlobalRow(globalRow).
856 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
860 return getCrsGraphRef().getNumEntriesInGlobalRow(globalRow);
// Forwards to the graph: returns
// getCrsGraphRef().getNumEntriesInLocalRow(localRow).
863 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
867 return getCrsGraphRef().getNumEntriesInLocalRow(localRow);
// Forwards to the graph: returns
// getCrsGraphRef().getGlobalMaxNumRowEntries().
870 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
874 return getCrsGraphRef().getGlobalMaxNumRowEntries();
// Forwards to the graph: returns
// getCrsGraphRef().getLocalMaxNumRowEntries().
877 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
881 return getCrsGraphRef().getLocalMaxNumRowEntries();
// Returns the index base reported by the row Map
// (getRowMap()->getIndexBase()).
884 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
888 return getRowMap()->getIndexBase();
// Returns the graph's row Map (forwards to getCrsGraphRef().getRowMap()).
891 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
892 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
895 return getCrsGraphRef().getRowMap();
// Returns the graph's column Map (forwards to getCrsGraphRef().getColMap()).
898 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
899 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
902 return getCrsGraphRef().getColMap();
// Returns the graph's domain Map
// (forwards to getCrsGraphRef().getDomainMap()).
905 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
906 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
909 return getCrsGraphRef().getDomainMap();
// Returns the graph's range Map
// (forwards to getCrsGraphRef().getRangeMap()).
912 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
913 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
916 return getCrsGraphRef().getRangeMap();
// Accessor returning the matrix's graph as an RCP to a const RowGraph.
// The visible code branches on whether staticGraph_ is non-null.
// NOTE(review): both branch bodies are outside this excerpt; presumably the
// static graph is returned when set and myGraph_ otherwise — confirm.
919 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
920 Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node>>
923 if (staticGraph_ != Teuchos::null) {
// Accessor returning the matrix's graph as an RCP to a const CrsGraph.
// The visible code branches on whether staticGraph_ is non-null.
// NOTE(review): both branch bodies are outside this excerpt; presumably the
// static graph is returned when set and myGraph_ otherwise — confirm.
929 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
930 Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>>
933 if (staticGraph_ != Teuchos::null) {
// Returns a reference to the matrix's graph.
// If staticGraph_ is non-null it is dereferenced and returned; otherwise
// *myGraph_ is returned. A check raises std::logic_error when myGraph_ is
// also null.
// `debug` is a compile-time constant selected by HAVE_TPETRA_DEBUG.
// NOTE(review): the `#else` between the two `debug` definitions and the
// condition guarding the null check are not visible in this excerpt;
// presumably the check runs only when `debug` is true — confirm.
939 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
943 #ifdef HAVE_TPETRA_DEBUG
944 constexpr
bool debug =
true;
946 constexpr
bool debug =
false;
947 #endif // HAVE_TPETRA_DEBUG
// Preferred source: the static graph handle.
949 if (!this->staticGraph_.is_null()) {
950 return *(this->staticGraph_);
953 const char tfecfFuncName[] =
"getCrsGraphRef: ";
954 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->myGraph_.is_null(), std::logic_error,
955 "Both staticGraph_ and myGraph_ are null. "
956 "Please report this bug to the Tpetra developers.");
// Fallback: the graph owned by the matrix.
958 return *(this->myGraph_);
// Builds and returns a local_matrix_device_type from the matrix's current
// state. Visible inputs: the column Map's local element count (numCols), the
// device view of valuesPacked_wdv requested with Access::ReadWrite, and the
// static graph's device-side local graph.
// NOTE(review): the head of the return statement (including the label
// argument) is not visible in this excerpt. staticGraph_ is dereferenced
// without a visible null check.
962 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
963 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
966 auto numCols = staticGraph_->getColMap()->getLocalNumElements();
969 valuesPacked_wdv.getDeviceView(Access::ReadWrite),
970 staticGraph_->getLocalGraphDevice());
// Builds and returns a local_matrix_host_type labelled
// "Tpetra::CrsMatrix::lclMatrixHost" from: the column Map's local element
// count (numCols), the host view of valuesPacked_wdv requested with
// Access::ReadWrite, and the static graph's host-side local graph.
// NOTE(review): staticGraph_ is dereferenced without a visible null check.
973 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
974 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type
977 auto numCols = staticGraph_->
getColMap()->getLocalNumElements();
978 return local_matrix_host_type(
"Tpetra::CrsMatrix::lclMatrixHost", numCols,
979 valuesPacked_wdv.getHostView(Access::ReadWrite),
980 staticGraph_->getLocalGraphHost());
// Returns true exactly when myGraph_ is null, i.e. the matrix does not hold
// its own (non-const) graph.
983 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
986 return myGraph_.is_null();
989 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
995 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// allocateValues: allocates the matrix's value storage to match the graph.
// Inputs visible in the body: `lg` (LocalIndices vs. global), `gas`
// (GraphAlreadyAllocated vs. GraphNotYetAllocated) and `verbose`.
// Visible behavior:
//  - optional diagnostic output to std::cerr describing lg and gas;
//  - raises std::logic_error if staticGraph_ is null;
//  - checks that `gas` agrees with staticGraph_->indicesAreAllocated() and
//    that an unallocated static graph is owned by the matrix (myGraph_
//    non-null);
//  - if gas == GraphNotYetAllocated: requires myGraph_ to be non-null, then
//    calls myGraph_->allocateIndices(lg, verbose); exceptions from that call
//    are reported as std::runtime_error;
//  - reads the total allocation size from
//    staticGraph_->getLocalAllocationSize() and checks it against the last
//    entry of the unpacked row offsets (std::logic_error on mismatch);
//  - allocates a values_type labelled "Tpetra::CrsMatrix::values" with that
//    many entries.
// NOTE(review): the signature, the guards around the diagnostic and debug
// sections, and the target of the final allocation are not visible in this
// excerpt.
// NOTE(review): the message below contains "rowPtrsUnapcked_host", which
// looks like a typo for "rowPtrsUnpacked_host"; left untouched here because
// it is runtime text.
1001 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1004 const bool verbose) {
1008 const char tfecfFuncName[] =
"allocateValues: ";
1009 const char suffix[] =
1010 " Please report this bug to the Tpetra developers.";
1011 ProfilingRegion region(
"Tpetra::CrsMatrix::allocateValues");
1013 std::unique_ptr<std::string> prefix;
1015 prefix = this->createPrefix(
"CrsMatrix",
"allocateValues");
1016 std::ostringstream os;
1017 os << *prefix <<
"lg: "
1018 << (lg == LocalIndices ?
"Local" :
"Global") <<
"Indices"
1020 << (gas == GraphAlreadyAllocated ?
"Already" :
"NotYet")
1021 <<
"Allocated" << endl;
1022 std::cerr << os.str();
1025 const bool debug = Behavior::debug(
"CrsMatrix");
1027 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->staticGraph_.is_null(), std::logic_error,
1028 "staticGraph_ is null." << suffix);
// Consistency check between the caller's claim (gas) and the graph's state.
1033 if ((gas == GraphAlreadyAllocated) !=
1034 staticGraph_->indicesAreAllocated()) {
1036 "The caller has asserted that the graph "
1039 "already allocated, but the static graph "
1040 "says that its indices are ";
1041 const char err3[] =
"already allocated. ";
1042 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(gas == GraphAlreadyAllocated &&
1043 !staticGraph_->indicesAreAllocated(),
1045 err1 << err2 <<
"not " << err3 << suffix);
1046 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(gas != GraphAlreadyAllocated &&
1047 staticGraph_->indicesAreAllocated(),
1049 err1 <<
"not " << err2 << err3 << suffix);
1057 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->staticGraph_->indicesAreAllocated() &&
1058 this->myGraph_.is_null(),
1060 "The static graph says that its indices are not allocated, "
1061 "but the graph is not owned by the matrix."
// Allocate the graph's indices first when the caller says they are missing.
1065 if (gas == GraphNotYetAllocated) {
1067 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->myGraph_.is_null(), std::logic_error,
1068 "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1071 this->myGraph_->allocateIndices(lg, verbose);
1072 }
catch (std::exception& e) {
1073 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
1074 "CrsGraph::allocateIndices "
1075 "threw an exception: "
1078 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
1079 "CrsGraph::allocateIndices "
1080 "threw an exception not a subclass of std::exception.");
// Size of the values array comes from the graph's allocation size.
1085 const size_t lclTotalNumEntries = this->staticGraph_->getLocalAllocationSize();
1087 const size_t lclNumRows = this->staticGraph_->getLocalNumRows();
1088 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->staticGraph_->getRowPtrsUnpackedHost()(lclNumRows) != lclTotalNumEntries, std::logic_error,
1089 "length of staticGraph's lclIndsUnpacked does not match final entry of rowPtrsUnapcked_host." << suffix);
1093 using values_type =
typename local_matrix_device_type::values_type;
1095 std::ostringstream os;
1096 os << *prefix <<
"Allocate values_wdv: Pre "
1097 << valuesUnpacked_wdv.extent(0) <<
", post "
1098 << lclTotalNumEntries << endl;
1099 std::cerr << os.str();
1103 values_type(
"Tpetra::CrsMatrix::values",
1104 lclTotalNumEntries));
// fillLocalGraphAndMatrix (name taken from the error prefix below; the
// signature line is not visible in this excerpt).
//
// Per its own error prefix, this is called from fillComplete or
// expertStaticFillComplete. What the visible code does:
//   1. requires a matrix-owned graph (myGraph_ non-null);
//   2. sanity-checks the unpacked row offsets against the extents of the
//      unpacked index and value storage;
//   3. if the graph's entry count differs from its allocation size,
//      allocates packed row offsets / column indices / values and runs
//      pack_functor kernels over all local rows; otherwise the packed
//      members are set to alias the unpacked ones;
//   4. if "Optimize Storage" is requested, makes the unpacked members alias
//      the packed ones and marks storage as STORAGE_1D_PACKED.
//
// NOTE(review): several lines (braces, `if (verbose)` / `if (debug)` guards,
// some macro arguments) are missing from this excerpt, so the exact nesting
// of the statements below should be confirmed against the full file.
1107 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1111 using Teuchos::arcp_const_cast;
1112 using Teuchos::Array;
1113 using Teuchos::ArrayRCP;
1114 using Teuchos::null;
1118 using ::Tpetra::Details::getEntryOnHost;
1119 using row_map_type =
typename local_graph_device_type::row_map_type;
1120 using lclinds_1d_type =
typename Graph::local_graph_device_type::entries_type::non_const_type;
1121 using values_type =
typename local_matrix_device_type::values_type;
// Prefix and suffix used by the exception messages raised in this method.
1124 const char tfecfFuncName[] =
1125 "fillLocalGraphAndMatrix (called from "
1126 "fillComplete or expertStaticFillComplete): ";
1127 const char suffix[] =
1128 " Please report this bug to the Tpetra developers.";
// Diagnostic prefix for the std::cerr logging below (the condition guarding
// this logging is not visible in this excerpt).
1132 std::unique_ptr<std::string> prefix;
1134 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalGraphAndMatrix");
1135 std::ostringstream os;
1136 os << *prefix << endl;
1137 std::cerr << os.str();
// This method requires a nonconst graph owned by the matrix; a null
// myGraph_ is reported as a logic error.
1143 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_.is_null(), std::logic_error,
1144 "The nonconst graph "
1145 "(myGraph_) is null.  This means that the matrix has a "
1146 "const (a.k.a. \"static\") graph.  fillComplete or "
1147 "expertStaticFillComplete should never call "
1148 "fillLocalGraphAndMatrix in that case."
1152 const size_t lclNumRows = this->getLocalNumRows();
// Current (unpacked) device row offsets, read from the graph.
1167 typename Graph::local_graph_device_type::row_map_type curRowOffsets =
1168 myGraph_->rowPtrsUnpacked_dev_;
// Consistency checks: the offsets must exist, must have lclNumRows + 1
// entries, and their last entry must equal the unpacked index extent.
1171 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowOffsets.extent(0) == 0, std::logic_error,
1172 "curRowOffsets.extent(0) == 0.");
1173 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowOffsets.extent(0) != lclNumRows + 1, std::logic_error,
1174 "curRowOffsets.extent(0) = "
1175 << curRowOffsets.extent(0) <<
" != lclNumRows + 1 = "
1176 << (lclNumRows + 1) <<
".");
1177 const size_t numOffsets = curRowOffsets.extent(0);
1178 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
// NOTE(review): the message below prints "curRowOffsets(numOffsets)" although
// the value compared is the entry at index numOffsets - 1.
1179 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numOffsets != 0 &&
1180 myGraph_->lclIndsUnpacked_wdv.extent(0) != valToCheck,
1181 std::logic_error,
"numOffsets = " << numOffsets <<
" != 0 and myGraph_->lclIndsUnpacked_wdv.extent(0) = " << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
" != curRowOffsets(" << numOffsets <<
") = " << valToCheck <<
".");
// Entry count != allocation size: the unpacked storage has unused slots,
// so packed copies of the offsets, indices and values are built.
1184 if (myGraph_->getLocalNumEntries() !=
1185 myGraph_->getLocalAllocationSize()) {
1188 typename row_map_type::non_const_type k_ptrs;
1189 row_map_type k_ptrs_const;
1190 lclinds_1d_type k_inds;
1194 std::ostringstream os;
1195 const auto numEnt = myGraph_->getLocalNumEntries();
1196 const auto allocSize = myGraph_->getLocalAllocationSize();
1197 os << *prefix <<
"Unpacked 1-D storage: numEnt=" << numEnt
1198 <<
", allocSize=" << allocSize << endl;
1199 std::cerr << os.str();
// Debug-only checks before packing: the last unpacked row offset must match
// the extents of both the unpacked values and the unpacked indices.
1207 if (debug && curRowOffsets.extent(0) != 0) {
1208 const size_t numOffsets =
1209 static_cast<size_t>(curRowOffsets.extent(0));
1210 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
// NOTE(review): this message has no " = " between the text
// "valuesUnpacked_wdv.extent(0)" and the streamed extent value.
1211 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(valToCheck) !=
1212 static_cast<size_t>(valuesUnpacked_wdv.extent(0)),
1214 "(unpacked branch) Before "
1215 "allocating or packing, curRowOffsets("
1217 <<
") = " << valToCheck <<
" != valuesUnpacked_wdv.extent(0)"
1219 << valuesUnpacked_wdv.extent(0) <<
".");
1220 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(valToCheck) !=
1221 static_cast<size_t>(myGraph_->lclIndsUnpacked_wdv.extent(0)),
1223 "(unpacked branch) Before "
1224 "allocating or packing, curRowOffsets("
1226 <<
") = " << valToCheck
1227 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1228 << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
".");
// Total number of packed entries on this process; assigned below by a call
// whose right-hand side is not visible in this excerpt.
1236 size_t lclTotalNumEntries = 0;
1242 std::ostringstream os;
1243 os << *prefix <<
"Allocate packed row offsets: "
1244 << (lclNumRows + 1) << endl;
1245 std::cerr << os.str();
// Allocate the packed row-offset array (lclNumRows + 1 entries) and read
// the per-row entry counts kept by the graph.
1247 typename row_map_type::non_const_type
1248 packedRowOffsets(
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
1249 typename row_entries_type::const_type numRowEnt_h =
1250 myGraph_->k_numRowEntries_;
1253 lclTotalNumEntries =
1257 k_ptrs = packedRowOffsets;
1258 k_ptrs_const = k_ptrs;
// Post-conditions on the packed offsets: correct length, and last entry
// equal to the total entry count computed above.
1262 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(k_ptrs.extent(0)) != lclNumRows + 1,
1264 "(unpacked branch) After packing k_ptrs, "
1265 "k_ptrs.extent(0) = "
1266 << k_ptrs.extent(0) <<
" != "
1268 << (lclNumRows + 1) <<
".");
1269 const auto valToCheck = getEntryOnHost(k_ptrs, lclNumRows);
1270 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != lclTotalNumEntries, std::logic_error,
1271 "(unpacked branch) After filling k_ptrs, "
1272 "k_ptrs(lclNumRows="
1273 << lclNumRows <<
") = " << valToCheck
1274 <<
" != total number of entries on the calling process = "
1275 << lclTotalNumEntries <<
".");
// Allocate packed column-index and value storage, each of length
// lclTotalNumEntries.
1280 std::ostringstream os;
1281 os << *prefix <<
"Allocate packed local column indices: "
1282 << lclTotalNumEntries << endl;
1283 std::cerr << os.str();
1285 k_inds = lclinds_1d_type(
"Tpetra::CrsGraph::lclInds", lclTotalNumEntries);
1287 std::ostringstream os;
1288 os << *prefix <<
"Allocate packed values: "
1289 << lclTotalNumEntries << endl;
1290 std::cerr << os.str();
1292 k_vals = values_type(
"Tpetra::CrsMatrix::values", lclTotalNumEntries);
// Pack the column indices: a pack_functor is built from the unpacked device
// indices, the packed offsets (k_ptrs) and the unpacked offsets
// (curRowOffsets), then run once per local row. The functor's first
// (destination) argument is on a line not visible here -- presumably k_inds.
1304 using inds_packer_type = pack_functor<
1305 typename Graph::local_graph_device_type::entries_type::non_const_type,
1306 typename Graph::local_inds_dualv_type::t_dev::const_type,
1307 typename Graph::local_graph_device_type::row_map_type::non_const_type,
1308 typename Graph::local_graph_device_type::row_map_type>;
1309 inds_packer_type indsPacker(
1311 myGraph_->lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
1312 k_ptrs, curRowOffsets);
1314 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1315 Kokkos::parallel_for(
"Tpetra::CrsMatrix pack column indices",
1316 range_type(0, lclNumRows), indsPacker);
// Pack the values the same way, reading from valuesUnpacked_wdv
// (destination argument again not visible -- presumably k_vals).
1320 using vals_packer_type = pack_functor<
1321 typename values_type::non_const_type,
1322 typename values_type::const_type,
1323 typename row_map_type::non_const_type,
1324 typename row_map_type::const_type>;
1325 vals_packer_type valsPacker(
1327 this->valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1328 k_ptrs, curRowOffsets);
1329 Kokkos::parallel_for(
"Tpetra::CrsMatrix pack values",
1330 range_type(0, lclNumRows), valsPacker);
// After packing: the last packed offset must equal the extents of the
// packed values and packed indices.
1333 const char myPrefix[] =
1334 "(\"Optimize Storage\""
1335 "=true branch) After packing, ";
1336 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(k_ptrs.extent(0) == 0, std::logic_error, myPrefix <<
"k_ptrs.extent(0) = 0.  This probably means that "
1337 "rowPtrsUnpacked_ was never allocated.");
1338 if (k_ptrs.extent(0) != 0) {
1339 const size_t numOffsets(k_ptrs.extent(0));
1340 const auto valToCheck =
1341 getEntryOnHost(k_ptrs, numOffsets - 1);
1342 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
size_t(valToCheck) != k_vals.extent(0),
1343 std::logic_error, myPrefix <<
"k_ptrs(" << (numOffsets - 1) <<
") = " << valToCheck <<
" != k_vals.extent(0) = " << k_vals.extent(0) <<
".");
1344 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
size_t(valToCheck) != k_inds.extent(0),
1345 std::logic_error, myPrefix <<
"k_ptrs(" << (numOffsets - 1) <<
") = " << valToCheck <<
" != k_inds.extent(0) = " << k_inds.extent(0) <<
".");
// Hand the packed row offsets to the graph (the right-hand side of the
// lclIndsPacked_wdv assignment is not visible in this excerpt).
1349 myGraph_->setRowPtrsPacked(k_ptrs_const);
1350 myGraph_->lclIndsPacked_wdv =
// Presumably the "storage already packed" branch (see the log message
// below): the packed members simply alias the unpacked ones.
1356 myGraph_->rowPtrsPacked_dev_ = myGraph_->rowPtrsUnpacked_dev_;
1357 myGraph_->rowPtrsPacked_host_ = myGraph_->rowPtrsUnpacked_host_;
1358 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1359 myGraph_->lclIndsPacked_wdv = myGraph_->lclIndsUnpacked_wdv;
1360 valuesPacked_wdv = valuesUnpacked_wdv;
1363 std::ostringstream os;
1364 os << *prefix <<
"Storage already packed: rowPtrsUnpacked_: "
1365 << myGraph_->getRowPtrsUnpackedHost().extent(0) <<
", lclIndsUnpacked_wdv: "
1366 << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
", valuesUnpacked_wdv: "
1367 << valuesUnpacked_wdv.extent(0) << endl;
1368 std::cerr << os.str();
// Checks for the aliasing case: the last unpacked offset must match the
// packed value and index extents.
1372 const char myPrefix[] =
1373 "(\"Optimize Storage\"=false branch) ";
1374 auto rowPtrsUnpackedHost = myGraph_->getRowPtrsUnpackedHost();
1375 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->rowPtrsUnpacked_dev_.extent(0) == 0, std::logic_error, myPrefix <<
"myGraph->rowPtrsUnpacked_dev_.extent(0) = 0.  This probably means "
1376 "that rowPtrsUnpacked_ was never allocated.");
1377 if (myGraph_->rowPtrsUnpacked_dev_.extent(0) != 0) {
1378 const size_t numOffsets = rowPtrsUnpackedHost.extent(0);
1379 const auto valToCheck = rowPtrsUnpackedHost(numOffsets - 1);
1380 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
size_t(valToCheck) != valuesPacked_wdv.extent(0),
1381 std::logic_error, myPrefix <<
"k_ptrs_const(" << (numOffsets - 1) <<
") = " << valToCheck <<
" != valuesPacked_wdv.extent(0) = " << valuesPacked_wdv.extent(0) <<
".");
1382 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
size_t(valToCheck) != myGraph_->lclIndsPacked_wdv.extent(0),
1383 std::logic_error, myPrefix <<
"k_ptrs_const(" << (numOffsets - 1) <<
") = " << valToCheck <<
" != myGraph_->lclIndsPacked.extent(0) = " << myGraph_->lclIndsPacked_wdv.extent(0) <<
".");
// Final checks on the graph's packed host row offsets: length must be
// lclNumRows + 1 and the last entry must match both packed extents.
1389 const char myPrefix[] =
"After packing, ";
1390 auto rowPtrsPackedHost = myGraph_->getRowPtrsPackedHost();
1391 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
size_t(rowPtrsPackedHost.extent(0)) !=
size_t(lclNumRows + 1),
1392 std::logic_error, myPrefix <<
"myGraph_->rowPtrsPacked_host_.extent(0) = " << rowPtrsPackedHost.extent(0) <<
" != lclNumRows+1 = " << (lclNumRows + 1) <<
".");
1393 if (rowPtrsPackedHost.extent(0) != 0) {
1394 const size_t numOffsets(rowPtrsPackedHost.extent(0));
1395 const size_t valToCheck = rowPtrsPackedHost(numOffsets - 1);
1396 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck !=
size_t(valuesPacked_wdv.extent(0)),
1397 std::logic_error, myPrefix <<
"k_ptrs_const(" << (numOffsets - 1) <<
") = " << valToCheck <<
" != valuesPacked_wdv.extent(0) = " << valuesPacked_wdv.extent(0) <<
".");
1398 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck !=
size_t(myGraph_->lclIndsPacked_wdv.extent(0)),
1399 std::logic_error, myPrefix <<
"k_ptrs_const(" << (numOffsets - 1) <<
") = " << valToCheck <<
" != myGraph_->lclIndsPacked_wdvk_inds.extent(0) = " << myGraph_->lclIndsPacked_wdv.extent(0) <<
".");
// Default for "Optimize Storage": true unless the graph is static and its
// storage is not already optimized.
1407 const bool defaultOptStorage =
1408 !isStaticGraph() || staticGraph_->isStorageOptimized();
// Use the "Optimize Storage" parameter when a ParameterList was supplied,
// otherwise fall back to the default computed above.
1409 const bool requestOptimizedStorage =
1410 (!params.is_null() &&
1411 params->get(
"Optimize Storage", defaultOptStorage)) ||
1412 (params.is_null() && defaultOptStorage);
1417 if (requestOptimizedStorage) {
1422 std::ostringstream os;
1423 os << *prefix <<
"Optimizing storage: free k_numRowEntries_: "
1424 << myGraph_->k_numRowEntries_.extent(0) << endl;
1425 std::cerr << os.str();
// Drop the per-row entry counts by replacing them with an empty view.
1428 myGraph_->k_numRowEntries_ = row_entries_type();
// Make the unpacked members alias the packed ones and record that both the
// graph and the matrix now use packed 1-D storage.
1433 myGraph_->rowPtrsUnpacked_dev_ = myGraph_->rowPtrsPacked_dev_;
1434 myGraph_->rowPtrsUnpacked_host_ = myGraph_->rowPtrsPacked_host_;
1435 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1436 myGraph_->lclIndsUnpacked_wdv = myGraph_->lclIndsPacked_wdv;
1437 valuesUnpacked_wdv = valuesPacked_wdv;
1439 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1440 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1443 std::ostringstream os;
1444 os << *prefix <<
"User requested NOT to optimize storage"
1446 std::cerr << os.str();
// fillLocalMatrix (name taken from the profiling-region label below; the
// signature line is not visible in this excerpt).
//
// Values-only counterpart of the graph-and-matrix fill: it reads entry
// counts and packed row offsets from staticGraph_, and when the number of
// entries differs from the allocation size it allocates a packed values
// array and runs a pack kernel over all local rows; otherwise
// valuesPacked_wdv aliases valuesUnpacked_wdv. If optimized storage is
// (still) requested, valuesUnpacked_wdv is set to alias the packed values
// and the storage status becomes STORAGE_1D_PACKED.
//
// NOTE(review): braces and several guard conditions are missing from this
// excerpt; confirm the exact nesting against the full file.
1451 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1455 using Teuchos::Array;
1456 using Teuchos::ArrayRCP;
1457 using Teuchos::null;
1460 using ::Tpetra::Details::ProfilingRegion;
1461 using row_map_type =
typename Graph::local_graph_device_type::row_map_type;
1462 using non_const_row_map_type =
typename row_map_type::non_const_type;
1463 using values_type =
typename local_matrix_device_type::values_type;
1464 ProfilingRegion regionFLM(
"Tpetra::CrsMatrix::fillLocalMatrix");
1465 const size_t lclNumRows = getLocalNumRows();
// Diagnostic prefix for the std::cerr logging below (guard not visible).
1468 std::unique_ptr<std::string> prefix;
1470 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalMatrix");
1471 std::ostringstream os;
1472 os << *prefix <<
"lclNumRows: " << lclNumRows << endl;
1473 std::cerr << os.str();
// Entry count, allocation size and packed row offsets come from the
// (possibly const) static graph.
1485 size_t nodeNumEntries = staticGraph_->getLocalNumEntries();
1486 size_t nodeNumAllocated = staticGraph_->getLocalAllocationSize();
1487 row_map_type k_rowPtrs = staticGraph_->rowPtrsPacked_dev_;
1489 row_map_type k_ptrs;
// Optimized storage is requested unless a supplied ParameterList sets
// "Optimize Storage" to false; the default is true unless the graph is
// static and not storage-optimized.
1495 bool requestOptimizedStorage =
true;
1496 const bool default_OptimizeStorage =
1497 !isStaticGraph() || staticGraph_->isStorageOptimized();
1498 if (!params.is_null() &&
1499 !params->get(
"Optimize Storage", default_OptimizeStorage)) {
1500 requestOptimizedStorage =
false;
// The matrix cannot optimize storage if the graph did not: the request is
// cleared after the message below (the call that emits the message is on a
// line not visible in this excerpt).
1507 if (!staticGraph_->isStorageOptimized() &&
1508 requestOptimizedStorage) {
1510 "You requested optimized storage "
1511 "by setting the \"Optimize Storage\" flag to \"true\" in "
1512 "the ParameterList, or by virtue of default behavior.  "
1513 "However, the associated CrsGraph was filled separately and "
1514 "requested not to optimize storage. Therefore, the "
1515 "CrsMatrix cannot optimize storage.");
1516 requestOptimizedStorage =
false;
// Entry count != allocation size: values need to be packed.
1541 if (nodeNumEntries != nodeNumAllocated) {
1543 std::ostringstream os;
1544 os << *prefix <<
"Unpacked 1-D storage: numEnt="
1545 << nodeNumEntries <<
", allocSize=" << nodeNumAllocated
1547 std::cerr << os.str();
1552 std::ostringstream os;
1553 os << *prefix <<
"Allocate packed row offsets: "
1554 << (lclNumRows + 1) << endl;
1555 std::cerr << os.str();
// Temporary packed row offsets (size argument on a line not visible here).
1557 non_const_row_map_type tmpk_ptrs(
"Tpetra::CrsGraph::ptr",
1562 size_t lclTotalNumEntries = 0;
1565 typename row_entries_type::const_type numRowEnt_h =
1566 staticGraph_->k_numRowEntries_;
1568 lclTotalNumEntries =
1575 std::ostringstream os;
1576 os << *prefix <<
"Allocate packed values: "
1577 << lclTotalNumEntries << endl;
1578 std::cerr << os.str();
// NOTE(review): the allocation label here is "Tpetra::CrsMatrix::val",
// whereas fillLocalGraphAndMatrix labels the same kind of allocation
// "Tpetra::CrsMatrix::values"; consider unifying for profiling output.
1580 k_vals = values_type(
"Tpetra::CrsMatrix::val", lclTotalNumEntries);
// Pack kernel: copies from the unpacked device values into k_vals, using
// tmpk_ptrs and k_rowPtrs as the two offset arrays, one iteration per row.
1584 typename values_type::non_const_type,
1585 typename values_type::const_type,
1586 typename row_map_type::non_const_type,
1587 typename row_map_type::const_type>
1588 valsPacker(k_vals, valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1589 tmpk_ptrs, k_rowPtrs);
1592 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1593 Kokkos::parallel_for(
"Tpetra::CrsMatrix pack values",
1594 range_type(0, lclNumRows), valsPacker);
// Presumably the already-packed branch: packed values alias the unpacked.
1597 valuesPacked_wdv = valuesUnpacked_wdv;
1599 std::ostringstream os;
1600 os << *prefix <<
"Storage already packed: "
1601 <<
"valuesUnpacked_wdv: " << valuesUnpacked_wdv.extent(0) << endl;
1602 std::cerr << os.str();
// With optimized storage, the unpacked values alias the packed values and
// the matrix records packed 1-D storage.
1607 if (requestOptimizedStorage) {
1610 valuesUnpacked_wdv = valuesPacked_wdv;
1612 this->storageStatus_ = Details::STORAGE_1D_PACKED;
// Inserts new column indices into one graph row and copies the matching
// values into the row's value storage.
// (The function-name/first-parameter lines are not visible in this excerpt;
// `graph` and `rowInfo` are presumably parameters declared there.)
//
// newInds     - indices to insert, forwarded to graph.insertIndices.
// oldRowVals  - the row's current value storage (written in place).
// newRowVals  - values for the inserted entries.
// lg, I       - forwarded unchanged to graph.insertIndices.
1616 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1620 const typename crs_graph_type::SLocalGlobalViews& newInds,
1621 const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1622 const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1623 const ELocalGlobal lg,
1624 const ELocalGlobal I) {
// Remember the entry count before insertion; new values are appended there.
1625 const size_t oldNumEnt = rowInfo.numEntries;
1626 const size_t numInserted = graph.insertIndices(rowInfo, newInds, lg, I);
// Copy the first numInserted new values to positions
// [oldNumEnt, oldNumEnt + numInserted) of the row's values. The guard keeps
// &newRowVals[0] from being evaluated when nothing was inserted.
// NOTE(review): raw memcpy with a C-style cast -- assumes impl_scalar_type
// is trivially copyable; confirm, or consider std::copy.
1632 if (numInserted > 0) {
1633 const size_t startOffset = oldNumEnt;
1634 memcpy((
void*)&oldRowVals[startOffset], &newRowVals[0],
1635 numInserted *
sizeof(impl_scalar_type));
// insertLocalValues, ArrayView overload (name from the error prefix below;
// the signature's first lines are not visible in this excerpt).
//
// Inserts `values` at local column `indices` of local row `lclRow`.
// Throws std::runtime_error when fill is not active, the graph is static,
// the graph has no column Map, the graph indices are global, the two input
// arrays differ in size, or the row is not owned by this process.
// Throws std::invalid_argument in debug builds for column indices outside
// the column Map, and for a CombineMode other than the two handled below.
1639 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1642 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1643 const Teuchos::ArrayView<const Scalar>& values,
1646 const char tfecfFuncName[] =
"insertLocalValues: ";
// Precondition checks; each failure raises std::runtime_error.
1648 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillActive(), std::runtime_error,
1649 "Fill is not active.  After calling fillComplete, you must call "
1650 "resumeFill before you may insert entries into the matrix again.");
1651 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStaticGraph(), std::runtime_error,
1652 "Cannot insert indices with static graph; use replaceLocalValues() "
1656 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(graph.
colMap_.is_null(), std::runtime_error,
1657 "Cannot insert local indices without a column map.");
1660 "Graph indices are global; use "
1661 "insertGlobalValues().");
1662 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(values.size() != indices.size(), std::runtime_error,
1663 "values.size() = " << values.size()
1664 <<
" != indices.size() = " << indices.size() <<
".");
1665 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1666 !graph.
rowMap_->isNodeLocalElement(lclRow), std::runtime_error,
1667 "Local row index " << lclRow <<
" does not belong to this process.");
// Lazily allocate index/value storage on first insertion.
1669 if (!graph.indicesAreAllocated()) {
1673 this->allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
// Debug builds only: collect the input column indices that fail a test
// whose condition is on a line not visible here (per the message below,
// presumably membership in the column Map) and report them.
1676 #ifdef HAVE_TPETRA_DEBUG
1677 const size_t numEntriesToAdd =
static_cast<size_t>(indices.size());
1682 using Teuchos::toString;
1685 Teuchos::Array<LocalOrdinal> badColInds;
1686 bool allInColMap =
true;
1687 for (
size_t k = 0; k < numEntriesToAdd; ++k) {
1689 allInColMap =
false;
1690 badColInds.push_back(indices[k]);
1694 std::ostringstream os;
1695 os <<
"You attempted to insert entries in owned row " << lclRow
1696 <<
", at the following column indices: " << toString(indices)
1698 os <<
"Of those, the following indices are not in the column Map on "
1700 << toString(badColInds) <<
"." << endl
1702 "the matrix has a column Map already, it is invalid to insert "
1703 "entries at those locations.";
1704 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::invalid_argument, os.str());
1707 #endif  // HAVE_TPETRA_DEBUG
// Host view of this row's values, written through by the callbacks below.
1711 auto valsView = this->getValuesViewHostNonConst(rowInfo);
// First combine branch (its condition is not visible; the callback
// accumulates): for input position k mapped to row offset `offset`, add
// values[k] into the row. The middle callback argument is unused.
1713 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) { valsView[offset] += values[k]; };
1714 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1715 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1716 }
else if (CM ==
INSERT) {
// INSERT: overwrite the stored value instead of accumulating.
1717 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) { valsView[offset] = values[k]; };
1718 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1719 graph.insertLocalIndicesImpl(lclRow, indices, cb);
// Any other CombineMode is rejected.
// NOTE(review): the message lacks a space between the streamed mode name
// and "but this has not been implemented.".
1721 std::ostringstream os;
1722 os <<
"You attempted to use insertLocalValues with CombineMode " <<
combineModeToString(CM)
1723 <<
"but this has not been implemented." << endl;
1724 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::invalid_argument, os.str());
// Raw-array convenience overload (its name line is not visible here; it
// forwards to insertLocalValues): wraps `cols` and `vals`, each of length
// numEnt, in non-owning ArrayViews and calls the ArrayView overload with
// the given row and combine mode.
1728 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1731 const LocalOrdinal numEnt,
1732 const Scalar vals[],
1733 const LocalOrdinal cols[],
1735 Teuchos::ArrayView<const LocalOrdinal> colsT(cols, numEnt);
1736 Teuchos::ArrayView<const Scalar> valsT(vals, numEnt);
1737 this->insertLocalValues(localRow, colsT, valsT, CM);
// insertGlobalValuesImpl (name from the debug error prefix below; parts of
// the signature are not visible in this excerpt).
//
// Inserts numInputEnt entries with global column indices gblColInds into
// the row described by rowInfo, accumulating (+=) the input values into the
// row's value storage via a callback. In debug builds, the resulting row
// length is cross-checked and a detailed std::logic_error is thrown on
// mismatch.
1740 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1744 const GlobalOrdinal gblColInds[],
1746 const size_t numInputEnt) {
1747 #ifdef HAVE_TPETRA_DEBUG
1748 const char tfecfFuncName[] =
"insertGlobalValuesImpl: ";
// Entry count before insertion, used by the debug cross-check below.
1750 const size_t curNumEnt = rowInfo.numEntries;
1751 #endif  // HAVE_TPETRA_DEBUG
// Lazily allocate storage (with global indices) on first insertion.
1753 if (!graph.indicesAreAllocated()) {
1756 using ::Tpetra::Details::Behavior;
1757 const bool verbose = Behavior::verbose(
"CrsMatrix");
1758 this->allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
// Re-fetch the row info for this local row.
1763 rowInfo = graph.
getRowInfo(rowInfo.localRow);
// Callback: input position k lands at row offset `offset`; the value is
// added to whatever is stored there. The middle argument is unused.
1766 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1767 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1768 valsView[offset] += vals[k];
1770 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
// The statements that consume `cb` and define `numInserted` are on lines
// not visible in this excerpt.
1771 #ifdef HAVE_TPETRA_DEBUG
1777 #ifdef HAVE_TPETRA_DEBUG
// Debug cross-check: expected new row length vs. a value obtained on a line
// not visible here (per the message, graph.getNumEntriesInLocalRow).
1778 size_t newNumEnt = curNumEnt + numInserted;
1779 const size_t chkNewNumEnt =
1781 if (chkNewNumEnt != newNumEnt) {
// Build a diagnostic dump of the inputs and of the row's current contents.
// `origNumEnt` is declared on a line not visible in this excerpt.
1782 std::ostringstream os;
1784 <<
"newNumEnt = " << newNumEnt
1785 <<
" != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1786 <<
") = " << chkNewNumEnt <<
"." << std::endl
1787 <<
"\torigNumEnt: " << origNumEnt << std::endl
1788 <<
"\tnumInputEnt: " << numInputEnt << std::endl
1789 <<
"\tgblColInds: [";
1790 for (
size_t k = 0; k < numInputEnt; ++k) {
1791 os << gblColInds[k];
1792 if (k +
size_t(1) < numInputEnt) {
1796 os <<
"]" << std::endl
1798 for (
size_t k = 0; k < numInputEnt; ++k) {
1800 if (k +
size_t(1) < numInputEnt) {
1804 os <<
"]" << std::endl;
// If row views are available, also print the row as currently stored,
// using global or local indices depending on the matrix's indexing state.
1806 if (this->supportsRowViews()) {
1807 values_host_view_type vals2;
1808 if (this->isGloballyIndexed()) {
1809 global_inds_host_view_type gblColInds2;
1810 const GlobalOrdinal gblRow =
1811 graph.
rowMap_->getGlobalElement(rowInfo.localRow);
1813 Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid()) {
1814 os <<
"Local row index " << rowInfo.localRow <<
" is invalid!"
// getGlobalRowView may itself throw; record that instead of propagating so
// the original diagnostic is still reported.
1817 bool getViewThrew =
false;
1819 this->getGlobalRowView(gblRow, gblColInds2, vals2);
1820 }
catch (std::exception& e) {
1821 getViewThrew =
true;
1822 os <<
"getGlobalRowView threw exception:" << std::endl
1823 << e.what() << std::endl;
1825 if (!getViewThrew) {
1826 os <<
"\tNew global column indices: ";
1827 for (
size_t jjj = 0; jjj < gblColInds2.extent(0); jjj++)
1828 os << gblColInds2[jjj] <<
" ";
1830 os <<
"\tNew values: ";
1831 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1832 os << vals2[jjj] <<
" ";
1836 }
else if (this->isLocallyIndexed()) {
1837 local_inds_host_view_type lclColInds2;
1838 this->getLocalRowView(rowInfo.localRow, lclColInds2, vals2);
1839 os <<
"\tNew local column indices: ";
1840 for (
size_t jjj = 0; jjj < lclColInds2.extent(0); jjj++)
1841 os << lclColInds2[jjj] <<
" ";
1843 os <<
"\tNew values: ";
1844 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1845 os << vals2[jjj] <<
" ";
// The mismatch is an internal invariant violation: always throw.
1850 os <<
"Please report this bug to the Tpetra developers.";
1851 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error, os.str());
1853 #endif  // HAVE_TPETRA_DEBUG
// insertGlobalValues, ArrayView overload (name from the error prefix below;
// the first signature lines are not visible in this excerpt).
//
// Inserts `values` at global column `indices` of a global row:
//  - a row not in the row Map on this process is handed to
//    insertNonownedGlobalValues;
//  - an owned row of a matrix with a static graph is rejected with
//    std::runtime_error;
//  - when the graph has a column Map, indices failing the per-index test
//    are rejected with std::invalid_argument;
//  - otherwise the entries are inserted via insertGlobalValuesImpl.
1856 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1859 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1860 const Teuchos::ArrayView<const Scalar>& values) {
1862 using Teuchos::toString;
1864 typedef LocalOrdinal LO;
1865 typedef GlobalOrdinal GO;
1866 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
1867 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
1868 const char tfecfFuncName[] =
"insertGlobalValues: ";
// The size-consistency check is compiled only in debug builds here.
1870 #ifdef HAVE_TPETRA_DEBUG
1871 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(values.size() != indices.size(), std::runtime_error,
1872 "values.size() = " << values.size() <<
" != indices.size() = "
1873 << indices.size() <<
".");
1874 #endif  // HAVE_TPETRA_DEBUG
1878 const map_type& rowMap = *(this->getCrsGraphRef().rowMap_);
// lclRow is computed on a line not visible here; an invalid local row means
// the calling process does not own gblRow.
1881 if (lclRow == OTLO::invalid()) {
1888 this->insertNonownedGlobalValues(gblRow, indices, values);
// Owned row: insertion is not allowed when the graph is static.
1890 if (this->isStaticGraph()) {
1892 const int myRank = rowMap.getComm()->getRank();
1893 const int numProcs = rowMap.getComm()->getSize();
1894 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
1895 "The matrix was constructed with a constant (\"static\") graph, "
1896 "yet the given global row index "
1897 << gblRow <<
" is in the row "
1898 "Map on the calling process (with rank "
1899 << myRank <<
", of " << numProcs <<
" process(es)).  In this case, you may not insert "
1900 "new entries into rows owned by the calling process.");
// Raw views of the inputs; values are reinterpreted as the implementation
// scalar type IST.
1904 const IST*
const inputVals =
1905 reinterpret_cast<const IST*
>(values.getRawPtr());
1906 const GO*
const inputGblColInds = indices.getRawPtr();
1907 const size_t numInputEnt = indices.size();
// With a column Map present, scan all input indices; the per-index test is
// on a line not visible here (per the messages, column-Map membership).
// The list of offending indices is kept only in debug builds.
1916 if (!graph.
colMap_.is_null()) {
1922 #ifdef HAVE_TPETRA_DEBUG
1923 Teuchos::Array<GO> badColInds;
1924 #endif  // HAVE_TPETRA_DEBUG
1925 const size_type numEntriesToInsert = indices.size();
1926 bool allInColMap =
true;
1927 for (size_type k = 0; k < numEntriesToInsert; ++k) {
1929 allInColMap =
false;
1930 #ifdef HAVE_TPETRA_DEBUG
1931 badColInds.push_back(indices[k]);
1934 #endif  // HAVE_TPETRA_DEBUG
// Compose the error text; debug builds list the bad indices, other builds
// give a generic message.
1938 std::ostringstream os;
1939 os <<
"You attempted to insert entries in owned row " << gblRow
1940 <<
", at the following column indices: " << toString(indices)
1942 #ifdef HAVE_TPETRA_DEBUG
1943 os <<
"Of those, the following indices are not in the column Map "
1945 << toString(badColInds) <<
"." << endl
1946 <<
"Since the matrix has a column Map already, it is invalid "
1947 "to insert entries at those locations.";
1949 os <<
"At least one of those indices is not in the column Map "
1952 <<
"It is invalid to insert into "
1953 "columns not in the column Map on the process that owns the "
1955 #endif  // HAVE_TPETRA_DEBUG
1956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::invalid_argument, os.str());
// All checks passed: perform the actual insertion.
1960 this->insertGlobalValuesImpl(graph, rowInfo, inputGblColInds,
1961 inputVals, numInputEnt);
// Raw-array convenience overload (its name line is not visible here; it
// forwards to insertGlobalValues): wraps `inds` and `vals`, each of length
// numEnt, in non-owning ArrayViews and calls the ArrayView overload.
1965 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1968 const LocalOrdinal numEnt,
1969 const Scalar vals[],
1970 const GlobalOrdinal inds[]) {
1971 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numEnt);
1972 Teuchos::ArrayView<const Scalar> valsT(vals, numEnt);
1973 this->insertGlobalValues(globalRow, indsT, valsT);
// insertGlobalValuesFiltered (name from the error prefix below; the name
// line and the last parameter are not visible in this excerpt).
//
// Like insertGlobalValues, but input columns that have no local id in the
// graph's column Map are skipped instead of causing an error. The input is
// processed as maximal runs of consecutive "in column Map" entries; each
// run is inserted, and the single entry that terminated the run is skipped.
//
// Throws std::runtime_error on a values/indices size mismatch and when an
// owned row is targeted while the graph is static.
1976 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1979 const GlobalOrdinal gblRow,
1980 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1981 const Teuchos::ArrayView<const Scalar>& values,
1983 typedef impl_scalar_type IST;
1984 typedef LocalOrdinal LO;
1985 typedef GlobalOrdinal GO;
1986 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
1987 const char tfecfFuncName[] =
"insertGlobalValuesFiltered: ";
1990 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(values.size() != indices.size(), std::runtime_error,
1991 "values.size() = " << values.size() <<
" != indices.size() = "
1992 << indices.size() <<
".");
// Map the global row to a local row; an invalid result means the row is
// not owned by this process and goes to the nonowned-insert path.
1997 const map_type& rowMap = *(this->getCrsGraphRef().rowMap_);
1998 const LO lclRow = rowMap.getLocalElement(gblRow);
1999 if (lclRow == OTLO::invalid()) {
2006 this->insertNonownedGlobalValues(gblRow, indices, values);
// Owned row: insertion is not allowed when the graph is static.
2008 if (this->isStaticGraph()) {
2010 const int myRank = rowMap.getComm()->getRank();
2011 const int numProcs = rowMap.getComm()->getSize();
2012 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
2013 "The matrix was constructed with a constant (\"static\") graph, "
2014 "yet the given global row index "
2015 << gblRow <<
" is in the row "
2016 "Map on the calling process (with rank "
2017 << myRank <<
", of " << numProcs <<
" process(es)).  In this case, you may not insert "
2018 "new entries into rows owned by the calling process.");
2021 crs_graph_type& graph = *(this->myGraph_);
2022 const IST*
const inputVals =
2023 reinterpret_cast<const IST*
>(values.getRawPtr());
2024 const GO*
const inputGblColInds = indices.getRawPtr();
2025 const size_t numInputEnt = indices.size();
2026 RowInfo rowInfo = graph.getRowInfo(lclRow);
// Case 1: column Map present and graph locally indexed. Convert each run of
// in-Map global columns to local ids and insert them with
// insertLocalValues.
2028 if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2035 const map_type& colMap = *(graph.colMap_);
2036 size_t curOffset = 0;
2037 while (curOffset < numInputEnt) {
// Collect local ids for the run starting at curOffset. How the scan stops
// at a column without a local id is on lines not visible here; the
// invariant checked below states that it ends at end-of-input or at such a
// column.
2041 Teuchos::Array<LO> lclIndices;
2042 size_t endOffset = curOffset;
2043 for (; endOffset < numInputEnt; ++endOffset) {
2044 auto lclIndex = colMap.getLocalElement(inputGblColInds[endOffset]);
2045 if (lclIndex != OTLO::invalid())
2046 lclIndices.push_back(lclIndex);
// NOTE(review): size_t difference narrowed to LO here.
2053 const LO numIndInSeq = (endOffset - curOffset);
2054 if (numIndInSeq != 0) {
2055 this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
2061 const bool invariant = endOffset == numInputEnt ||
2062 colMap.getLocalElement(inputGblColInds[endOffset]) == OTLO::invalid();
2063 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!invariant, std::logic_error, std::endl
2064 <<
"Invariant failed!");
// Skip the entry that ended the run and continue after it.
2066 curOffset = endOffset + 1;
2068 }
// Case 2: column Map present but graph not locally indexed. Same run
// detection, but each run is inserted with its global indices.
else if (!graph.colMap_.is_null()) {
2069 const map_type& colMap = *(graph.colMap_);
2070 size_t curOffset = 0;
2071 while (curOffset < numInputEnt) {
2075 size_t endOffset = curOffset;
2076 for (; endOffset < numInputEnt &&
2077 colMap.getLocalElement(inputGblColInds[endOffset]) != OTLO::invalid();
// NOTE(review): size_t difference narrowed to LO here as well.
2083 const LO numIndInSeq = (endOffset - curOffset);
2084 if (numIndInSeq != 0) {
// Refresh rowInfo before each insertion into this row.
2085 rowInfo = graph.getRowInfo(lclRow);
2086 this->insertGlobalValuesImpl(graph, rowInfo,
2087 inputGblColInds + curOffset,
2088 inputVals + curOffset,
2095 const bool invariant = endOffset == numInputEnt ||
2096 colMap.getLocalElement(inputGblColInds[endOffset]) == OTLO::invalid();
2097 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!invariant, std::logic_error, std::endl
2098 <<
"Invariant failed!");
2100 curOffset = endOffset + 1;
// Case 3 (presumably: no column Map, so nothing to filter against): insert
// all input entries in one call.
2103 this->insertGlobalValuesImpl(graph, rowInfo, inputGblColInds,
2104 inputVals, numInputEnt);
// Calls insertGlobalValuesFiltered and converts any std::exception it
// throws into a std::runtime_error whose message includes the original
// what() text (and, in the first message variant, the global row index).
//
// gblRow, indices, values - forwarded to insertGlobalValuesFiltered.
// prefix  - C string intended to prefix the error message.
// verbose - declared in the signature; its use is on lines not visible in
//           this excerpt (presumably selects the detailed message).
// A further parameter (`debug`, forwarded below) is declared on a line not
// visible here.
//
// NOTE(review): `prefix` is a `const char*`, so `os << *prefix` streams only
// its first character rather than the whole string; `os << prefix` looks
// like the intent. The second message variant starts with ": " and does not
// stream the prefix at all. Confirm and fix in the full file.
2109 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2110 void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2111 insertGlobalValuesFilteredChecked(
2112 const GlobalOrdinal gblRow,
2113 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2114 const Teuchos::ArrayView<const Scalar>& values,
2115 const char*
const prefix,
2117 const bool verbose) {
2122 insertGlobalValuesFiltered(gblRow, indices, values, debug);
2123 }
catch (std::exception& e) {
// Build the replacement message; the two variants below are separated by
// control flow that is not visible in this excerpt.
2124 std::ostringstream os;
2126 const size_t maxNumToPrint =
2128 os << *prefix <<
": insertGlobalValuesFiltered threw an "
2131 <<
"Global row index: " << gblRow << endl;
2138 os <<
": insertGlobalValuesFiltered threw an exception: "
// Always rethrow as std::runtime_error with the composed message.
2141 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str());
// Implementation helper that overwrites existing entries of one row, given
// local column indices (the name line is not visible in this excerpt;
// presumably replaceLocalValuesImpl, which a sibling overload calls).
//
// For each of the numElts input columns, the position of that column in the
// row is looked up with KokkosSparse::findRelOffset; if it is found
// (offset != rowInfo.numEntries), rowVals[offset] is overwritten with the
// matching new value. Columns not present in the row are left alone.
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the second code path
// is taken and the graph has no column Map; the normal return value is on
// lines not visible here.
2145 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2151 const LocalOrdinal inds[],
2153 const LocalOrdinal numElts) {
2154 typedef LocalOrdinal LO;
2155 typedef GlobalOrdinal GO;
// Passed to findRelOffset along with `hint` (declared on a line not visible
// here) to select/accelerate the search.
2156 const bool sorted = graph.
isSorted();
// First path: search the row's column indices directly for each input
// local column index.
2166 for (LO j = 0; j < numElts; ++j) {
2167 const LO lclColInd = inds[j];
2168 const size_t offset =
2169 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
2170 lclColInd, hint, sorted);
2171 if (offset != rowInfo.numEntries) {
2172 rowVals[offset] = newVals[j];
// Second path (presumably a globally indexed graph): a column Map is
// required; without one, invalid() is returned.
2178 if (graph.
colMap_.is_null()) {
2179 return Teuchos::OrdinalTraits<LO>::invalid();
// gblColInd is produced on a line not visible here (presumably the global
// id of inds[j] via the column Map); inputs with no valid global id are
// skipped.
2187 for (LO j = 0; j < numElts; ++j) {
2189 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid()) {
2190 const size_t offset =
2191 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
2192 gblColInd, hint, sorted);
2193 if (offset != rowInfo.numEntries) {
2194 rowVals[offset] = newVals[j];
// ArrayView convenience overload (its name line is not visible here; it
// forwards to replaceLocalValues).
// Returns Teuchos::OrdinalTraits<LO>::invalid() if lclCols and vals differ
// in length; otherwise forwards the raw pointers and the entry count to the
// raw-array overload and returns its result.
2213 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2217 const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2218 const Teuchos::ArrayView<const Scalar>& vals) {
2219 typedef LocalOrdinal LO;
// Sizes are compared after conversion to LO.
2221 const LO numInputEnt =
static_cast<LO
>(lclCols.size());
2222 if (static_cast<LO>(vals.size()) != numInputEnt) {
2223 return Teuchos::OrdinalTraits<LO>::invalid();
2225 const LO*
const inputInds = lclCols.getRawPtr();
2226 const Scalar*
const inputVals = vals.getRawPtr();
2227 return this->replaceLocalValues(localRow, numInputEnt,
2228 inputVals, inputInds);
// Kokkos::View convenience overload (its name line is not visible here; it
// forwards to replaceLocalValues).
// Returns Teuchos::OrdinalTraits<LO>::invalid() if the two views differ in
// extent; otherwise forwards their data pointers to the raw-array overload.
2231 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2237 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2238 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals) {
2240 const LO numInputEnt = inputInds.extent(0);
2241 if (numInputEnt != static_cast<LO>(inputVals.extent(0))) {
2242 return Teuchos::OrdinalTraits<LO>::invalid();
// The view stores impl_scalar_type; the raw-array overload takes Scalar, so
// the pointer is reinterpreted.
2244 const Scalar*
const inVals =
2245 reinterpret_cast<const Scalar*
>(inputVals.data());
2246 return this->replaceLocalValues(localRow, numInputEnt,
2247 inVals, inputInds.data());
// Raw-array overload (its name line is not visible here; presumably
// replaceLocalValues, the target of the two convenience overloads).
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
// the matrix has no graph; returns 0 if the row info reports an invalid
// local row; otherwise returns the result of replaceLocalValuesImpl applied
// to the row's host value storage.
2250 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2254 const LocalOrdinal numEnt,
2255 const Scalar inputVals[],
2256 const LocalOrdinal inputCols[]) {
2258 typedef LocalOrdinal LO;
2260 if (!this->isFillActive() || this->staticGraph_.is_null()) {
2262 return Teuchos::OrdinalTraits<LO>::invalid();
// rowInfo (and graph) are obtained on lines not visible in this excerpt.
2267 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2270 return static_cast<LO
>(0);
// Writable host view of this row's values; input values are reinterpreted
// as the implementation scalar type IST before delegation.
2272 auto curRowVals = this->getValuesViewHostNonConst(rowInfo);
2273 const IST*
const inVals =
reinterpret_cast<const IST*
>(inputVals);
2274 return this->replaceLocalValuesImpl(curRowVals.data(), graph, rowInfo,
2275 inputCols, inVals, numEnt);
// Implementation helper that overwrites existing entries of one row, given
// global column indices (the name line is not visible in this excerpt;
// presumably replaceGlobalValuesImpl, which a sibling overload calls).
//
// Wraps the numElts input indices in an ArrayView and builds a callback
// that, for input position k found at row offset `offset`, overwrites
// rowVals[offset] with newVals[k]. The call that consumes the callback and
// the return statement are on lines not visible here.
2278 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2284 const GlobalOrdinal inds[],
2286 const LocalOrdinal numElts) {
2287 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
// The middle callback argument is unused.
2289 [&](
size_t const k,
size_t const ,
size_t const offset) {
2290 rowVals[offset] = newVals[k];
2292 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
// ArrayView overload (signature head not visible here; presumably
// CrsMatrix::replaceGlobalValues).  Checks that the global column-index and
// value arrays have equal length and forwards raw pointers to the
// pointer-based replaceGlobalValues overload.
2296 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2300 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2301 const Teuchos::ArrayView<const Scalar>& inputVals) {
2302 typedef LocalOrdinal LO;
// Entry count comes from the index array, narrowed to LO.
2304 const LO numInputEnt =
static_cast<LO
>(inputGblColInds.size());
// Length mismatch: return the invalid-ordinal sentinel.
2305 if (static_cast<LO>(inputVals.size()) != numInputEnt) {
2306 return Teuchos::OrdinalTraits<LO>::invalid();
// Pointer overload takes values before indices.
2308 return this->replaceGlobalValues(globalRow, numInputEnt,
2309 inputVals.getRawPtr(),
2310 inputGblColInds.getRawPtr());
// Raw-pointer overload (signature head not visible here; presumably
// CrsMatrix::replaceGlobalValues).  Guards on fill state and graph
// availability, then delegates to replaceGlobalValuesImpl with the row's
// writable host values.
//
// Visible return values:
//  - OrdinalTraits<LO>::invalid() when fill is not active or there is no
//    static graph;
//  - 0 when the row lookup yields an invalid local row;
//  - otherwise whatever replaceGlobalValuesImpl returns.
2313 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2317 const LocalOrdinal numEnt,
2318 const Scalar inputVals[],
2319 const GlobalOrdinal inputGblColInds[]) {
2321 typedef LocalOrdinal LO;
// Values can only be modified while fill is active and a graph exists.
2323 if (!this->isFillActive() || this->staticGraph_.is_null()) {
2325 return Teuchos::OrdinalTraits<LO>::invalid();
// rowInfo is computed on a line not visible in this excerpt.  An invalid
// localRow yields 0 rather than an error.
2330 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2333 return static_cast<LO
>(0);
2336 auto curRowVals = this->getValuesViewHostNonConst(rowInfo);
// Reinterpret the caller's Scalar array as the internal storage type IST.
2337 const IST*
const inVals =
reinterpret_cast<const IST*
>(inputVals);
2338 return this->replaceGlobalValuesImpl(curRowVals.data(), graph, rowInfo,
2339 inputGblColInds, inVals, numEnt);
// Kokkos::View overload (signature head not visible here; presumably
// CrsMatrix::replaceGlobalValues).  Checks that the index and value views
// have the same extent and forwards to the pointer-based replaceGlobalValues
// overload.  The final argument of the forwarding call is on a line not
// visible in this excerpt.
2342 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2348 const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2349 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals) {
// Entry count comes from the index view, narrowed to LO.
2357 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
// Extent mismatch: return the invalid-ordinal sentinel.
2358 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2359 return Teuchos::OrdinalTraits<LO>::invalid();
// The view stores impl_scalar_type; the pointer overload takes Scalar.
2361 const Scalar*
const inVals =
2362 reinterpret_cast<const Scalar*
>(inputVals.data());
2363 return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
// Implementation helper (signature head not visible here; presumably
// CrsMatrix::sumIntoGlobalValuesImpl, matching the call made by the
// pointer-based sumIntoGlobalValues overload).  For each input entry, looks
// up the entry's position within the row and adds newVals[j] to
// rowVals[offset], atomically or not.  Two loops are visible: one keyed by a
// local column index and one keyed directly by the input global index; the
// conditions selecting between them are on lines not visible in this excerpt.
2367 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2373 const GlobalOrdinal inds[],
2375 const LocalOrdinal numElts,
2376 const bool atomic) {
2377 typedef LocalOrdinal LO;
2378 typedef GlobalOrdinal GO;
// Passed to findRelOffset together with the search hint.
2380 const bool sorted = graph.
isSorted();
// Checks for a column Map before the local-index path; what happens when it
// is null is not visible here.
2389 if (graph.
colMap_.is_null()) {
2400 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid();
// Path keyed by local column index (lclColInd is computed on a line not
// visible here).  Entries whose local index is invalid are skipped.
2402 for (LO j = 0; j < numElts; ++j) {
2404 if (lclColInd != LINV) {
2405 const size_t offset =
2406 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
2407 lclColInd, hint, sorted);
// An offset equal to rowInfo.numEntries is treated as "not found".
2408 if (offset != rowInfo.numEntries) {
// Atomic and plain accumulation; the branch on `atomic` that chooses between
// them is not visible in this excerpt.
2410 Kokkos::atomic_add(&rowVals[offset], newVals[j]);
2412 rowVals[offset] += newVals[j];
// Path keyed directly by the input global column index.
2424 for (LO j = 0; j < numElts; ++j) {
2425 const GO gblColInd = inds[j];
2426 const size_t offset =
2427 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
2428 gblColInd, hint, sorted);
// An offset equal to rowInfo.numEntries is treated as "not found".
2429 if (offset != rowInfo.numEntries) {
2431 Kokkos::atomic_add(&rowVals[offset], newVals[j]);
2433 rowVals[offset] += newVals[j];
// ArrayView overload (signature head not visible here; presumably
// CrsMatrix::sumIntoGlobalValues).  Checks that the index and value arrays
// have equal length and forwards raw pointers to the pointer-based
// sumIntoGlobalValues overload.  The final argument of the forwarding call is
// on a line not visible in this excerpt.
2447 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2451 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2452 const Teuchos::ArrayView<const Scalar>& inputVals,
2453 const bool atomic) {
2454 typedef LocalOrdinal LO;
// Entry count comes from the index array, narrowed to LO.
2456 const LO numInputEnt =
static_cast<LO
>(inputGblColInds.size());
// Length mismatch: return the invalid-ordinal sentinel.
2457 if (static_cast<LO>(inputVals.size()) != numInputEnt) {
2458 return Teuchos::OrdinalTraits<LO>::invalid();
// Pointer overload takes values before indices.
2460 return this->sumIntoGlobalValues(gblRow, numInputEnt,
2461 inputVals.getRawPtr(),
2462 inputGblColInds.getRawPtr(),
// Raw-pointer overload (signature head not visible here; presumably
// CrsMatrix::sumIntoGlobalValues).  Returns the invalid-ordinal sentinel when
// fill is not active or there is no static graph.  When the row lookup yields
// an invalid local row, the inputs are wrapped in ArrayViews and passed to
// insertNonownedGlobalValues; otherwise the sum is performed on the row's
// host values via sumIntoGlobalValuesImpl.
2466 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2470 const LocalOrdinal numInputEnt,
2471 const Scalar inputVals[],
2472 const GlobalOrdinal inputGblColInds[],
2473 const bool atomic) {
2475 typedef LocalOrdinal LO;
2476 typedef GlobalOrdinal GO;
// Values can only be modified while fill is active and a graph exists.
2478 if (!this->isFillActive() || this->staticGraph_.is_null()) {
2480 return Teuchos::OrdinalTraits<LO>::invalid();
// rowInfo is computed on a line not visible in this excerpt.  An invalid
// localRow leads to the insertNonownedGlobalValues path below.
2485 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2490 using Teuchos::ArrayView;
// With zero entries the views are built from nullptr instead of the caller's
// pointers.
2491 ArrayView<const GO> inputGblColInds_av(
2492 numInputEnt == 0 ?
nullptr : inputGblColInds,
2494 ArrayView<const Scalar> inputVals_av(
2495 numInputEnt == 0 ?
nullptr : inputVals, numInputEnt);
2500 this->insertNonownedGlobalValues(gblRow, inputGblColInds_av,
// Valid local row: sum directly into the row's writable host values.
2510 auto curRowVals = this->getValuesViewHostNonConst(rowInfo);
// Reinterpret the caller's Scalar array as the internal storage type IST.
2511 const IST*
const inVals =
reinterpret_cast<const IST*
>(inputVals);
2512 return this->sumIntoGlobalValuesImpl(curRowVals.data(), graph, rowInfo,
2513 inputGblColInds, inVals,
2514 numInputEnt, atomic);
// Public entry point (signature head not visible here; presumably
// CrsMatrix::transformLocalValues, given the call it forwards to).  Applies
// the binary function f to entries of local row lclRow identified by local
// column indices, by delegating to the rowVals-based transformLocalValues
// overload.
//
// Visible return values:
//  - OrdinalTraits<LO>::invalid() when fill is not active or there is no
//    static graph;
//  - 0 when graph.getRowInfo(lclRow) reports an invalid local row;
//  - otherwise whatever the delegated overload returns.
2518 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2522 const LocalOrdinal numInputEnt,
2523 const impl_scalar_type inputVals[],
2524 const LocalOrdinal inputCols[],
2525 std::function<impl_scalar_type(
const impl_scalar_type&,
const impl_scalar_type&)> f,
2526 const bool atomic) {
2527 using Tpetra::Details::OrdinalTraits;
2528 typedef LocalOrdinal LO;
// Values can only be modified while fill is active and a graph exists.
2530 if (!this->isFillActive() || this->staticGraph_.is_null()) {
2532 return Teuchos::OrdinalTraits<LO>::invalid();
2534 const crs_graph_type& graph = *(this->staticGraph_);
2535 const RowInfo rowInfo = graph.getRowInfo(lclRow);
// An invalid localRow yields 0 rather than an error.
2537 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid()) {
2540 return static_cast<LO
>(0);
// Writable host view of this row's values.
2542 auto curRowVals = this->getValuesViewHostNonConst(rowInfo);
2543 return this->transformLocalValues(curRowVals.data(), graph,
2544 rowInfo, inputCols, inputVals,
2545 numInputEnt, f, atomic);
// Public entry point: applies the binary function f to entries of global row
// gblRow identified by global column indices, by delegating to the
// rowVals-based transformGlobalValues overload.
//
// Visible return values:
//  - OrdinalTraits<LO>::invalid() when fill is not active or there is no
//    static graph;
//  - 0 when graph.getRowInfoFromGlobalRowIndex(gblRow) reports an invalid
//    local row;
//  - otherwise whatever the delegated overload returns.
2548 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2550 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2551 transformGlobalValues(
const GlobalOrdinal gblRow,
2552 const LocalOrdinal numInputEnt,
2553 const impl_scalar_type inputVals[],
2554 const GlobalOrdinal inputCols[],
2555 std::function<impl_scalar_type(
const impl_scalar_type&,
const impl_scalar_type&)> f,
2556 const bool atomic) {
2557 using Tpetra::Details::OrdinalTraits;
2558 typedef LocalOrdinal LO;
// Values can only be modified while fill is active and a graph exists.
2560 if (!this->isFillActive() || this->staticGraph_.is_null()) {
2562 return OrdinalTraits<LO>::invalid();
2564 const crs_graph_type& graph = *(this->staticGraph_);
2565 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex(gblRow);
// An invalid localRow yields 0 rather than an error.
2567 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid()) {
2570 return static_cast<LO
>(0);
// Writable host view of this row's values.
2572 auto curRowVals = this->getValuesViewHostNonConst(rowInfo);
2573 return this->transformGlobalValues(curRowVals.data(), graph,
2574 rowInfo, inputCols, inputVals,
2575 numInputEnt, f, atomic);
// Implementation overload operating directly on a row's value storage.
// For each input entry (local column index inds[j], value newVals[j]) that is
// found in the row, replaces rowVals[offset] with
// f(rowVals[offset], newVals[j]), either through
// atomic_binary_function_update or as a plain assignment.
//
// Two graph states are handled:
//  - locally indexed: the input index is searched for directly;
//  - globally indexed: the input local index is first converted to a global
//    index through the graph's column Map.
// The return statements are on lines not visible in this excerpt.
2578 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2580 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2581 transformLocalValues(impl_scalar_type rowVals[],
2582 const crs_graph_type& graph,
2583 const RowInfo& rowInfo,
2584 const LocalOrdinal inds[],
2585 const impl_scalar_type newVals[],
2586 const LocalOrdinal numElts,
2587 std::function<impl_scalar_type(
const impl_scalar_type&,
const impl_scalar_type&)> f,
2588 const bool atomic) {
2589 typedef impl_scalar_type ST;
2590 typedef LocalOrdinal LO;
2591 typedef GlobalOrdinal GO;
// Passed to findRelOffset together with the search hint.
2598 const bool sorted = graph.isSorted();
2603 if (graph.isLocallyIndexed()) {
// Stored indices are local, so the inputs can be searched for as-is.
2606 auto colInds = graph.getLocalIndsViewHost(rowInfo);
2608 for (LO j = 0; j < numElts; ++j) {
2609 const LO lclColInd = inds[j];
2610 const size_t offset =
2611 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
2612 lclColInd, hint, sorted);
// An offset equal to rowInfo.numEntries is treated as "not found".
2613 if (offset != rowInfo.numEntries) {
// Atomic and plain update; the branch on `atomic` that chooses between them
// is not visible in this excerpt.
2622 ST*
const dest = &rowVals[offset];
2623 (void)atomic_binary_function_update(dest, newVals[j], f);
2626 rowVals[offset] = f(rowVals[offset], newVals[j]);
2632 }
else if (graph.isGloballyIndexed()) {
// Converting local to global indices requires a column Map; what happens when
// it is null is not visible here.
2636 if (graph.colMap_.is_null()) {
2643 const map_type& colMap = *(graph.colMap_);
2646 auto colInds = graph.getGlobalIndsViewHost(rowInfo);
2648 const GO GINV = Teuchos::OrdinalTraits<GO>::invalid();
2649 for (LO j = 0; j < numElts; ++j) {
// Inputs whose local index has no global counterpart are skipped.
2650 const GO gblColInd = colMap.getGlobalElement(inds[j]);
2651 if (gblColInd != GINV) {
2652 const size_t offset =
2653 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
2654 gblColInd, hint, sorted);
2655 if (offset != rowInfo.numEntries) {
2664 ST*
const dest = &rowVals[offset];
2665 (void)atomic_binary_function_update(dest, newVals[j], f);
2668 rowVals[offset] = f(rowVals[offset], newVals[j]);
// Implementation overload operating directly on a row's value storage.
// For each input entry (global column index inds[j], value newVals[j]) that
// is found in the row, replaces rowVals[offset] with
// f(rowVals[offset], newVals[j]), either through
// atomic_binary_function_update or as a plain assignment.
//
// Two graph states are handled:
//  - globally indexed: the input index is searched for directly;
//  - locally indexed: the input global index is first converted to a local
//    index through the graph's column Map.
// The return statements are on lines not visible in this excerpt.
2683 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2685 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2686 transformGlobalValues(impl_scalar_type rowVals[],
2687 const crs_graph_type& graph,
2688 const RowInfo& rowInfo,
2689 const GlobalOrdinal inds[],
2690 const impl_scalar_type newVals[],
2691 const LocalOrdinal numElts,
2692 std::function<impl_scalar_type(
const impl_scalar_type&,
const impl_scalar_type&)> f,
2693 const bool atomic) {
2694 typedef impl_scalar_type ST;
2695 typedef LocalOrdinal LO;
2696 typedef GlobalOrdinal GO;
// Passed to findRelOffset together with the search hint.
2703 const bool sorted = graph.isSorted();
2708 if (graph.isGloballyIndexed()) {
// Stored indices are global, so the inputs can be searched for as-is.
2711 auto colInds = graph.getGlobalIndsViewHost(rowInfo);
2713 for (LO j = 0; j < numElts; ++j) {
2714 const GO gblColInd = inds[j];
2715 const size_t offset =
2716 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
2717 gblColInd, hint, sorted);
// An offset equal to rowInfo.numEntries is treated as "not found".
2718 if (offset != rowInfo.numEntries) {
// Atomic and plain update; the branch on `atomic` that chooses between them
// is not visible in this excerpt.
2727 ST*
const dest = &rowVals[offset];
2728 (void)atomic_binary_function_update(dest, newVals[j], f);
2731 rowVals[offset] = f(rowVals[offset], newVals[j]);
2737 }
else if (graph.isLocallyIndexed()) {
// Converting global to local indices requires a column Map; what happens when
// it is null is not visible here.
2741 if (graph.colMap_.is_null()) {
2747 const map_type& colMap = *(graph.colMap_);
2750 auto colInds = graph.getLocalIndsViewHost(rowInfo);
2752 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid();
2753 for (LO j = 0; j < numElts; ++j) {
// Inputs whose global index has no local counterpart are skipped.
2754 const LO lclColInd = colMap.getLocalElement(inds[j]);
2755 if (lclColInd != LINV) {
2756 const size_t offset =
2757 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
2758 lclColInd, hint, sorted);
2759 if (offset != rowInfo.numEntries) {
2768 ST*
const dest = &rowVals[offset];
2769 (void)atomic_binary_function_update(dest, newVals[j], f);
2772 rowVals[offset] = f(rowVals[offset], newVals[j]);
// Implementation helper (signature head not visible here; presumably
// CrsMatrix::sumIntoLocalValuesImpl, matching the call made by the
// pointer-based sumIntoLocalValues overload).  For each input entry, looks up
// the entry's position within the row and adds newVals[j] to rowVals[offset],
// atomically or not.  Two loops are visible: one keyed directly by the input
// local index and one keyed by a global column index; the conditions
// selecting between them are on lines not visible in this excerpt.
2787 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2793 const LocalOrdinal inds[],
2795 const LocalOrdinal numElts,
2796 const bool atomic) {
2797 typedef LocalOrdinal LO;
2798 typedef GlobalOrdinal GO;
// Passed to findRelOffset together with the search hint.
2800 const bool sorted = graph.
isSorted();
// Path keyed directly by the input local column index.
2810 for (LO j = 0; j < numElts; ++j) {
2811 const LO lclColInd = inds[j];
2812 const size_t offset =
2813 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
2814 lclColInd, hint, sorted);
// An offset equal to rowInfo.numEntries is treated as "not found".
2815 if (offset != rowInfo.numEntries) {
// Atomic and plain accumulation; the branch on `atomic` that chooses between
// them is not visible in this excerpt.
2817 Kokkos::atomic_add(&rowVals[offset], newVals[j]);
2819 rowVals[offset] += newVals[j];
// The global-index path needs a column Map; without one the invalid-ordinal
// sentinel is returned.
2826 if (graph.
colMap_.is_null()) {
2827 return Teuchos::OrdinalTraits<LO>::invalid();
// Path keyed by global column index (gblColInd is computed on a line not
// visible here).  Entries whose global index is invalid are skipped.
2835 for (LO j = 0; j < numElts; ++j) {
2837 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid()) {
2838 const size_t offset =
2839 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
2840 gblColInd, hint, sorted);
2841 if (offset != rowInfo.numEntries) {
2843 Kokkos::atomic_add(&rowVals[offset], newVals[j]);
2845 rowVals[offset] += newVals[j];
// ArrayView overload (signature head not visible here; presumably
// CrsMatrix::sumIntoLocalValues).  Checks that the index and value arrays
// have equal length and forwards raw pointers, plus the atomic flag, to the
// pointer-based sumIntoLocalValues overload.
2865 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2869 const Teuchos::ArrayView<const LocalOrdinal>& indices,
2870 const Teuchos::ArrayView<const Scalar>& values,
2871 const bool atomic) {
// Entry count comes from the index array, narrowed to LO.
2873 const LO numInputEnt =
static_cast<LO
>(indices.size());
// Length mismatch: return the invalid-ordinal sentinel.
2874 if (static_cast<LO>(values.size()) != numInputEnt) {
2875 return Teuchos::OrdinalTraits<LO>::invalid();
2877 const LO*
const inputInds = indices.getRawPtr();
2878 const scalar_type*
const inputVals = values.getRawPtr();
// Pointer overload takes values before indices.
2879 return this->sumIntoLocalValues(localRow, numInputEnt,
2880 inputVals, inputInds, atomic);
// Kokkos::View overload (signature head not visible here; presumably
// CrsMatrix::sumIntoLocalValues).  Checks that the index and value views have
// the same extent and forwards the underlying pointers, plus the atomic flag,
// to the pointer-based sumIntoLocalValues overload.
2883 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2889 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2890 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
2891 const bool atomic) {
// Entry count comes from the index view, narrowed to LO.
2893 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
// Extent mismatch: return the invalid-ordinal sentinel.
2894 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2895 return Teuchos::OrdinalTraits<LO>::invalid();
// The declaration that receives this cast (inVals, used below) is on a line
// not visible in this excerpt.
2898 reinterpret_cast<const scalar_type*
>(inputVals.data());
2899 return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
2900 inputInds.data(), atomic);
// Raw-pointer overload (signature head not visible here; presumably
// CrsMatrix::sumIntoLocalValues).  Guards on fill state and graph
// availability, then delegates to sumIntoLocalValuesImpl with the row's
// writable host values.
//
// Visible return values:
//  - OrdinalTraits<LO>::invalid() when fill is not active or there is no
//    static graph;
//  - 0 when the row lookup yields an invalid local row;
//  - otherwise whatever sumIntoLocalValuesImpl returns.
2903 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2907 const LocalOrdinal numEnt,
2908 const Scalar vals[],
2909 const LocalOrdinal cols[],
2910 const bool atomic) {
2912 typedef LocalOrdinal LO;
// Values can only be modified while fill is active and a graph exists.
2914 if (!this->isFillActive() || this->staticGraph_.is_null()) {
2916 return Teuchos::OrdinalTraits<LO>::invalid();
// rowInfo is computed on a line not visible in this excerpt.  An invalid
// localRow yields 0 rather than an error.
2921 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2924 return static_cast<LO
>(0);
2926 auto curRowVals = this->getValuesViewHostNonConst(rowInfo);
// Reinterpret the caller's Scalar array as the internal storage type IST.
2927 const IST*
const inputVals =
reinterpret_cast<const IST*
>(vals);
2928 return this->sumIntoLocalValuesImpl(curRowVals.data(), graph, rowInfo,
2929 cols, inputVals, numEnt, atomic);
// Returns a read-only host view of one row's values (function name line not
// visible here; presumably getValuesViewHost).  Yields a default-constructed
// (empty) view when the row has no allocation or the unpacked value storage
// is empty; otherwise a host subview starting at rowinfo.offset1D (the extent
// argument is on a line not visible in this excerpt).
2932 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2934 values_dualv_type::t_host::const_type
2937 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
2938 return typename values_dualv_type::t_host::const_type();
2940 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a writable host view of one row's values (function name line not
// visible here; presumably getValuesViewHostNonConst).  Yields a
// default-constructed (empty) view when the row has no allocation or the
// unpacked value storage is empty; otherwise a host subview starting at
// rowinfo.offset1D (the extent argument is on a line not visible here).
2945 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2947 values_dualv_type::t_host
2950 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
2951 return typename values_dualv_type::t_host();
2953 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a read-only device view of one row's values (function name line not
// visible here; presumably getValuesViewDevice -- TODO confirm).  Yields a
// default-constructed (empty) view when the row has no allocation or the
// unpacked value storage is empty; otherwise a device subview starting at
// rowinfo.offset1D (the extent argument is on a line not visible here).
2958 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2960 values_dualv_type::t_dev::const_type
2963 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
2964 return typename values_dualv_type::t_dev::const_type();
2966 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// Returns a writable device view of one row's values (function name line not
// visible here; presumably getValuesViewDeviceNonConst -- TODO confirm).
// Yields a default-constructed (empty) view when the row has no allocation or
// the unpacked value storage is empty; otherwise a device subview starting at
// rowinfo.offset1D (the extent argument is on a line not visible here).
2971 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2973 values_dualv_type::t_dev
2976 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
2977 return typename values_dualv_type::t_dev();
2979 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// getLocalRowCopy: copies one local row's column indices (as local indices)
// and values into caller-provided host views, and reports the entry count
// through numEntries.
//
// Throws std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) when
// the matrix has no column Map, or when either output view is smaller than
// the number of entries in the row.
2984 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2987 nonconst_local_inds_host_view_type& indices,
2988 nonconst_values_host_view_type& values,
2989 size_t& numEntries)
const {
2990 using Teuchos::ArrayView;
2991 using Teuchos::av_reinterpret_cast;
2992 const char tfecfFuncName[] =
"getLocalRowCopy: ";
// Local column indices only exist once there is a column Map.
2994 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::runtime_error,
2995 "The matrix does not have a column Map yet. This means we don't have "
2996 "local indices for columns yet, so it doesn't make sense to call this "
2997 "method. If the matrix doesn't have a column Map yet, you should call "
2998 "fillComplete on it first.");
3000 const RowInfo rowinfo = staticGraph_->getRowInfo(localRow);
3001 const size_t theNumEntries = rowinfo.numEntries;
// Both output views must be able to hold the whole row.
3002 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) < theNumEntries ||
3003 static_cast<size_t>(values.size()) < theNumEntries,
3004 std::runtime_error,
"Row with local index " << localRow <<
" has " << theNumEntries <<
" entry/ies, but indices.size() = " << indices.size() <<
" and values.size() = " << values.size() <<
".");
// The count is reported before (and regardless of) the copy below.
3005 numEntries = theNumEntries;
// Nothing is copied when the row lookup reported an invalid local row.
3007 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid()) {
3008 if (staticGraph_->isLocallyIndexed()) {
// Stored indices are already local: copy them through unchanged.
3009 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3010 auto curVals = getValuesViewHost(rowinfo);
3012 for (
size_t j = 0; j < theNumEntries; ++j) {
3013 values[j] = curVals[j];
3014 indices[j] = curLclInds(j);
3016 }
else if (staticGraph_->isGloballyIndexed()) {
// Stored indices are global; colMap is fetched here, presumably to convert
// them to local indices on a line not visible in this excerpt.
3018 const map_type& colMap = *(staticGraph_->colMap_);
3019 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3020 auto curVals = getValuesViewHost(rowinfo);
3022 for (
size_t j = 0; j < theNumEntries; ++j) {
3023 values[j] = curVals[j];
// getGlobalRowCopy: copies one global row's column indices (as global
// indices) and values into caller-provided host views, and reports the entry
// count through numEntries.
//
// Throws std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) when
// either output view is smaller than the number of entries in the row.
3030 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3033 nonconst_global_inds_host_view_type& indices,
3034 nonconst_values_host_view_type& values,
3035 size_t& numEntries)
const {
3036 using Teuchos::ArrayView;
3037 using Teuchos::av_reinterpret_cast;
3038 const char tfecfFuncName[] =
"getGlobalRowCopy: ";
// Right-hand side of the rowinfo initialization; the declaration itself is on
// a line not visible in this excerpt.
3041 staticGraph_->getRowInfoFromGlobalRowIndex(globalRow);
3042 const size_t theNumEntries = rowinfo.numEntries;
// Both output views must be able to hold the whole row.
3043 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3044 static_cast<size_t>(indices.size()) < theNumEntries ||
3045 static_cast<size_t>(values.size()) < theNumEntries,
3046 std::runtime_error,
"Row with global index " << globalRow <<
" has " << theNumEntries <<
" entry/ies, but indices.size() = " << indices.size() <<
" and values.size() = " << values.size() <<
".");
// The count is reported before (and regardless of) the copy below.
3047 numEntries = theNumEntries;
// Nothing is copied when the row lookup reported an invalid local row.
3049 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid()) {
3050 if (staticGraph_->isLocallyIndexed()) {
// Stored indices are local; colMap is fetched here, presumably to convert
// them to global indices on a line not visible in this excerpt.
3051 const map_type& colMap = *(staticGraph_->colMap_);
3052 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3053 auto curVals = getValuesViewHost(rowinfo);
3055 for (
size_t j = 0; j < theNumEntries; ++j) {
3056 values[j] = curVals[j];
3059 }
else if (staticGraph_->isGloballyIndexed()) {
// Stored indices are already global: copy them through unchanged.
3060 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3061 auto curVals = getValuesViewHost(rowinfo);
3063 for (
size_t j = 0; j < theNumEntries; ++j) {
3064 values[j] = curVals[j];
3065 indices[j] = curGblInds(j);
// getLocalRowView: sets `indices` and `values` to host subviews of the
// matrix's own storage for one local row (no copy).  When the row is invalid
// or has no entries, both outputs are set to default-constructed (empty)
// views.
//
// Throws std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) when
// the matrix is globally indexed.  In HAVE_TPETRA_DEBUG builds, additional
// consistency checks on the resulting view sizes are performed.
3071 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3074 local_inds_host_view_type& indices,
3075 values_host_view_type& values)
const {
3076 const char tfecfFuncName[] =
"getLocalRowView: ";
// A local-index view is impossible while indices are stored globally.
3078 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3079 isGloballyIndexed(), std::runtime_error,
3080 "The matrix currently stores "
3081 "its indices as global indices, so you cannot get a view with local "
3082 "column indices. If the matrix has a column Map, you may call "
3083 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3084 "a view with global column indices by calling getGlobalRowCopy().");
3086 const RowInfo rowInfo = staticGraph_->getRowInfo(localRow);
// Valid, non-empty row: hand out subviews of the unpacked storage.  The
// subview arguments are partly on lines not visible in this excerpt.
3087 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid() &&
3088 rowInfo.numEntries > 0) {
3089 indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
3093 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
// Otherwise (the else line is not visible here): return empty views.
3099 indices = local_inds_host_view_type();
3100 values = values_host_view_type();
// Debug-only sanity checks: indices/values sizes agree with each other, with
// rowInfo.numEntries, and with getNumEntriesInLocalRow.
3103 #ifdef HAVE_TPETRA_DEBUG
3104 const char suffix[] =
3105 ". This should never happen. Please report this "
3106 "bug to the Tpetra developers.";
3107 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
3108 static_cast<size_t>(values.size()),
3110 "At the end of this method, for local row " << localRow <<
", "
3112 << indices.size() <<
" != values.size () = "
3113 << values.size() << suffix);
3114 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
3115 static_cast<size_t>(rowInfo.numEntries),
3117 "At the end of this method, for local row " << localRow <<
", "
3119 << indices.size() <<
" != rowInfo.numEntries = "
3120 << rowInfo.numEntries << suffix);
3121 const size_t expectedNumEntries = getNumEntriesInLocalRow(localRow);
3122 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowInfo.numEntries != expectedNumEntries, std::logic_error,
3124 "of this method, for local row "
3125 << localRow <<
", rowInfo.numEntries = "
3126 << rowInfo.numEntries <<
" != getNumEntriesInLocalRow(localRow) = " << expectedNumEntries << suffix);
3127 #endif // HAVE_TPETRA_DEBUG
// getGlobalRowView: sets `indices` and `values` to host subviews of the
// matrix's own storage for one global row (no copy).  When the row is invalid
// or has no entries, both outputs are set to default-constructed (empty)
// views.
//
// Throws std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) when
// the matrix is locally indexed.  In HAVE_TPETRA_DEBUG builds, additional
// consistency checks on the resulting view sizes are performed.
3130 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3133 global_inds_host_view_type& indices,
3134 values_host_view_type& values)
const {
3135 const char tfecfFuncName[] =
"getGlobalRowView: ";
// A global-index view is impossible once indices are stored locally.
3137 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3138 isLocallyIndexed(), std::runtime_error,
3139 "The matrix is locally indexed, so we cannot return a view of the row "
3140 "with global column indices. Use getGlobalRowCopy() instead.");
// Right-hand side of the rowInfo initialization; the declaration itself is on
// a line not visible in this excerpt.
3145 staticGraph_->getRowInfoFromGlobalRowIndex(globalRow);
// Valid, non-empty row: hand out subviews of the stored indices and values.
3146 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid() &&
3147 rowInfo.numEntries > 0) {
3148 indices = staticGraph_->gblInds_wdv.getHostSubview(rowInfo.offset1D,
3151 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
// Otherwise (the else line is not visible here): return empty views.
3155 indices = global_inds_host_view_type();
3156 values = values_host_view_type();
// Debug-only sanity checks: indices/values sizes agree with each other, with
// rowInfo.numEntries, and with getNumEntriesInGlobalRow.
3159 #ifdef HAVE_TPETRA_DEBUG
3160 const char suffix[] =
3161 ". This should never happen. Please report this "
3162 "bug to the Tpetra developers.";
3163 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
3164 static_cast<size_t>(values.size()),
3166 "At the end of this method, for global row " << globalRow <<
", "
3168 << indices.size() <<
" != values.size () = "
3169 << values.size() << suffix);
3170 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
3171 static_cast<size_t>(rowInfo.numEntries),
3173 "At the end of this method, for global row " << globalRow <<
", "
3175 << indices.size() <<
" != rowInfo.numEntries = "
3176 << rowInfo.numEntries << suffix);
3177 const size_t expectedNumEntries = getNumEntriesInGlobalRow(globalRow);
// NOTE(review): the visible message pieces below are spaced differently from
// the local-row counterpart (", rowInfo.numEntries " / trailing "=" without a
// space); part of the message is on lines not visible here, so confirm before
// normalizing.
3178 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowInfo.numEntries != expectedNumEntries, std::logic_error,
3180 "of this method, for global row "
3181 << globalRow <<
", rowInfo.numEntries "
3183 << rowInfo.numEntries <<
" != getNumEntriesInGlobalRow(globalRow) ="
3185 << expectedNumEntries << suffix);
3186 #endif // HAVE_TPETRA_DEBUG
// Scales the stored matrix values by theAlpha (signature not visible in this
// excerpt; presumably CrsMatrix::scale -- TODO confirm).  The scaling is done
// on the device view of the packed values with KokkosBlas::scal, using the
// same view as output and input.
3189 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3194 const size_t nlrs = staticGraph_->getLocalNumRows();
3195 const size_t numEntries = staticGraph_->getLocalNumEntries();
// Guard for "nothing to scale": indices not allocated, no local rows, or no
// local entries.  The body of this branch is not visible in this excerpt.
3196 if (!staticGraph_->indicesAreAllocated() ||
3197 nlrs == 0 || numEntries == 0) {
// ReadWrite access to the packed values on device.
3200 auto vals = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3201 KokkosBlas::scal(vals, theAlpha, vals);
// Signature and main body are not visible in this excerpt; the fence label
// below indicates this is CrsMatrix::setAllToScalar.  Visible logic: reads
// the local entry count, guards on unallocated indices or zero entries (the
// branch body is not visible), and issues a global Kokkos fence.
3205 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3214 const size_t numEntries = staticGraph_->getLocalNumEntries();
3215 if (!staticGraph_->indicesAreAllocated() || numEntries == 0) {
// Wait for outstanding Kokkos work; the label identifies this call site.
3222 Kokkos::fence(
"CrsMatrix::setAllToScalar");
// setAllValues (device-view overload): installs a complete local CRS
// structure from row pointers, column indices and values.
//
// Visible steps: validate that columnIndices and values have the same size
// (std::invalid_argument otherwise), require a non-null myGraph_
// (std::runtime_error otherwise), hand the structure to
// myGraph_->setAllIndices (exceptions are rethrown as std::runtime_error),
// verify the resulting local graph's extents, alias the unpacked values to
// the packed values, mark storage as STORAGE_1D_PACKED, and run
// checkInternalState().  The assignment of `values` into valuesPacked_wdv is
// presumably on a line not visible in this excerpt.
3226 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3228 setAllValues(
const typename local_graph_device_type::row_map_type& rowPointers,
3229 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
3230 const typename local_matrix_device_type::values_type& values) {
3232 ProfilingRegion region(
"Tpetra::CrsMatrix::setAllValues");
3233 const char tfecfFuncName[] =
"setAllValues: ";
// One value per column index is required.
3234 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(columnIndices.size() != values.size(), std::invalid_argument,
3235 "columnIndices.size() = " << columnIndices.size() <<
" != values.size()"
3237 << values.size() <<
".");
// The matrix must own its graph to be able to replace the structure.
3238 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_.is_null(), std::runtime_error,
"myGraph_ must not be null.");
// Install the structure in the graph; any std::exception is converted into a
// std::runtime_error carrying this method's prefix.
3241 myGraph_->setAllIndices(rowPointers, columnIndices);
3242 }
catch (std::exception& e) {
3243 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3244 "myGraph_->setAllIndices() threw an "
// Post-condition check: the graph's local structure must match the inputs.
3253 auto lclGraph = myGraph_->getLocalGraphDevice();
3254 const size_t numEnt = lclGraph.entries.extent(0);
3255 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lclGraph.row_map.extent(0) != rowPointers.extent(0) ||
3256 numEnt !=
static_cast<size_t>(columnIndices.extent(0)),
3258 "myGraph_->setAllIndices() did not correctly create "
3259 "local graph. Please report this bug to the Tpetra developers.");
// Packed and unpacked value storage refer to the same data after this call.
3262 valuesUnpacked_wdv = valuesPacked_wdv;
3266 this->storageStatus_ = Details::STORAGE_1D_PACKED;
3268 checkInternalState();
// setAllValues overload taking a local KokkosSparse matrix (signature not
// visible in this excerpt; the parameter is referred to as
// localDeviceMatrix).  Extracts the row map, column entries and values from
// that matrix and forwards them to the three-view setAllValues overload.
3271 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3275 ProfilingRegion region(
"Tpetra::CrsMatrix::setAllValues from KokkosSparse::CrsMatrix");
3277 auto graph = localDeviceMatrix.graph;
// Pull out the three CRS arrays.
3280 auto rows = graph.row_map;
3281 auto columns = graph.entries;
3282 auto values = localDeviceMatrix.values;
3284 setAllValues(rows, columns, values);
// setAllValues overload taking Teuchos::ArrayRCP inputs (row offsets `ptr`,
// column indices `ind`, values `val`).  Builds Kokkos views from the inputs
// and forwards to the view-based setAllValues overload.
//
// `ptr` is wrapped in an unmanaged size_t view and paired with a freshly
// allocated row-map view of the same length; the copy between them is
// presumably on a line not visible in this excerpt.  `ind` and `val` are
// deep-copied via getKokkosViewDeepCopy.
3287 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3290 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3291 const Teuchos::ArrayRCP<Scalar>& val) {
3292 using Kokkos::Compat::getKokkosViewDeepCopy;
3293 using Teuchos::ArrayRCP;
3294 using Teuchos::av_reinterpret_cast;
3297 typedef typename local_graph_device_type::row_map_type row_map_type;
3299 const char tfecfFuncName[] =
"setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
// Destination for the row offsets in the graph's native row-map type.
3305 typename row_map_type::non_const_type ptrNative(
"ptr", ptr.size());
// Non-owning view over the caller's size_t offsets.
3306 Kokkos::View<
const size_t*,
3307 typename row_map_type::array_layout,
3309 Kokkos::MemoryUnmanaged>
3310 ptrSizeT(ptr.getRawPtr(), ptr.size());
// Internal consistency check; both views were sized from ptr.size().
3313 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(ptrNative.extent(0) != ptrSizeT.extent(0),
3314 std::logic_error,
"ptrNative.extent(0) = " << ptrNative.extent(0) <<
" != ptrSizeT.extent(0) = " << ptrSizeT.extent(0) <<
". Please report this bug to the "
3315 "Tpetra developers.");
// Deep copies of indices and values (values reinterpreted as IST); DT is
// defined on a line not visible in this excerpt.
3317 auto indIn = getKokkosViewDeepCopy<DT>(ind());
3318 auto valIn = getKokkosViewDeepCopy<DT>(av_reinterpret_cast<IST>(val()));
3319 this->setAllValues(ptrNative, indIn, valIn);
// getLocalDiagOffsets: fills `offsets` (declared on a line not visible in
// this excerpt) with per-row diagonal offsets obtained from the graph.
//
// Throws std::runtime_error when the matrix has no graph.  `offsets` is grown
// to the local row count if it is smaller.  Two paths are visible: when
// memory_space is Kokkos::HostSpace the graph writes directly into an
// unmanaged view over `offsets`; otherwise it writes into a temporary device
// view, and an unmanaged host view over `offsets` is created (the copy
// between them is presumably on a line not visible here).
3322 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3325 const char tfecfFuncName[] =
"getLocalDiagOffsets: ";
3326 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(staticGraph_.is_null(), std::runtime_error,
"The matrix has no graph.");
// Ensure the output has room for one offset per local row; it is never
// shrunk here.
3333 const size_t lclNumRows = staticGraph_->getLocalNumRows();
3334 if (static_cast<size_t>(offsets.size()) < lclNumRows) {
3335 offsets.resize(lclNumRows);
// Host memory space: let the graph write straight into the caller's buffer.
3341 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3346 Kokkos::MemoryUnmanaged>
3348 output_type offsetsOut(offsets.getRawPtr(), lclNumRows);
3349 staticGraph_->getLocalDiagOffsets(offsetsOut);
// Other memory spaces: compute into a temporary device view first.
3351 Kokkos::View<size_t*, device_type> offsetsTmp(
"diagOffsets", lclNumRows);
3352 staticGraph_->getLocalDiagOffsets(offsetsTmp);
3353 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
3354 Kokkos::MemoryUnmanaged>
3356 output_type offsetsOut(offsets.getRawPtr(), lclNumRows);
// getLocalDiagCopy (1-argument form): copies the local diagonal into the
// Vector `diag` (declared on a line not visible in this excerpt).
//
// Throws std::runtime_error when the matrix has no graph or no column Map.
// In HAVE_TPETRA_DEBUG builds it additionally requires diag's Map to be
// compatible with the row Map.  The call that performs the extraction is only
// partly visible (its last argument is getLocalMatrixDevice()).
3362 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3365 using Teuchos::ArrayRCP;
3366 using Teuchos::ArrayView;
3367 using Teuchos::av_reinterpret_cast;
3368 const char tfecfFuncName[] =
"getLocalDiagCopy (1-arg): ";
3371 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3372 staticGraph_.is_null(), std::runtime_error,
3373 "This method requires that the matrix have a graph.");
// Guard for a missing row Map or communicator; the branch body is not
// visible in this excerpt.
3374 auto rowMapPtr = this->getRowMap();
3375 if (rowMapPtr.is_null() || rowMapPtr->getComm().is_null()) {
3381 auto colMapPtr = this->getColMap();
3382 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap() || colMapPtr.is_null(), std::runtime_error,
3383 "This method requires that the matrix have a column Map.");
3384 const map_type& rowMap = *rowMapPtr;
3385 const map_type& colMap = *colMapPtr;
3386 const LO myNumRows =
static_cast<LO
>(this->getLocalNumRows());
3388 #ifdef HAVE_TPETRA_DEBUG
3391 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3392 !diag.
getMap()->isCompatible(rowMap), std::runtime_error,
3393 "The input Vector's Map must be compatible with the CrsMatrix's row "
3394 "Map. You may check this by using Map's isCompatible method: "
3395 "diag.getMap ()->isCompatible (A.getRowMap ());");
3396 #endif // HAVE_TPETRA_DEBUG
// 1-D slice of column 0 restricted to the first myNumRows rows; D_lcl is
// obtained on a line not visible in this excerpt.
3400 const auto D_lcl_1d =
3401 Kokkos::subview(D_lcl, Kokkos::make_pair(LO(0), myNumRows), 0);
3403 const auto lclRowMap = rowMap.getLocalMap();
3408 getLocalMatrixDevice());
// getLocalDiagCopy overload taking precomputed diagonal offsets as an
// unmanaged Kokkos::View (the leading part of the signature is not visible in
// this excerpt).  Extracts the diagonal into column 0 of diag's local view
// using KokkosSparse::getDiagCopy and the supplied offsets.
//
// In HAVE_TPETRA_DEBUG builds it requires diag's Map to be compatible with
// the row Map (std::runtime_error otherwise).
3411 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3415 Kokkos::MemoryUnmanaged>& offsets)
const {
3416 typedef LocalOrdinal LO;
3418 #ifdef HAVE_TPETRA_DEBUG
3419 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3420 const map_type& rowMap = *(this->getRowMap());
3423 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3424 !diag.
getMap()->isCompatible(rowMap), std::runtime_error,
3425 "The input Vector's Map must be compatible with (in the sense of Map::"
3426 "isCompatible) the CrsMatrix's row Map.");
3427 #endif // HAVE_TPETRA_DEBUG
3437 const LO myNumRows =
static_cast<LO
>(this->getLocalNumRows());
// Right-hand side of the D_lcl_1d initialization (declaration line not
// visible here): column 0, rows [0, myNumRows).
3440 Kokkos::subview(D_lcl, Kokkos::make_pair(LO(0), myNumRows), 0);
3442 KokkosSparse::getDiagCopy(D_lcl_1d, offsets,
3443 getLocalMatrixDevice());
// getLocalDiagCopy overload taking precomputed diagonal offsets as a
// Teuchos::ArrayView (the leading part of the signature is not visible in
// this excerpt).  Runs a host parallel_for over the local rows: each output
// entry is first set to zero and then, when the row's offset is not the
// invalid sentinel, overwritten with the packed value at
// rowPtr(row) + offset(row).
//
// In HAVE_TPETRA_DEBUG builds it requires diag's Map to be compatible with
// the row Map (std::runtime_error otherwise).
3446 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3449 const Teuchos::ArrayView<const size_t>& offsets)
const {
3450 using LO = LocalOrdinal;
3451 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
3454 #ifdef HAVE_TPETRA_DEBUG
3455 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3456 const map_type& rowMap = *(this->getRowMap());
3459 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3460 !diag.
getMap()->isCompatible(rowMap), std::runtime_error,
3461 "The input Vector's Map must be compatible with (in the sense of Map::"
3462 "isCompatible) the CrsMatrix's row Map.");
3463 #endif // HAVE_TPETRA_DEBUG
// Column 0 of diag's host view; lclVecHost is obtained on a line not visible
// in this excerpt.
3475 auto lclVecHost1d = Kokkos::subview(lclVecHost, Kokkos::ALL(), 0);
// Non-owning host view over the caller's offsets array.
3477 using host_offsets_view_type =
3478 Kokkos::View<
const size_t*, Kokkos::HostSpace,
3479 Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
3480 host_offsets_view_type h_offsets(offsets.getRawPtr(), offsets.size());
3482 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
3483 const LO myNumRows =
static_cast<LO
>(this->getLocalNumRows());
// Offsets equal to INV mark rows for which no value is copied.
3484 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid();
3486 auto rowPtrsPackedHost = staticGraph_->getRowPtrsPackedHost();
3487 auto valuesPackedHost = valuesPacked_wdv.getHostView(Access::ReadOnly);
// The lambda captures INV and h_offsets by value and everything else by
// reference.
3488 Kokkos::parallel_for(
"Tpetra::CrsMatrix::getLocalDiagCopy",
3489 range_type(0, myNumRows),
3490 [&, INV, h_offsets](
const LO lclRow) {
// Default to zero; overwritten below when a valid offset is supplied.
3491 lclVecHost1d(lclRow) = STS::zero();
3492 if (h_offsets[lclRow] != INV) {
3493 auto curRowOffset = rowPtrsPackedHost(lclRow);
3494 lclVecHost1d(lclRow) =
3495 static_cast<IST
>(valuesPackedHost(curRowOffset + h_offsets[lclRow]));
// leftScale: scales the matrix using the Vector x (signature not visible in
// this excerpt).  x must be distributed by either the range Map or the row
// Map; otherwise std::invalid_argument is thrown.  When x uses the range Map
// and the graph has an exporter, x is first redistributed to the row Map.
// The scaling itself requires the matrix to be fill complete
// (std::runtime_error otherwise); the scaling call is only partly visible
// here.
3501 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3504 using Teuchos::ArrayRCP;
3505 using Teuchos::ArrayView;
3506 using Teuchos::null;
3509 using Teuchos::rcpFromRef;
3510 using ::Tpetra::Details::ProfilingRegion;
3512 const char tfecfFuncName[] =
"leftScale: ";
3514 ProfilingRegion region(
"Tpetra::CrsMatrix::leftScale");
// xp will refer to the vector actually used for scaling (x itself or a
// redistributed copy); its assignments are on lines not visible here.
3516 RCP<const vec_type> xp;
3517 if (this->getRangeMap()->isSameAs(*(x.
getMap()))) {
// x is in the range Map: if an exporter exists, move x onto the row Map.
// Note the exporter is applied with doImport in REPLACE mode.
3520 auto exporter = this->getCrsGraphRef().getExporter();
3521 if (exporter.get() !=
nullptr) {
3522 RCP<vec_type> tempVec(
new vec_type(this->getRowMap()));
3523 tempVec->doImport(x, *exporter,
REPLACE);
3528 }
else if (this->getRowMap()->isSameAs(*(x.
getMap()))) {
// Neither Map matches: reject the input.
3531 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::invalid_argument,
3532 "x's Map must be the same as "
3533 "either the row Map or the range Map of the CrsMatrix.");
3536 if (this->isFillComplete()) {
// Column 0 of the scaling vector's device view, read-only.
3537 auto x_lcl = xp->getLocalViewDevice(Access::ReadOnly);
3538 auto x_lcl_1d = Kokkos::subview(x_lcl, Kokkos::ALL(), 0);
3541 x_lcl_1d,
false,
false);
3544 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3545 "CrsMatrix::leftScale requires matrix to be"
// rightScale: scales the matrix using the Vector x (signature not visible in
// this excerpt).  x must be distributed by either the domain Map or the
// column Map; otherwise std::runtime_error is thrown.  When x uses the domain
// Map and the graph has an importer, x is first redistributed to the column
// Map.  The scaling itself requires the matrix to be fill complete
// (std::runtime_error otherwise); the scaling call is only partly visible
// here.
3550 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3553 using Teuchos::ArrayRCP;
3554 using Teuchos::ArrayView;
3555 using Teuchos::null;
3558 using Teuchos::rcpFromRef;
3559 using ::Tpetra::Details::ProfilingRegion;
3561 const char tfecfFuncName[] =
"rightScale: ";
3563 ProfilingRegion region(
"Tpetra::CrsMatrix::rightScale");
// xp will refer to the vector actually used for scaling (x itself or a
// redistributed copy); its assignments are on lines not visible here.
3565 RCP<const vec_type> xp;
3566 if (this->getDomainMap()->isSameAs(*(x.
getMap()))) {
// x is in the domain Map: if an importer exists, move x onto the column Map.
3569 auto importer = this->getCrsGraphRef().getImporter();
3570 if (importer.get() !=
nullptr) {
3571 RCP<vec_type> tempVec(
new vec_type(this->getColMap()));
3572 tempVec->doImport(x, *importer,
REPLACE);
3577 }
else if (this->getColMap()->isSameAs(*(x.
getMap()))) {
// Neither Map matches: reject the input.
// NOTE(review): leftScale reports the analogous condition with
// std::invalid_argument; the exception type differs here.
3580 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3581 "x's Map must be the same as "
3582 "either the domain Map or the column Map of the CrsMatrix.");
3585 if (this->isFillComplete()) {
// Column 0 of the scaling vector's device view, read-only.
3586 auto x_lcl = xp->getLocalViewDevice(Access::ReadOnly);
3587 auto x_lcl_1d = Kokkos::subview(x_lcl, Kokkos::ALL(), 0);
3590 x_lcl_1d,
false,
false);
3593 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3594 "CrsMatrix::rightScale requires matrix to be"
// getNormInf (name taken from the kernel label below).
// Reduces equilInfo.rowNorms to its maximum on this process, then takes the
// maximum over all processes in the communicator.
3599 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3604 using range_type = Kokkos::RangePolicy<execution_space, local_ordinal_type>;
// Process-local maximum over the entries of equilInfo.rowNorms, collected
// into myMax through a Kokkos::Max reducer.
3605 Kokkos::parallel_reduce(
3606 "getNormInf", range_type(0, equilInfo.rowNorms.extent(0)),
3608 max = equilInfo.rowNorms(i);
3610 Kokkos::Max<mag_type>(myMax));
// Global maximum across the communicator, written to totalMax.
3612 Teuchos::reduceAll<int, mag_type>(*(getComm()), Teuchos::REDUCE_MAX, myMax,
3613 Teuchos::outArg(totalMax));
// getNorm1 (name taken from the kernel label below).
// Reduces equilInfo.colNorms to its maximum on this process, then takes the
// maximum over all processes in the communicator.
3617 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Shortcut: when the caller asserts symmetry, the result of getNormInf() is
// returned instead.
3621 if (assumeSymmetric)
3622 return getNormInf();
3625 using range_type = Kokkos::RangePolicy<execution_space, local_ordinal_type>;
// Process-local maximum over the entries of equilInfo.colNorms, collected
// into myMax through a Kokkos::Max reducer.
3626 Kokkos::parallel_reduce(
3627 "getNorm1", range_type(0, equilInfo.colNorms.extent(0)),
3629 max = equilInfo.colNorms(i);
3631 Kokkos::Max<mag_type>(myMax));
// Global maximum across the communicator, written to totalMax.
3633 Teuchos::reduceAll<int, mag_type>(*(getComm()), Teuchos::REDUCE_MAX, myMax,
3634 Teuchos::outArg(totalMax));
// Returns the square root of the global sum of squared entry magnitudes.
// NOTE(review): the function name is not visible in this excerpt; presumably
// this is getFrobeniusNorm -- confirm against the declaration.
3638 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3642 using Teuchos::ArrayView;
3643 using Teuchos::outArg;
3644 using Teuchos::REDUCE_SUM;
3645 using Teuchos::reduceAll;
// Local accumulation is skipped entirely when this process has no entries.
3653 if (getLocalNumEntries() > 0) {
// Optimized storage: walk the packed values array in one flat loop.
3654 if (isStorageOptimized()) {
3657 const size_t numEntries = getLocalNumEntries();
3658 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
3659 for (
size_t k = 0; k < numEntries; ++k) {
3660 auto val = values[k];
3664 const mag_type val_abs = STS::abs(val);
3665 mySum += val_abs * val_abs;
// Otherwise: go row by row, using each row's RowInfo to locate its values
// and to bound the loop by the row's entry count.
3668 const LocalOrdinal numRows =
3669 static_cast<LocalOrdinal
>(this->getLocalNumRows());
3670 for (LocalOrdinal r = 0; r < numRows; ++r) {
3671 const RowInfo rowInfo = myGraph_->getRowInfo(r);
3672 const size_t numEntries = rowInfo.numEntries;
3673 auto A_r = this->getValuesViewHost(rowInfo);
3674 for (
size_t k = 0; k < numEntries; ++k) {
3676 const mag_type val_abs = STS::abs(val);
3677 mySum += val_abs * val_abs;
// Sum the per-process partial sums over the communicator, then take the
// square root of the global total.
3683 reduceAll<int, mag_type>(*(getComm()), REDUCE_SUM,
3684 mySum, outArg(totalSum));
3685 return STM::sqrt(totalSum);
// replaceColMap (name taken from the error-prefix string below).
// Forwards the new column Map to the graph owned by this matrix.
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix does
// not own a modifiable graph.
3688 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3691 const char tfecfFuncName[] =
"replaceColMap: ";
3695 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3696 myGraph_.is_null(), std::runtime_error,
3697 "This method does not work if the matrix has a const graph. The whole "
3698 "idea of a const graph is that you are not allowed to change it, but "
3699 "this method necessarily must modify the graph, since the graph owns "
3700 "the matrix's column Map.");
3701 myGraph_->replaceColMap(newColMap);
// reindexColumns (name taken from the error-prefix string below).
// Operates on the caller-supplied graph when it is non-null, otherwise on
// the graph owned by this matrix. When sortEachRow is set and the graph is
// locally indexed but unsorted, each row's column indices are sorted together
// with the matching values and the graph is then flagged as sorted.
// Throws std::invalid_argument when no graph is supplied and the matrix does
// not own one.
3704 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3707 const Teuchos::RCP<const map_type>& newColMap,
3708 const Teuchos::RCP<const import_type>& newImport,
3709 const bool sortEachRow) {
3710 const char tfecfFuncName[] =
"reindexColumns: ";
3711 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3712 graph ==
nullptr && myGraph_.is_null(), std::invalid_argument,
3713 "The input graph is null, but the matrix does not own its graph.");
// Pick the graph to work on: the explicit argument wins over myGraph_.
3715 crs_graph_type& theGraph = (graph ==
nullptr) ? *myGraph_ : *graph;
// Presumably passed to the graph's own reindexing call so that it does not
// sort by itself (that call is not visible here); sorting is done below so
// that the values move with their indices.
3716 const bool sortGraph =
false;
3720 if (sortEachRow && theGraph.isLocallyIndexed() && !theGraph.isSorted()) {
3721 const LocalOrdinal lclNumRows =
3722 static_cast<LocalOrdinal
>(theGraph.getLocalNumRows());
3724 for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3725 const RowInfo rowInfo = theGraph.getRowInfo(row);
3726 auto lclColInds = theGraph.getLocalIndsViewHostNonConst(rowInfo);
3727 auto vals = this->getValuesViewHostNonConst(rowInfo);
// Sort this row's column indices over the first rowInfo.numEntries entries.
3729 sort2(lclColInds.data(),
3730 lclColInds.data() + rowInfo.numEntries,
// Record on the graph that its indices are now sorted.
3733 theGraph.indicesAreSorted_ =
true;
// replaceDomainMap (name taken from the error-prefix string below).
// Forwards the new domain Map to the graph owned by this matrix.
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix does
// not own a modifiable graph.
3737 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3740 const char tfecfFuncName[] =
"replaceDomainMap: ";
3741 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3742 myGraph_.is_null(), std::runtime_error,
3743 "This method does not work if the matrix has a const graph. The whole "
3744 "idea of a const graph is that you are not allowed to change it, but this"
3745 " method necessarily must modify the graph, since the graph owns the "
3746 "matrix's domain Map and Import objects.");
3747 myGraph_->replaceDomainMap(newDomainMap);
// replaceDomainMapAndImporter (name taken from the error-prefix string below).
// Forwards the new domain Map and Import object to the graph owned by this
// matrix. Throws std::runtime_error when myGraph_ is null, i.e. when the
// matrix does not own a modifiable graph.
3750 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3753 Teuchos::RCP<const import_type>& newImporter) {
3754 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
3755 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3756 myGraph_.is_null(), std::runtime_error,
3757 "This method does not work if the matrix has a const graph. The whole "
3758 "idea of a const graph is that you are not allowed to change it, but this"
3759 " method necessarily must modify the graph, since the graph owns the "
3760 "matrix's domain Map and Import objects.");
3761 myGraph_->replaceDomainMapAndImporter(newDomainMap, newImporter);
// replaceRangeMap (name taken from the error-prefix string below).
// Forwards the new range Map to the graph owned by this matrix.
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix does
// not own a modifiable graph.
3764 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3767 const char tfecfFuncName[] =
"replaceRangeMap: ";
// The message names the range Map and Export objects, which is what the
// call below modifies on the graph.
3768 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3769 myGraph_.is_null(), std::runtime_error,
3770 "This method does not work if the matrix has a const graph. The whole "
3771 "idea of a const graph is that you are not allowed to change it, but this"
3772 " method necessarily must modify the graph, since the graph owns the "
3773 "matrix's range Map and Export objects.");
3774 myGraph_->replaceRangeMap(newRangeMap);
// replaceRangeMapAndExporter (name taken from the error-prefix string below).
// Forwards the new range Map and Export object to the graph owned by this
// matrix. Throws std::runtime_error when myGraph_ is null, i.e. when the
// matrix does not own a modifiable graph.
3777 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3780 Teuchos::RCP<const export_type>& newExporter) {
3781 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
// The message names the range Map and Export objects, which is what the
// call below modifies on the graph.
3782 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3783 myGraph_.is_null(), std::runtime_error,
3784 "This method does not work if the matrix has a const graph. The whole "
3785 "idea of a const graph is that you are not allowed to change it, but this"
3786 " method necessarily must modify the graph, since the graph owns the "
3787 "matrix's range Map and Export objects.");
3788 myGraph_->replaceRangeMapAndExporter(newRangeMap, newExporter);
// Appends the given (column index, value) pairs to the per-row buffer
// nonlocals_[globalRow], creating that buffer if it does not exist yet.
// NOTE(review): the function name is not visible in this excerpt; presumably
// this is insertNonownedGlobalValues -- confirm against the declaration.
3791 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3794 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
3795 const Teuchos::ArrayView<const Scalar>& values) {
3796 using Teuchos::Array;
3797 typedef GlobalOrdinal GO;
3798 typedef typename Array<GO>::size_type size_type;
3800 const size_type numToInsert = indices.size();
// operator[] on nonlocals_ inserts an empty entry for a row seen for the
// first time.
3803 std::pair<Array<GO>, Array<Scalar>>& curRow = nonlocals_[globalRow];
3804 Array<GO>& curRowInds = curRow.first;
3805 Array<Scalar>& curRowVals = curRow.second;
// Reserve once up front so the appends below do not reallocate repeatedly.
3806 const size_type newCapacity = curRowInds.size() + numToInsert;
3807 curRowInds.reserve(newCapacity);
3808 curRowVals.reserve(newCapacity);
// The loop count comes from indices.size(); values is indexed over the same
// range.
3809 for (size_type k = 0; k < numToInsert; ++k) {
3810 curRowInds.push_back(indices[k]);
3811 curRowVals.push_back(values[k]);
// globalAssemble (name taken from the error-prefix string below).
// Communicates the entries buffered in nonlocals_ into this matrix:
//   1. agree across processes whether anyone has buffered rows at all;
//   2. sort and merge each buffered row and build a row Map from the
//      buffered global row indices;
//   3. insert the buffered rows into a temporary matrix on that Map;
//   4. move the temporary matrix's entries into this matrix with the ADD
//      combine mode, either directly by Export (one-to-one row Map) or via
//      an intermediate one-to-one matrix followed by an Import;
//   5. release nonlocals_ and verify globally that every buffered row was
//      accounted for.
// Throws std::runtime_error when fill is not active, or when the final
// global completeness check fails.
3815 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3821 using Teuchos::Comm;
3822 using Teuchos::outArg;
3825 using Teuchos::REDUCE_MAX;
3826 using Teuchos::REDUCE_MIN;
3827 using Teuchos::reduceAll;
3830 typedef GlobalOrdinal GO;
3831 typedef typename Teuchos::Array<GO>::size_type size_type;
3832 const char tfecfFuncName[] =
"globalAssemble: ";
3833 ProfilingRegion regionGlobalAssemble(
"Tpetra::CrsMatrix::globalAssemble");
// Verbose tracing goes to std::cerr, each message built in its own
// ostringstream and prefixed with `prefix`.
3835 const bool verbose = Behavior::verbose(
"CrsMatrix");
3836 std::unique_ptr<std::string> prefix;
3838 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
3839 std::ostringstream os;
3840 os << *prefix <<
"nonlocals_.size()=" << nonlocals_.size()
3842 std::cerr << os.str();
3844 RCP<const Comm<int>> comm = getComm();
3846 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive(), std::runtime_error,
3847 "Fill must be active before "
3848 "you may call this method.");
3850 const size_t myNumNonlocalRows = nonlocals_.size();
// Step 1: MAX-reduce a 0/1 flag so every process learns whether any process
// has buffered rows.
3857 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3858 int someoneHasNonlocalRows = 0;
3859 reduceAll<int, int>(*comm, REDUCE_MAX, iHaveNonlocalRows,
3860 outArg(someoneHasNonlocalRows));
// Nobody has buffered rows; presumably an early return follows (the body of
// this branch is not visible here).
3861 if (someoneHasNonlocalRows == 0) {
// Step 2: collect the buffered global row indices and, per row, the number
// of entries left after merging duplicates.
3875 RCP<const map_type> nonlocalRowMap;
3876 Teuchos::Array<size_t> numEntPerNonlocalRow(myNumNonlocalRows);
3878 Teuchos::Array<GO> myNonlocalGblRows(myNumNonlocalRows);
3879 size_type curPos = 0;
3880 for (
auto mapIter = nonlocals_.begin(); mapIter != nonlocals_.end();
3881 ++mapIter, ++curPos) {
3882 myNonlocalGblRows[curPos] = mapIter->first;
3885 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
3886 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
// Sort the columns (values follow), merge duplicates, then trim both arrays
// to the merged length.
3893 sort2(gblCols.begin(), gblCols.end(), vals.begin());
3894 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
3895 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
3896 merge2(gblCols_newEnd, vals_newEnd,
3897 gblCols.begin(), gblCols.end(),
3898 vals.begin(), vals.end());
3899 gblCols.erase(gblCols_newEnd, gblCols.end());
3900 vals.erase(vals_newEnd, vals.end());
3901 numEntPerNonlocalRow[curPos] = gblCols.size();
// The index base of the new Map is the globally smallest buffered row index.
// A process with no buffered rows contributes the maximum GO value, so it
// does not affect the MIN reduction.
3912 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max();
3914 auto iter = std::min_element(myNonlocalGblRows.begin(),
3915 myNonlocalGblRows.end());
3916 if (iter != myNonlocalGblRows.end()) {
3917 myMinNonlocalGblRow = *iter;
3920 GO gblMinNonlocalGblRow = 0;
3921 reduceAll<int, GO>(*comm, REDUCE_MIN, myMinNonlocalGblRow,
3922 outArg(gblMinNonlocalGblRow));
3923 const GO indexBase = gblMinNonlocalGblRow;
3924 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
3925 nonlocalRowMap = rcp(
new map_type(INV, myNonlocalGblRows(), indexBase, comm));
3934 std::ostringstream os;
3935 os << *prefix <<
"Create nonlocal matrix" << endl;
3936 std::cerr << os.str();
// Step 3: temporary matrix holding exactly the buffered rows.
3938 RCP<crs_matrix_type> nonlocalMatrix =
3939 rcp(
new crs_matrix_type(nonlocalRowMap, numEntPerNonlocalRow()));
3941 size_type curPos = 0;
3942 for (
auto mapIter = nonlocals_.begin(); mapIter != nonlocals_.end();
3943 ++mapIter, ++curPos) {
3944 const GO gblRow = mapIter->first;
3946 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
3947 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
3949 nonlocalMatrix->insertGlobalValues(gblRow, gblCols(), vals());
// Step 4: choose the communication path based on whether this matrix's row
// Map is one-to-one.
3961 auto origRowMap = this->getRowMap();
3962 const bool origRowMapIsOneToOne = origRowMap->isOneToOne();
// Set to 0 below; presumably under a check on the Export object that is not
// visible here -- TODO confirm the guarding condition.
3964 int isLocallyComplete = 1;
3966 if (origRowMapIsOneToOne) {
3968 std::ostringstream os;
3969 os << *prefix <<
"Original row Map is 1-to-1" << endl;
3970 std::cerr << os.str();
3972 export_type exportToOrig(nonlocalRowMap, origRowMap);
3974 isLocallyComplete = 0;
3977 std::ostringstream os;
3978 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
3979 std::cerr << os.str();
// Direct path: Export straight into this matrix, adding to existing entries.
3981 this->doExport(*nonlocalMatrix, exportToOrig,
Tpetra::ADD);
3985 std::ostringstream os;
3986 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
3987 std::cerr << os.str();
// Indirect path: Export into a matrix on a one-to-one row Map first.
3994 export_type exportToOneToOne(nonlocalRowMap, oneToOneRowMap);
3996 isLocallyComplete = 0;
4004 std::ostringstream os;
4005 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4007 std::cerr << os.str();
4009 crs_matrix_type oneToOneMatrix(oneToOneRowMap, 0);
4011 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4017 std::ostringstream os;
4018 os << *prefix <<
"Free nonlocalMatrix" << endl;
4019 std::cerr << os.str();
// Drop the temporary matrix before the Import below.
4021 nonlocalMatrix = Teuchos::null;
4025 std::ostringstream os;
4026 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4027 std::cerr << os.str();
// Then Import from the one-to-one matrix into this matrix, adding to
// existing entries.
4029 import_type importToOrig(oneToOneRowMap, origRowMap);
4030 this->doImport(oneToOneMatrix, importToOrig,
Tpetra::ADD);
4038 std::ostringstream os;
4039 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4040 std::cerr << os.str();
// Step 5: swapping with an empty container releases the buffered data.
4042 decltype(nonlocals_) newNonlocals;
4043 std::swap(nonlocals_, newNonlocals);
// MIN-reduce the local flag: the result is 1 only if every process reported
// completeness.
4052 int isGloballyComplete = 0;
4053 reduceAll<int, int>(*comm, REDUCE_MIN, isLocallyComplete,
4054 outArg(isGloballyComplete));
4055 TEUCHOS_TEST_FOR_EXCEPTION(isGloballyComplete != 1, std::runtime_error,
4056 "On at least one process, "
4057 "you called insertGlobalValues with a global row index which is not in "
4058 "the matrix's row Map on any process in its communicator.");
// Puts the matrix back into a non-fill-complete state: resumes fill on the
// owned graph (skipped for a static graph), discards the cached applyHelper,
// and clears the fillComplete_ flag.
// NOTE(review): the function name is not visible in this excerpt; presumably
// this is resumeFill -- confirm against the declaration.
4061 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4064 if (!isStaticGraph()) {
4065 myGraph_->resumeFill(params);
// The helper is rebuilt by localApply when it finds the pointer empty.
4068 applyHelper.reset();
4069 fillComplete_ =
false;
// Returns whatever the underlying graph reports from haveGlobalConstants().
// NOTE(review): the function name is not visible in this excerpt; presumably
// it matches the graph method it forwards to.
4072 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4075 return getCrsGraphRef().haveGlobalConstants();
// fillComplete(params) (name taken from the error-prefix string below).
// Convenience overload: uses the graph's row Map as both the range Map and
// the domain Map and forwards to the three-argument fillComplete.
// Throws std::logic_error when getCrsGraph() returns null.
4078 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4081 const char tfecfFuncName[] =
"fillComplete(params): ";
4083 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->getCrsGraph().is_null(), std::logic_error,
4084 "getCrsGraph() returns null. This should not happen at this point. "
4085 "Please report this bug to the Tpetra developers.");
// `graph` is declared on a line not visible in this excerpt.
4094 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap();
4095 Teuchos::RCP<const map_type> domainMap = rangeMap;
4096 this->fillComplete(domainMap, rangeMap, params);
// fillComplete(domainMap, rangeMap, params) (name taken from the
// error-prefix string below).
// Finishes assembly of the matrix:
//   - reads the options "No Nonlocal Changes" and "sort column map ghost
//     gids" / "Sort column Map ghost GIDs" from params, when given;
//   - allocates values if the graph's indices are not yet allocated;
//   - runs globalAssemble() unless it was ruled out or there is one process;
//   - static graph: fills the local matrix only (in debug builds it first
//     checks that the supplied Maps match the graph's);
//   - owned graph: sets domain/range Maps, builds the column Map if needed,
//     makes indices local, sorts and merges, builds Import/Export, fills
//     the local graph and matrix, computes constants, marks the graph fill
//     complete;
//   - finally sets fillComplete_ and runs checkInternalState().
// Throws std::runtime_error when fill is not active or the matrix is
// already fill complete.
4100 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4103 const Teuchos::RCP<const map_type>& rangeMap,
4104 const Teuchos::RCP<Teuchos::ParameterList>& params) {
4108 using Teuchos::ArrayRCP;
4111 const char tfecfFuncName[] =
"fillComplete: ";
4112 ProfilingRegion regionFillComplete(
"Tpetra::CrsMatrix::fillComplete");
4113 const bool verbose = Behavior::verbose(
"CrsMatrix");
4114 std::unique_ptr<std::string> prefix;
4116 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4117 std::ostringstream os;
4118 os << *prefix << endl;
4119 std::cerr << os.str();
// NOTE(review): the label below is spelled "fillCompete"; left as is because
// the label text is observable at runtime.
4122 "Tpetra::CrsMatrix::fillCompete",
4125 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillActive() || this->isFillComplete(), std::runtime_error,
4126 "Matrix fill state must be active (isFillActive() "
4127 "must be true) before you may call fillComplete().");
4128 const int numProcs = this->getComm()->getSize();
// Defaults used when params is null or does not set the option.
4138 bool assertNoNonlocalInserts =
false;
4141 bool sortGhosts =
true;
4143 if (!params.is_null()) {
4144 assertNoNonlocalInserts = params->get(
"No Nonlocal Changes",
4145 assertNoNonlocalInserts);
// Two spellings of the same option are accepted; the lower-case one is
// checked first.
4146 if (params->isParameter(
"sort column map ghost gids")) {
4147 sortGhosts = params->get(
"sort column map ghost gids", sortGhosts);
4148 }
else if (params->isParameter(
"Sort column Map ghost GIDs")) {
4149 sortGhosts = params->get(
"Sort column Map ghost GIDs", sortGhosts);
// Global assembly is needed only with more than one process and when the
// caller has not asserted that there were no nonlocal insertions.
4154 const bool needGlobalAssemble = !assertNoNonlocalInserts && numProcs > 1;
4156 if (!this->myGraph_.is_null()) {
4157 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
// Allocate with local indices when a column Map already exists, otherwise
// with global indices.
4160 if (!this->getCrsGraphRef().indicesAreAllocated()) {
4161 if (this->hasColMap()) {
4162 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4164 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4169 if (needGlobalAssemble) {
4170 this->globalAssemble();
// On a single process, buffered nonlocal entries can only come from invalid
// row indices.
4172 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numProcs == 1 && nonlocals_.size() > 0,
4174 "Cannot have nonlocal entries on a serial run. "
4175 "An invalid entry (i.e., with row index not in the row Map) must have "
4176 "been submitted to the CrsMatrix.");
// Static (const) graph path: the graph cannot be modified here.
4179 if (this->isStaticGraph()) {
4187 #ifdef HAVE_TPETRA_DEBUG
4205 const bool domainMapsMatch =
4206 this->staticGraph_->getDomainMap()->isSameAs(*domainMap);
4207 const bool rangeMapsMatch =
4208 this->staticGraph_->getRangeMap()->isSameAs(*rangeMap);
4210 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!domainMapsMatch, std::runtime_error,
4211 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4212 "The graph cannot be changed because it was given to the CrsMatrix "
4213 "constructor as const. You can fix this by passing in the graph's "
4214 "domain Map and range Map to the matrix's fillComplete call.");
4216 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!rangeMapsMatch, std::runtime_error,
4217 "The CrsMatrix's range Map does not match the graph's range Map. "
4218 "The graph cannot be changed because it was given to the CrsMatrix "
4219 "constructor as const. You can fix this by passing in the graph's "
4220 "domain Map and range Map to the matrix's fillComplete call.");
4221 #endif // HAVE_TPETRA_DEBUG
4225 this->fillLocalMatrix(params);
// Owned-graph path (presumably the else branch of the static-graph test).
4232 this->myGraph_->setDomainRangeMaps(domainMap, rangeMap);
// remotePIDs is filled by makeColMap (when called) and later handed to
// makeImportExport.
4235 Teuchos::Array<int> remotePIDs(0);
4236 const bool mustBuildColMap = !this->hasColMap();
4237 if (mustBuildColMap) {
4238 this->myGraph_->makeColMap(remotePIDs);
// makeIndicesLocal returns a pair; a nonzero first member is treated as an
// error and the second member is used as the exception message.
4243 const std::pair<size_t, std::string> makeIndicesLocalResult =
4244 this->myGraph_->makeIndicesLocal(verbose);
4249 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(makeIndicesLocalResult.first != 0, std::runtime_error,
4250 makeIndicesLocalResult.second);
4252 const bool sorted = this->myGraph_->isSorted();
4253 const bool merged = this->myGraph_->isMerged();
4254 this->sortAndMergeIndicesAndValues(sorted, merged);
4259 this->myGraph_->makeImportExport(remotePIDs, mustBuildColMap);
4263 this->fillLocalGraphAndMatrix(params);
// Global constants are computed unless params sets "compute global
// constants" to false; computeLocalConstants is presumably the alternative
// branch.
4265 const bool callGraphComputeGlobalConstants = params.get() ==
nullptr ||
4266 params->get(
"compute global constants",
true);
4267 if (callGraphComputeGlobalConstants) {
4268 this->myGraph_->computeGlobalConstants();
4270 this->myGraph_->computeLocalConstants();
4272 this->myGraph_->fillComplete_ =
true;
4273 this->myGraph_->checkInternalState();
// Common tail for both paths.
4278 this->fillComplete_ =
true;
// NOTE(review): same "fillCompete" spelling as the label further up.
4281 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState");
4282 this->checkInternalState();
// expertStaticFillComplete (name taken from the error-prefix string below).
// Completes fill using caller-supplied Maps and Import/Export objects:
// forwards them to the owned graph's expertStaticFillComplete, fills the
// local graph and matrix, sets fillComplete_, and runs checkInternalState().
// Throws std::runtime_error when fill is not active or the matrix is already
// fill complete, and std::logic_error when myGraph_ is null.
// The HAVE_TPETRA_MMM_TIMINGS sections only add timers around each stage.
4286 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4289 const Teuchos::RCP<const map_type>& rangeMap,
4290 const Teuchos::RCP<const import_type>& importer,
4291 const Teuchos::RCP<const export_type>& exporter,
4292 const Teuchos::RCP<Teuchos::ParameterList>& params) {
4293 #ifdef HAVE_TPETRA_MMM_TIMINGS
// An optional "Timer Label" parameter customizes the timer name prefix.
4295 if (!params.is_null())
4296 label = params->get(
"Timer Label", label);
4297 std::string prefix = std::string(
"Tpetra ") + label + std::string(
": ");
4298 using Teuchos::TimeMonitor;
4300 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4303 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4304 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive() || isFillComplete(),
4306 "Matrix fill state must be active (isFillActive() "
4307 "must be true) before calling fillComplete().");
4308 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4309 myGraph_.is_null(), std::logic_error,
"myGraph_ is null. This is not allowed.");
4312 #ifdef HAVE_TPETRA_MMM_TIMINGS
4313 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
// Stage 1: complete the graph with the supplied Maps and Import/Export.
4316 myGraph_->expertStaticFillComplete(domainMap, rangeMap, importer, exporter, params);
4320 #ifdef HAVE_TPETRA_MMM_TIMINGS
4321 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
// Stage 2: build the local graph and matrix.
4324 fillLocalGraphAndMatrix(params);
4329 fillComplete_ =
true;
// Debug-only postconditions: fill must no longer be active, and the matrix
// must report fill complete. Each check has its own message.
4332 #ifdef HAVE_TPETRA_DEBUG
4333 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4334 ": We're at the end of fillComplete(), but isFillActive() is true. "
4335 "Please report this bug to the Tpetra developers.");
4336 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillComplete(), std::logic_error,
4337 ": We're at the end of fillComplete(), but isFillComplete() is false. "
4338 "Please report this bug to the Tpetra developers.");
4339 #endif // HAVE_TPETRA_DEBUG
4341 #ifdef HAVE_TPETRA_MMM_TIMINGS
4342 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
// Stage 3: consistency check of the finished matrix.
4345 checkInternalState();
// Walks one row's column indices [cols, cols + rowLen) with a read cursor
// (`cur`) and a write cursor (`newend`), comparing each index with the one
// at the write cursor, and returns the distance from the row start to the
// write cursor.
// NOTE(review): the function name and the bodies of both branches are not
// visible in this excerpt; presumably this is mergeRowIndicesAndValues, which
// compacts duplicate column indices in a sorted row -- confirm.
4349 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4354 LocalOrdinal* beg = cols;
4355 LocalOrdinal* end = cols + rowLen;
4356 LocalOrdinal* newend = beg;
// Reading starts at the second entry; the first is already in place.
4358 LocalOrdinal* cur = beg + 1;
4362 while (cur != end) {
// A column index different from the one at the write cursor.
4363 if (*cur != *newend) {
4379 return newend - beg;
// sortAndMergeIndicesAndValues (name taken from the error-prefix string
// below).
// When the rows are not already both sorted and merged, processes every
// local row in a host parallel_reduce: sorts the row's column indices
// together with its values, merges the row, stores the new row length, and
// counts the entries removed.
// Throws std::runtime_error for a static graph, and std::logic_error when
// myGraph_ is null or storage is already optimized.
4382 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4385 using ::Tpetra::Details::ProfilingRegion;
4386 typedef LocalOrdinal LO;
// Execution space of the host mirror of a device View of LO.
4387 typedef typename Kokkos::View<LO*, device_type>::host_mirror_type::execution_space
4388 host_execution_space;
4389 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4390 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4391 ProfilingRegion regionSAM(
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
// Nothing to do when the caller reports the rows as sorted and merged.
4393 if (!sorted || !merged) {
4394 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStaticGraph(), std::runtime_error,
4395 "Cannot sort or merge with "
4396 "\"static\" (const) graph, since the matrix does not own the graph.");
4397 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->myGraph_.is_null(), std::logic_error,
4398 "myGraph_ is null, but "
4399 "this matrix claims ! isStaticGraph(). "
4400 "Please report this bug to the Tpetra developers.");
4401 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStorageOptimized(), std::logic_error,
4402 "It is invalid to call "
4403 "this method if the graph's storage has already been optimized. "
4404 "Please report this bug to the Tpetra developers.");
4407 const LO lclNumRows =
static_cast<LO
>(this->getLocalNumRows());
4408 size_t totalNumDups = 0;
// Host view of the unpacked values, opened for reading and writing.
4413 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
// rowBegins_, rowLengths_ and cols_ are set up on lines not visible here.
4415 Kokkos::parallel_reduce(
4416 "sortAndMergeIndicesAndValues", range_type(0, lclNumRows),
4417 [=](
const LO lclRow,
size_t& numDups) {
4418 size_t rowBegin = rowBegins_(lclRow);
4419 size_t rowLen = rowLengths_(lclRow);
4420 LO* cols = cols_.data() + rowBegin;
// Sort the row's indices; the values follow the same permutation.
4423 sort2(cols, cols + rowLen, vals);
// Merge the row and record its new length; the difference from the old
// length is added to the reduction total.
4426 size_t newRowLength = mergeRowIndicesAndValues(rowLen, cols, vals);
4427 rowLengths_(lclRow) = newRowLength;
4428 numDups += rowLen - newRowLength;
// Non-transpose matrix apply, using the input X_in and the output Y_in.
// Visible stages: obtain X on the column Map (Import with INSERT when the
// graph has an importer), run localApply with NO_TRANS, and when the graph
// has an exporter apply into a row-Map temporary first. A final stage
// handles a replicated Y.
// NOTE(review): the function name is not visible in this excerpt; presumably
// this is applyNonTranspose, which apply() calls for NO_TRANS.
4442 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4447 Scalar beta)
const {
4450 using Teuchos::rcp_const_cast;
4451 using Teuchos::rcpFromRef;
4453 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
4454 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
// Special cases on alpha and beta; their bodies are not visible here.
4460 if (alpha == ZERO) {
4463 }
else if (beta != ONE) {
4477 RCP<const import_type> importer = this->getGraph()->getImporter();
4478 RCP<const export_type> exporter = this->getGraph()->getExporter();
// beta == 0 means the previous contents of Y do not contribute.
4484 const bool Y_is_overwritten = (beta ==
ZERO);
4487 const bool Y_is_replicated =
// For a replicated Y, ranks other than 0 get special handling (body not
// visible here).
4496 if (Y_is_replicated && this->getComm()->getRank() > 0) {
// X_colMap: the input as seen on the column Map.
4503 RCP<const MV> X_colMap;
// No importer: no Import is performed in this branch.
4504 if (importer.is_null()) {
4512 RCP<MV> X_colMapNonConst = getColumnMapMultiVector(X_in,
true);
4514 X_colMap = rcp_const_cast<
const MV>(X_colMapNonConst);
// Non-owning reference to the caller's X_in.
4518 X_colMap = rcpFromRef(X_in);
// With an importer: Import X_in into a column-Map multivector.
4521 ProfilingRegion regionImport(
"Tpetra::CrsMatrix::apply: Import");
4527 RCP<MV> X_colMapNonConst = getColumnMapMultiVector(X_in);
4530 X_colMapNonConst->doImport(X_in, *importer,
INSERT);
4531 X_colMap = rcp_const_cast<
const MV>(X_colMapNonConst);
4538 RCP<MV> Y_rowMap = getRowMapMultiVector(Y_in);
// With an exporter: apply into the row-Map temporary with beta = 0.
4545 if (!exporter.is_null()) {
4546 this->localApply(*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
4548 ProfilingRegion regionExport(
"Tpetra::CrsMatrix::apply: Export");
4554 if (Y_is_overwritten) {
// Without an exporter: apply either into a row-Map temporary or directly
// into Y_in; the conditions choosing between them are not visible here.
4578 Y_rowMap = getRowMapMultiVector(Y_in,
true);
4585 this->localApply(*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4588 this->localApply(*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
// Final stage for a replicated Y (body not visible here).
4596 if (Y_is_replicated) {
4597 ProfilingRegion regionReduce(
"Tpetra::CrsMatrix::apply: Reduce Y");
// Transpose / conjugate-transpose matrix apply, using the input X_in and the
// output Y_in.
// Visible stages: (re)allocate the cached importMV_ / exportMV_ when the
// number of vectors changes, bring X_in onto the row Map through the
// exporter (via doImport with INSERT), run localApply with `mode`, and
// handle a replicated Y at the end.
// NOTE(review): the function name is not visible in this excerpt; presumably
// this is applyTranspose, which apply() calls for modes other than NO_TRANS.
4602 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4606 const Teuchos::ETransp mode,
4608 Scalar beta)
const {
4609 using Teuchos::null;
4612 using Teuchos::rcp_const_cast;
4613 using Teuchos::rcpFromRef;
4615 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
// Special case on alpha; its body is not visible here.
4618 if (alpha == ZERO) {
4640 RCP<const import_type> importer = this->getGraph()->getImporter();
4641 RCP<const export_type> exporter = this->getGraph()->getExporter();
// Y counts as replicated when it is not distributed and the communicator
// has more than one process.
4646 const bool Y_is_replicated = (!Y_in.
isDistributed() && this->getComm()->getSize() != 1);
// beta == 0 means the previous contents of Y do not contribute.
4647 const bool Y_is_overwritten = (beta ==
ZERO);
// For a replicated Y, ranks other than 0 get special handling (body not
// visible here).
4648 if (Y_is_replicated && this->getComm()->getRank() > 0) {
// X is either a deep copy of X_in or a non-owning reference to it; the
// condition choosing between them is not visible here.
4654 X = rcp(
new MV(X_in, Teuchos::Copy));
4656 X = rcpFromRef(X_in);
// Cached column-Map multivector: checked against the current numVectors and
// allocated when null.
4660 if (importer != Teuchos::null) {
4661 if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4664 if (importMV_ == null) {
4665 importMV_ = rcp(
new MV(this->getColMap(), numVectors));
// Cached row-Map multivector: same scheme.
4668 if (exporter != Teuchos::null) {
4669 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4672 if (exportMV_ == null) {
4673 exportMV_ = rcp(
new MV(this->getRowMap(), numVectors));
// In the transpose case the Export object is used with doImport to bring
// X_in onto the row Map.
4679 if (!exporter.is_null()) {
4680 ProfilingRegion regionImport(
"Tpetra::CrsMatrix::apply (transpose): Import");
4681 exportMV_->doImport(X_in, *exporter,
INSERT);
// With an importer: zero importMV_, apply into it with beta = 0, then
// communicate the result (the communication call is not visible here).
4688 if (importer != Teuchos::null) {
4689 ProfilingRegion regionExport(
"Tpetra::CrsMatrix::apply (transpose): Export");
4696 importMV_->putScalar(ZERO);
4698 this->localApply(*X, *importMV_, mode, alpha, ZERO);
4700 if (Y_is_overwritten) {
// Without an importer: apply into a deep copy of Y_in or directly into
// Y_in; the condition choosing between them is not visible here.
4717 MV Y(Y_in, Teuchos::Copy);
4718 this->localApply(*X, Y, mode, alpha, beta);
4721 this->localApply(*X, Y_in, mode, alpha, beta);
// Final stage for a replicated Y (body not visible here).
4728 if (Y_is_replicated) {
4729 ProfilingRegion regionReduce(
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
// localApply (name taken from the error-prefix string below).
// Process-local sparse matrix-vector product with scalars alpha and beta.
// Validates the local lengths of X and Y against the column/row Maps for the
// requested mode, requires fill complete, constant stride and no aliasing,
// lazily builds the cached applyHelper, maps `mode` to the KokkosSparse mode
// string, and dispatches to the kernel through the helper's handle.
// Throws std::runtime_error for an incomplete matrix or bad X/Y, and
// std::invalid_argument for an unknown mode.
4734 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4738 const Teuchos::ETransp mode,
4739 const Scalar& alpha,
4740 const Scalar& beta)
const {
4741 using Teuchos::NO_TRANS;
4743 ProfilingRegion regionLocalApply(
"Tpetra::CrsMatrix::localApply");
4750 const char tfecfFuncName[] =
"localApply: ";
4753 "Y.getNumVectors() = "
4755 const bool transpose = (mode != Teuchos::NO_TRANS);
// NO_TRANS: X must match the column Map and Y the row Map.
4756 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!transpose && X.
getLocalLength() !=
4757 getColMap()->getLocalNumElements(),
4759 "NO_TRANS case: X has the wrong number of local rows. "
4760 "X.getLocalLength() = "
4762 "getColMap()->getLocalNumElements() = "
4763 << getColMap()->getLocalNumElements() <<
".");
4764 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!transpose && Y.
getLocalLength() !=
4765 getRowMap()->getLocalNumElements(),
4767 "NO_TRANS case: Y has the wrong number of local rows. "
4768 "Y.getLocalLength() = "
4770 "getRowMap()->getLocalNumElements() = "
4771 << getRowMap()->getLocalNumElements() <<
".");
// TRANS / CONJ_TRANS: the roles swap, so X must match the row Map and Y the
// column Map.
4772 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(transpose && X.
getLocalLength() !=
4773 getRowMap()->getLocalNumElements(),
4775 "TRANS or CONJ_TRANS case: X has the wrong number of local "
4776 "rows. X.getLocalLength() = "
4778 <<
" != getRowMap()->getLocalNumElements() = "
4779 << getRowMap()->getLocalNumElements() <<
".");
4780 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(transpose && Y.
getLocalLength() !=
4781 getColMap()->getLocalNumElements(),
4783 "TRANS or CONJ_TRANS case: Y has the wrong number of local "
4784 "rows. Y.getLocalLength() = "
4786 <<
" != getColMap()->getLocalNumElements() = "
4787 << getColMap()->getLocalNumElements() <<
".");
4788 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillComplete(), std::runtime_error,
4789 "The matrix is not "
4790 "fill complete. You must call fillComplete() (possibly with "
4791 "domain and range Map arguments) without an intervening "
4792 "resumeFill() call before you may call this method.");
4794 std::runtime_error,
"X and Y must be constant stride.");
// Aliasing check: identical data pointers are only an error when the pointer
// is non-null and X has a nonzero first extent.
4799 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(X_lcl.data() == Y_lcl.data() && X_lcl.data() !=
nullptr && X_lcl.extent(0) != 0,
4800 std::runtime_error,
"X and Y may not alias one another.");
4803 auto A_lcl = getLocalMatrixDevice();
// Build the helper on first use (or after it has been reset).
4805 if (!applyHelper.get()) {
4808 bool useMergePath =
false;
4809 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
// Only for the Cuda execution space: measure row imbalance as the largest
// row length minus the average row length; the threshold test that enables
// the merge-path algorithm is not visible here.
4815 if constexpr (std::is_same_v<execution_space, Kokkos::Cuda>) {
4816 LocalOrdinal nrows = getLocalNumRows();
4817 LocalOrdinal maxRowImbalance = 0;
4819 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
4822 useMergePath =
true;
4825 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map,
4826 useMergePath ? KokkosSparse::SPMV_MERGE_PATH : KokkosSparse::SPMV_DEFAULT);
// Translate the Teuchos transpose mode into the KokkosSparse mode string.
4830 const char* modeKK =
nullptr;
4832 case Teuchos::NO_TRANS:
4833 modeKK = KokkosSparse::NoTranspose;
4835 case Teuchos::TRANS:
4836 modeKK = KokkosSparse::Transpose;
4838 case Teuchos::CONJ_TRANS:
4839 modeKK = KokkosSparse::ConjugateTranspose;
4842 throw std::invalid_argument(
"Tpetra::CrsMatrix::localApply: invalid mode");
// Two dispatch variants: one on a matrix with int row pointers obtained from
// the helper (using handle_int), the other on A_lcl with the default handle.
4845 if (applyHelper->shouldUseIntRowptrs()) {
4846 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
4848 &applyHelper->handle_int, modeKK,
4852 &applyHelper->handle, modeKK,
// apply (name taken from the fnName string below).
// Public entry point for the matrix apply: requires a fill-complete matrix,
// then dispatches to applyNonTranspose for NO_TRANS and to applyTranspose
// for every other mode.
// Throws std::runtime_error when the matrix is not fill complete.
4857 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4861 Teuchos::ETransp mode,
4863 Scalar beta)
const {
4865 const char fnName[] =
"Tpetra::CrsMatrix::apply";
4867 TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
4868 fnName <<
": Cannot call apply() until fillComplete() "
4869 "has been called.");
// Each branch opens its own profiling region.
4871 if (mode == Teuchos::NO_TRANS) {
4872 ProfilingRegion regionNonTranspose(fnName);
4873 this->applyNonTranspose(X, Y, alpha, beta);
4875 ProfilingRegion regionTranspose(
"Tpetra::CrsMatrix::apply (transpose)");
4876 this->applyTranspose(X, Y, mode, alpha, beta);
// convert (name taken from the error-prefix string below).
// Produces a CrsMatrix with scalar type T: creates the new matrix on this
// matrix's graph, copies the local values across with copyConvert, and fill
// completes the result with this matrix's domain and range Maps.
// Throws std::runtime_error when this matrix is not fill complete.
4880 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4882 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node>>
4887 const char tfecfFuncName[] =
"convert: ";
4889 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillComplete(), std::runtime_error,
4890 "This matrix (the source "
4891 "of the conversion) is not fill complete. You must first call "
4892 "fillComplete() (possibly with the domain and range Map) without an "
4893 "intervening call to resumeFill(), before you may call this method.");
// The new matrix is constructed from this matrix's graph.
4895 RCP<output_matrix_type> newMatrix(
new output_matrix_type(this->getCrsGraph()));
// Destination first, source second.
4899 copyConvert(newMatrix->getLocalMatrixDevice().values,
4900 this->getLocalMatrixDevice().values);
4904 newMatrix->fillComplete(this->getDomainMap(), this->getRangeMap());
// checkInternalState (name taken from the error-prefix string below).
// Consistency checks on the matrix; each failure throws std::logic_error
// with the common message `err`:
//   - staticGraph_ must not be null;
//   - a non-null myGraph_ must be the same object as staticGraph_;
//   - a fill-complete matrix must have a fill-complete graph;
//   - when the graph has allocated indices, a nonzero allocation size and
//     at least one local row, the unpacked values must not be empty.
4909 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4914 const char tfecfFuncName[] =
"checkInternalState: ";
4916 "Internal state is not consistent. "
4917 "Please report this bug to the Tpetra developers.";
4921 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(staticGraph_.is_null(), std::logic_error, err);
4925 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!myGraph_.is_null() && myGraph_ != staticGraph_,
4926 std::logic_error, err);
4928 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillComplete() && !staticGraph_->isFillComplete(),
4929 std::logic_error, err <<
" Specifically, the matrix is fill complete, "
4930 "but its graph is NOT fill complete.");
4933 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(staticGraph_->indicesAreAllocated() &&
4934 staticGraph_->getLocalAllocationSize() > 0 &&
4935 staticGraph_->getLocalNumRows() > 0 &&
4936 valuesUnpacked_wdv.extent(0) == 0,
4937 std::logic_error, err);
// Builds a one-line, brace-delimited summary of the matrix in a string
// stream: the class tag, the object label when non-empty, the fill-complete
// state, the global dimensions and, when fill complete, the global number of
// entries.
// NOTE(review): the function name and the return statement are not visible
// in this excerpt; presumably this is description() -- confirm.
4941 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4945 std::ostringstream os;
4947 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
// The label is printed only when it is not the empty string.
4948 if (this->getObjectLabel() !=
"") {
4949 os <<
"Label: \"" << this->getObjectLabel() <<
"\", ";
// The global entry count appears only in the fill-complete variant.
4951 if (isFillComplete()) {
4952 os <<
"isFillComplete: true"
4953 <<
", global dimensions: [" << getGlobalNumRows() <<
", "
4954 << getGlobalNumCols() <<
"]"
4955 <<
", global number of entries: " << getGlobalNumEntries()
4958 os <<
"isFillComplete: false"
4959 <<
", global dimensions: [" << getGlobalNumRows() <<
", "
4960 << getGlobalNumCols() <<
"]}";
// describe: print a multi-line description of the matrix to `out`.  The
// amount of detail grows with the verbosity level:
//   - VERB_NONE: nothing further is done after the check below.
//   - default / VERB_LOW: header, template parameters, global summary.
//   - VERB_MEDIUM and above: the Maps and per-process entry counts.
//   - VERB_HIGH and above: a per-row table (rank, global row, entry count).
//   - VERB_EXTREME: additionally each row's (global column index, value) pairs.
4965 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4968 const Teuchos::EVerbosityLevel verbLevel)
const {
4971 using Teuchos::ArrayView;
4972 using Teuchos::Comm;
4974 using Teuchos::TypeNameTraits;
4975 using Teuchos::VERB_DEFAULT;
4976 using Teuchos::VERB_EXTREME;
4977 using Teuchos::VERB_HIGH;
4978 using Teuchos::VERB_LOW;
4979 using Teuchos::VERB_MEDIUM;
4980 using Teuchos::VERB_NONE;
// VERB_DEFAULT is treated as VERB_LOW.
4982 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4984 if (vl == VERB_NONE) {
4989 Teuchos::OSTab tab0(out);
4991 RCP<const Comm<int>> comm = this->getComm();
4992 const int myRank = comm->getRank();
4993 const int numProcs = comm->getSize();
// Column width for the row table: the loop runs once per decimal digit of
// the global row count beyond the first; the final width is at least 11,
// plus 2.
4995 for (
size_t dec = 10; dec < getGlobalNumRows(); dec *= 10) {
4998 width = std::max<size_t>(width,
static_cast<size_t>(11)) + 2;
5008 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5010 Teuchos::OSTab tab1(out);
5013 if (this->getObjectLabel() !=
"") {
5014 out <<
"Label: \"" << this->getObjectLabel() <<
"\", ";
// Print the names of the four template parameter types.
5017 out <<
"Template parameters:" << endl;
5018 Teuchos::OSTab tab2(out);
5019 out <<
"Scalar: " << TypeNameTraits<Scalar>::name() << endl
5020 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name() << endl
5021 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name() << endl
5022 <<
"Node: " << TypeNameTraits<Node>::name() << endl;
// Global summary; entry counts are only printed for fill-complete matrices.
5024 if (isFillComplete()) {
5025 out <<
"isFillComplete: true" << endl
5026 <<
"Global dimensions: [" << getGlobalNumRows() <<
", "
5027 << getGlobalNumCols() <<
"]" << endl
5028 <<
"Global number of entries: " << getGlobalNumEntries() << endl
5030 <<
"Global max number of entries in a row: "
5031 << getGlobalMaxNumRowEntries() << endl;
5033 out <<
"isFillComplete: false" << endl
5034 <<
"Global dimensions: [" << getGlobalNumRows() <<
", "
5035 << getGlobalNumCols() <<
"]" << endl;
// Everything below this check applies to VERB_MEDIUM and above.
5039 if (vl < VERB_MEDIUM) {
5046 <<
"Row Map:" << endl;
5048 if (getRowMap().is_null()) {
5050 out <<
"null" << endl;
5056 getRowMap()->describe(out, vl);
// Column Map: say so instead of describing it again when it is null or is
// the same RCP as the row Map.
5061 out <<
"Column Map: ";
5063 if (getColMap().is_null()) {
5065 out <<
"null" << endl;
5067 }
else if (getColMap() == getRowMap()) {
5069 out <<
"same as row Map" << endl;
5075 getColMap()->describe(out, vl);
// Domain Map: same treatment, compared against the row and column Maps.
5080 out <<
"Domain Map: ";
5082 if (getDomainMap().is_null()) {
5084 out <<
"null" << endl;
5086 }
else if (getDomainMap() == getRowMap()) {
5088 out <<
"same as row Map" << endl;
5090 }
else if (getDomainMap() == getColMap()) {
5092 out <<
"same as column Map" << endl;
5098 getDomainMap()->describe(out, vl);
// Range Map: same treatment, compared against the domain and row Maps.
5103 out <<
"Range Map: ";
5105 if (getRangeMap().is_null()) {
5107 out <<
"null" << endl;
5109 }
else if (getRangeMap() == getDomainMap()) {
5111 out <<
"same as domain Map" << endl;
5113 }
else if (getRangeMap() == getRowMap()) {
5115 out <<
"same as row Map" << endl;
5121 getRangeMap()->describe(out, vl);
// Per-process statistics: iterate over all ranks; each process prints only
// on its own turn.
5125 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5126 if (myRank == curRank) {
5127 out <<
"Process rank: " << curRank << endl;
5128 Teuchos::OSTab tab2(out);
5129 if (!staticGraph_->indicesAreAllocated()) {
5130 out <<
"Graph indices not allocated" << endl;
5132 out <<
"Number of allocated entries: "
5133 << staticGraph_->getLocalAllocationSize() << endl;
5135 out <<
"Number of entries: " << getLocalNumEntries() << endl
5136 <<
"Max number of entries per row: " << getLocalMaxNumRowEntries()
// Everything below this check applies to VERB_HIGH and above.
5145 if (vl < VERB_HIGH) {
// Per-row table, again printed one process at a time.
5150 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5151 if (myRank == curRank) {
5152 out << std::setw(width) <<
"Proc Rank"
5153 << std::setw(width) <<
"Global Row"
5154 << std::setw(width) <<
"Num Entries";
5155 if (vl == VERB_EXTREME) {
5156 out << std::setw(width) <<
"(Index,Value)";
5159 for (
size_t r = 0; r < getLocalNumRows(); ++r) {
5160 const size_t nE = getNumEntriesInLocalRow(r);
5161 GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5162 out << std::setw(width) << myRank
5163 << std::setw(width) << gid
5164 << std::setw(width) << nE;
// VERB_EXTREME: print each entry as (global column index, value).  A
// globally indexed matrix is read through a global row view; a locally
// indexed one through a local row view whose column indices are mapped to
// global indices via the column Map.
5165 if (vl == VERB_EXTREME) {
5166 if (isGloballyIndexed()) {
5167 global_inds_host_view_type rowinds;
5168 values_host_view_type rowvals;
5169 getGlobalRowView(gid, rowinds, rowvals);
5170 for (
size_t j = 0; j < nE; ++j) {
5171 out <<
" (" << rowinds[j]
5172 <<
", " << rowvals[j]
5175 }
else if (isLocallyIndexed()) {
5176 local_inds_host_view_type rowinds;
5177 values_host_view_type rowvals;
5178 getLocalRowView(r, rowinds, rowvals);
5179 for (
size_t j = 0; j < nE; ++j) {
5180 out <<
" (" << getColMap()->getGlobalElement(rowinds[j])
5181 <<
", " << rowvals[j]
// Compatibility check: the result is true exactly when srcRowMat is non-null.
// NOTE(review): srcRowMat is presumably the result of casting the source
// object to a row matrix type — confirm against the full definition.
5197 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5209 return (srcRowMat !=
nullptr);
// applyCrsPadding: apply the given per-row padding to the matrix's unpacked
// storage (column indices owned by myGraph_ and valuesUnpacked_wdv), then
// install the resulting row offsets in the graph.
//
// padding: padding description of type crs_graph_type::padding_type.
// verbose: when true, progress messages are written to std::cerr.
5212 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5215 const typename crs_graph_type::padding_type& padding,
5216 const bool verbose) {
5220 using LO = local_ordinal_type;
5221 using row_ptrs_type =
5222 typename local_graph_device_type::row_map_type::non_const_type;
5223 using range_policy =
5224 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5225 const char tfecfFuncName[] =
"applyCrsPadding";
5226 const char suffix[] =
5227 ". Please report this bug to the Tpetra developers.";
5228 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
5230 std::unique_ptr<std::string> prefix;
5232 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5233 std::ostringstream os;
5234 os << *prefix <<
"padding: ";
5237 std::cerr << os.str();
// The rank is only looked up in verbose mode; otherwise -1 is used.
5239 const int myRank = !verbose ? -1 : [&]() {
5240 auto map = this->getMap();
5241 if (map.is_null()) {
5244 auto comm = map->getComm();
5245 if (comm.is_null()) {
5248 return comm->getRank();
// If the graph has not allocated its indices yet, allocate storage first.
5252 if (!myGraph_->indicesAreAllocated()) {
5254 std::ostringstream os;
5255 os << *prefix <<
"Call allocateIndices" << endl;
5256 std::cerr << os.str();
5258 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5270 std::ostringstream os;
5271 os << *prefix <<
"Allocate row_ptrs_beg: "
5272 << myGraph_->getRowPtrsUnpackedHost().extent(0) << endl;
5273 std::cerr << os.str();
5275 using Kokkos::view_alloc;
5276 using Kokkos::WithoutInitializing;
// row_ptr_beg: a fresh copy of the graph's unpacked device row offsets.
// Skipping initialization is fine because deep_copy overwrites every entry.
5277 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5278 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5280 Kokkos::deep_copy(execution_space(), row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
// N = number of rows = (number of offsets - 1), or 0 for an empty offsets array.
5282 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) : size_t(row_ptr_beg.extent(0) - 1);
5284 std::ostringstream os;
5285 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5286 std::cerr << os.str();
// row_ptr_end(i): one past the last used slot of row i.
5288 row_ptrs_type row_ptr_end(
5289 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5291 row_ptrs_type num_row_entries_d;
// If the graph tracks per-row entry counts (k_numRowEntries_ nonempty), row
// ends are begin + count and the counts are rewritten after padding.
// Otherwise each row ends where the next row begins.
5293 const bool refill_num_row_entries =
5294 myGraph_->k_numRowEntries_.extent(0) != 0;
5296 if (refill_num_row_entries) {
5299 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5300 myGraph_->k_numRowEntries_);
5301 Kokkos::parallel_for(
5302 "Fill end row pointers", range_policy(0, N),
5303 KOKKOS_LAMBDA(
const size_t i) {
5304 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
5310 Kokkos::parallel_for(
5311 "Fill end row pointers", range_policy(0, N),
5312 KOKKOS_LAMBDA(
const size_t i) {
5313 row_ptr_end(i) = row_ptr_beg(i + 1);
// Pad whichever index array is in use (global or local) together with the
// values, then verify that both arrays ended up with the same length.
5317 if (myGraph_->isGloballyIndexed()) {
5319 myGraph_->gblInds_wdv,
5320 valuesUnpacked_wdv, padding, myRank, verbose);
5321 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5322 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5323 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(newValuesLen != newColIndsLen, std::logic_error,
5324 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5325 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5329 myGraph_->lclIndsUnpacked_wdv,
5330 valuesUnpacked_wdv, padding, myRank, verbose);
5331 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5332 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5333 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(newValuesLen != newColIndsLen, std::logic_error,
5334 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5335 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
// Recompute the per-row entry counts from the begin/end offsets.
5339 if (refill_num_row_entries) {
5340 Kokkos::parallel_for(
5341 "Fill num entries", range_policy(0, N),
5342 KOKKOS_LAMBDA(
const size_t i) {
5343 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5349 std::ostringstream os;
5350 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5351 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5352 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5353 std::cerr << os.str();
// The number of row offsets must be unchanged by padding.
5354 TEUCHOS_ASSERT(myGraph_->getRowPtrsUnpackedHost().extent(0) ==
5355 row_ptr_beg.extent(0));
// Install the row offsets in the graph.
5357 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
// copyAndPermuteStaticGraph: bring rows of srcMat into this matrix through
// combineGlobalValues.
//
// srcMat:          source row matrix.
// numSameIDs:      the first numSameIDs source rows map to the same global
//                  row in the target.
// permuteToLIDs:   target local row indices of the permuted rows.
// permuteFromLIDs: source local row indices of the permuted rows.
// numPermutes:     number of entries read from each permute array.
//
// Throws std::logic_error if the source's reported row length disagrees
// with the length returned by getGlobalRowCopy.
5360 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5361 void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5362 copyAndPermuteStaticGraph(
5363 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5364 const size_t numSameIDs,
5365 const LocalOrdinal permuteToLIDs[],
5366 const LocalOrdinal permuteFromLIDs[],
5367 const size_t numPermutes) {
5368 using Details::ProfilingRegion;
5370 using Teuchos::Array;
5371 using Teuchos::ArrayView;
5372 using LO = LocalOrdinal;
5373 using GO = GlobalOrdinal;
5374 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5375 const char suffix[] =
5376 " Please report this bug to the Tpetra developers.";
5377 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
5381 std::unique_ptr<std::string> prefix;
// NOTE(review): the prefix is created with the class name "CrsGraph" even
// though this is a CrsMatrix method — confirm whether that is intended.
5383 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5384 std::ostringstream os;
5385 os << *prefix <<
"Start" << endl;
// Raw C string form of the prefix for the helper calls; null unless verbose.
5387 const char*
const prefix_raw =
5388 verbose ? prefix.get()->c_str() :
nullptr;
5390 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed();
5395 const map_type& srcRowMap = *(srcMat.getRowMap());
// Scratch buffers for row copies; grown on demand and reused across rows.
5396 nonconst_global_inds_host_view_type rowInds;
5397 nonconst_values_host_view_type rowVals;
5398 const LO numSameIDs_as_LID =
static_cast<LO
>(numSameIDs);
// Phase 1: rows whose global index is the same in source and target.
5399 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5403 const GO sourceGID = srcRowMap.getGlobalElement(sourceLID);
5404 const GO targetGID = sourceGID;
5406 ArrayView<const GO> rowIndsConstView;
5407 ArrayView<const Scalar> rowValsConstView;
// A locally indexed source must hand out a copy of the row in global
// indices; otherwise the row can be viewed in place.
5409 if (sourceIsLocallyIndexed) {
5410 const size_t rowLength = srcMat.getNumEntriesInGlobalRow(sourceGID);
5411 if (rowLength > static_cast<size_t>(rowInds.size())) {
5412 Kokkos::resize(rowInds, rowLength);
5413 Kokkos::resize(rowVals, rowLength);
// Restrict the scratch buffers to exactly rowLength entries.
5417 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds, std::make_pair((
size_t)0, rowLength));
5418 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals, std::make_pair((
size_t)0, rowLength));
5423 size_t checkRowLength = 0;
5424 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5425 rowValsView, checkRowLength);
5427 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, std::logic_error,
5430 << sourceGID <<
", the source "
5431 "matrix's getNumEntriesInGlobalRow returns a row length "
5433 << rowLength <<
", but getGlobalRowCopy reports "
5435 << checkRowLength <<
"." << suffix);
// Wrap the Kokkos views as Teuchos ArrayViews; the values pointer is
// reinterpreted as Scalar.
5442 rowIndsConstView = Teuchos::ArrayView<const GO>(
5443 rowIndsView.data(), rowIndsView.extent(0),
5444 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5445 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5446 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5447 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5451 global_inds_host_view_type rowIndsView;
5452 values_host_view_type rowValsView;
5453 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5458 rowIndsConstView = Teuchos::ArrayView<const GO>(
5459 rowIndsView.data(), rowIndsView.extent(0),
5460 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5461 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5462 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5463 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Combine the source row into the target row.
5470 combineGlobalValues(targetGID, rowIndsConstView,
5472 prefix_raw, debug, verbose);
5476 std::ostringstream os;
5477 os << *prefix <<
"Do permutes" << endl;
// Phase 2: permuted rows.  The source row comes from permuteFromLIDs[p] and
// the target row from permuteToLIDs[p]; the row handling mirrors phase 1.
5480 const map_type& tgtRowMap = *(this->getRowMap());
5481 for (
size_t p = 0; p < numPermutes; ++p) {
5482 const GO sourceGID = srcRowMap.getGlobalElement(permuteFromLIDs[p]);
5483 const GO targetGID = tgtRowMap.getGlobalElement(permuteToLIDs[p]);
5485 ArrayView<const GO> rowIndsConstView;
5486 ArrayView<const Scalar> rowValsConstView;
5488 if (sourceIsLocallyIndexed) {
5489 const size_t rowLength = srcMat.getNumEntriesInGlobalRow(sourceGID);
5490 if (rowLength > static_cast<size_t>(rowInds.size())) {
5491 Kokkos::resize(rowInds, rowLength);
5492 Kokkos::resize(rowVals, rowLength);
5496 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds, std::make_pair((
size_t)0, rowLength));
5497 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals, std::make_pair((
size_t)0, rowLength));
5502 size_t checkRowLength = 0;
5503 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5504 rowValsView, checkRowLength);
5506 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, std::logic_error,
5508 "source matrix global row index "
5509 << sourceGID <<
", "
5510 "getNumEntriesInGlobalRow returns a row length of "
5511 << rowLength <<
", but getGlobalRowCopy a row length of "
5512 << checkRowLength <<
"." << suffix);
5519 rowIndsConstView = Teuchos::ArrayView<const GO>(
5520 rowIndsView.data(), rowIndsView.extent(0),
5521 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5522 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5523 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5524 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5528 global_inds_host_view_type rowIndsView;
5529 values_host_view_type rowValsView;
5530 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5535 rowIndsConstView = Teuchos::ArrayView<const GO>(
5536 rowIndsView.data(), rowIndsView.extent(0),
5537 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5538 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5539 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5540 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5545 combineGlobalValues(targetGID, rowIndsConstView,
5547 prefix_raw, debug, verbose);
5551 std::ostringstream os;
5552 os << *prefix <<
"Done" << endl;
// copyAndPermuteNonStaticGraph: bring rows of srcMat into this matrix when
// the graph may still change.  Padding is computed from the source graph and
// applied first; rows are then inserted with
// insertGlobalValuesFilteredChecked.
//
// srcMat:             source row matrix.
// numSameIDs:         the first numSameIDs source rows map to the same
//                     global row in the target.
// permuteToLIDs_dv:   target local row indices of the permuted rows.
// permuteFromLIDs_dv: source local row indices of the permuted rows.
// numPermutes:        number of entries read from each permute array.
//
// Throws std::logic_error if the source's reported row length disagrees
// with the length returned by getGlobalRowCopy.
5556 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5557 void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5558 copyAndPermuteNonStaticGraph(
5559 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5560 const size_t numSameIDs,
5561 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5562 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5563 const size_t numPermutes) {
5564 using Details::ProfilingRegion;
5566 using Teuchos::Array;
5567 using Teuchos::ArrayView;
5568 using LO = LocalOrdinal;
5569 using GO = GlobalOrdinal;
5570 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5571 const char suffix[] =
5572 " Please report this bug to the Tpetra developers.";
5573 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
5577 std::unique_ptr<std::string> prefix;
// NOTE(review): the prefix is created with the class name "CrsGraph" even
// though this is a CrsMatrix method — confirm whether that is intended.
5579 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5580 std::ostringstream os;
5581 os << *prefix <<
"Start" << endl;
// Raw C string form of the prefix for the helper calls; null unless verbose.
5583 const char*
const prefix_raw =
5584 verbose ? prefix.get()->c_str() :
nullptr;
5587 using row_graph_type = RowGraph<LO, GO, Node>;
5588 const row_graph_type& srcGraph = *(srcMat.getGraph());
// Compute the padding needed to hold the incoming rows and apply it before
// any values are inserted.
5590 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5591 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5592 applyCrsPadding(*padding, verbose);
5594 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed();
5599 const map_type& srcRowMap = *(srcMat.getRowMap());
5600 const LO numSameIDs_as_LID =
static_cast<LO
>(numSameIDs);
5601 using gids_type = nonconst_global_inds_host_view_type;
5602 using vals_type = nonconst_values_host_view_type;
// Phase 1: rows whose global index is the same in source and target.
5605 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5609 const GO sourceGID = srcRowMap.getGlobalElement(sourceLID);
5610 const GO targetGID = sourceGID;
5612 ArrayView<const GO> rowIndsConstView;
5613 ArrayView<const Scalar> rowValsConstView;
// A locally indexed source must hand out a copy of the row in global
// indices; otherwise the row can be viewed in place.
5615 if (sourceIsLocallyIndexed) {
5616 const size_t rowLength = srcMat.getNumEntriesInGlobalRow(sourceGID);
// Grow the reusable scratch buffers when this row does not fit.
5617 if (rowLength > static_cast<size_t>(rowInds.extent(0))) {
5618 Kokkos::resize(rowInds, rowLength);
5619 Kokkos::resize(rowVals, rowLength);
5623 gids_type rowIndsView = Kokkos::subview(rowInds, std::make_pair((
size_t)0, rowLength));
5624 vals_type rowValsView = Kokkos::subview(rowVals, std::make_pair((
size_t)0, rowLength));
5629 size_t checkRowLength = 0;
5630 srcMat.getGlobalRowCopy(sourceGID, rowIndsView, rowValsView,
5633 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, std::logic_error,
5636 << sourceGID <<
", the source "
5637 "matrix's getNumEntriesInGlobalRow returns a row length "
5639 << rowLength <<
", but getGlobalRowCopy reports "
5641 << checkRowLength <<
"." << suffix);
// Wrap the copied row as Teuchos ArrayViews of length rowLength.
5643 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5644 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar*
>(rowValsView.data()), rowLength);
5646 global_inds_host_view_type rowIndsView;
5647 values_host_view_type rowValsView;
5648 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5654 rowIndsConstView = Teuchos::ArrayView<const GO>(
5655 rowIndsView.data(), rowIndsView.extent(0),
5656 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5657 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5658 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5659 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Insert the source row into the target row.
5665 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5666 rowValsConstView, prefix_raw, debug, verbose);
5670 std::ostringstream os;
5671 os << *prefix <<
"Do permutes" << endl;
// Phase 2: permuted rows, read through the host views of the DualViews.
5673 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5674 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
5676 const map_type& tgtRowMap = *(this->getRowMap());
5677 for (
size_t p = 0; p < numPermutes; ++p) {
5678 const GO sourceGID = srcRowMap.getGlobalElement(permuteFromLIDs[p]);
5679 const GO targetGID = tgtRowMap.getGlobalElement(permuteToLIDs[p]);
5681 ArrayView<const GO> rowIndsConstView;
5682 ArrayView<const Scalar> rowValsConstView;
5684 if (sourceIsLocallyIndexed) {
5685 const size_t rowLength = srcMat.getNumEntriesInGlobalRow(sourceGID);
5686 if (rowLength > static_cast<size_t>(rowInds.extent(0))) {
5687 Kokkos::resize(rowInds, rowLength);
5688 Kokkos::resize(rowVals, rowLength);
5692 gids_type rowIndsView = Kokkos::subview(rowInds, std::make_pair((
size_t)0, rowLength));
5693 vals_type rowValsView = Kokkos::subview(rowVals, std::make_pair((
size_t)0, rowLength));
5698 size_t checkRowLength = 0;
5699 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5700 rowValsView, checkRowLength);
5702 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, std::logic_error,
5704 "source matrix global row index "
5705 << sourceGID <<
", "
5706 "getNumEntriesInGlobalRow returns a row length of "
5707 << rowLength <<
", but getGlobalRowCopy a row length of "
5708 << checkRowLength <<
"." << suffix);
5710 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5711 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar*
>(rowValsView.data()), rowLength);
5713 global_inds_host_view_type rowIndsView;
5714 values_host_view_type rowValsView;
5715 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5721 rowIndsConstView = Teuchos::ArrayView<const GO>(
5722 rowIndsView.data(), rowIndsView.extent(0),
5723 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5724 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5725 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5726 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5732 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5733 rowValsConstView, prefix_raw, debug, verbose);
5737 std::ostringstream os;
5738 os << *prefix <<
"Done" << endl;
// copyAndPermute: entry point that validates the permute arrays, casts the
// source object to a row matrix, and dispatches to the static-graph or
// non-static-graph implementation depending on isStaticGraph().
//
// Throws std::invalid_argument if permuteToLIDs and permuteFromLIDs differ
// in length.
5742 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5746 const size_t numSameIDs,
5747 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
5748 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
5756 const char tfecfFuncName[] =
"copyAndPermute: ";
5757 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
5759 const bool verbose = Behavior::verbose(
"CrsMatrix");
5760 std::unique_ptr<std::string> prefix;
5762 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
5763 std::ostringstream os;
5764 os << *prefix << endl
5765 << *prefix <<
" numSameIDs: " << numSameIDs << endl
5766 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
5775 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
5777 std::cerr << os.str();
// The two permute arrays must have the same length.
5780 const auto numPermute = permuteToLIDs.extent(0);
5781 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numPermute != permuteFromLIDs.extent(0),
5782 std::invalid_argument,
"permuteToLIDs.extent(0) = " << numPermute <<
"!= permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0) <<
".");
// Reference dynamic_cast: throws std::bad_cast if srcObj is not an RMT.
5787 const RMT& srcMat =
dynamic_cast<const RMT&
>(srcObj);
// The static-graph path takes raw host pointers, so the host copies of both
// permute arrays must already be up to date.
5788 if (isStaticGraph()) {
5789 TEUCHOS_ASSERT(!permuteToLIDs.need_sync_host());
5790 auto permuteToLIDs_h = permuteToLIDs.view_host();
5791 TEUCHOS_ASSERT(!permuteFromLIDs.need_sync_host());
5792 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5794 copyAndPermuteStaticGraph(srcMat, numSameIDs,
5795 permuteToLIDs_h.data(),
5796 permuteFromLIDs_h.data(),
// The non-static-graph path takes the DualViews themselves.
5799 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
5800 permuteFromLIDs, numPermute);
5804 std::ostringstream os;
5805 os << *prefix <<
"Done" << endl;
5806 std::cerr << os.str();
// packAndPrepare: pack the rows listed in exportLIDs from the source object
// into the `exports` buffer.
//
// If the source is a CrsMatrix of the same type, its packNew() is used.
// Otherwise the source must be a RowMatrix; its pack() fills a temporary
// Teuchos::Array that is then transferred to the host side of `exports`
// (which is reallocated if too small).
//
// exportLIDs:         local row indices to pack (host copy must be current).
// exports:            output byte buffer.
// numPacketsPerLID:   output, per-row packet counts.
// constantNumPackets: output, passed through to packNew() / pack().
//
// Exceptions thrown by packNew() / pack() are caught and recorded in `msg`
// with this process's rank, then reported as std::logic_error.
// std::invalid_argument is thrown if the source is neither a CrsMatrix nor a
// RowMatrix.
5810 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5813 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
5814 Kokkos::DualView<char*, buffer_device_type>& exports,
5815 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5816 size_t& constantNumPackets) {
5821 using Teuchos::outArg;
5822 using Teuchos::REDUCE_MAX;
5823 using Teuchos::reduceAll;
5824 typedef LocalOrdinal LO;
5825 typedef GlobalOrdinal GO;
5826 const char tfecfFuncName[] =
"packAndPrepare: ";
5827 ProfilingRegion regionPAP(
"Tpetra::CrsMatrix::packAndPrepare");
5829 const bool debug = Behavior::debug(
"CrsMatrix");
5830 const bool verbose = Behavior::verbose(
"CrsMatrix");
5833 Teuchos::RCP<const Teuchos::Comm<int>> pComm = this->getComm();
5834 if (pComm.is_null()) {
5837 const Teuchos::Comm<int>& comm = *pComm;
// This process's rank, used to label error messages ("Proc <rank>: ...").
// It must come from getRank(); getSize() would make every process report
// the same, nonexistent rank.
5838 const int myRank = comm.getRank();
5840 std::unique_ptr<std::string> prefix;
5842 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
5843 std::ostringstream os;
5844 os << *prefix <<
"Start" << endl
5854 std::cerr << os.str();
// Collects this process's error text from any exception caught below.
5877 std::ostringstream msg;
5880 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
// Preferred path: the source is a CrsMatrix of the same type.
5881 const crs_matrix_type* srcCrsMat =
5882 dynamic_cast<const crs_matrix_type*
>(&source);
5883 if (srcCrsMat !=
nullptr) {
5885 std::ostringstream os;
5886 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
5889 std::cerr << os.str();
5892 srcCrsMat->packNew(exportLIDs, exports, numPacketsPerLID,
5893 constantNumPackets);
5894 }
catch (std::exception& e) {
5896 msg <<
"Proc " << myRank <<
": " << e.what() << std::endl;
5899 using Kokkos::HostSpace;
5900 using Kokkos::subview;
5901 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
5902 using range_type = Kokkos::pair<size_t, size_t>;
5905 std::ostringstream os;
5906 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
5908 std::cerr << os.str();
// Fallback path: the source must at least be a RowMatrix.
5911 const row_matrix_type* srcRowMat =
5912 dynamic_cast<const row_matrix_type*
>(&source);
5913 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(srcRowMat ==
nullptr, std::invalid_argument,
5914 "The source object of the Import or Export operation is neither a "
5915 "CrsMatrix (with the same template parameters as the target object), "
5916 "nor a RowMatrix (with the same first four template parameters as the "
// pack() takes Teuchos array types, so wrap the host views.
5927 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5928 auto exportLIDs_h = exportLIDs.view_host();
5929 Teuchos::ArrayView<const LO> exportLIDs_av(exportLIDs_h.data(),
5930 exportLIDs_h.size());
// Temporary buffer that pack() fills.
5934 Teuchos::Array<char> exports_a;
// numPacketsPerLID is written on the host: reset its sync state and mark
// the host side as modified.
5940 numPacketsPerLID.clear_sync_state();
5941 numPacketsPerLID.modify_host();
5942 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5943 Teuchos::ArrayView<size_t> numPacketsPerLID_av(numPacketsPerLID_h.data(),
5944 numPacketsPerLID_h.size());
5949 srcRowMat->pack(exportLIDs_av, exports_a, numPacketsPerLID_av,
5950 constantNumPackets);
5951 }
catch (std::exception& e) {
5953 msg <<
"Proc " << myRank <<
": " << e.what() << std::endl;
// Reallocate `exports` if it cannot hold the packed data, keeping the old
// label (or "exports" when the old label is empty).
5957 const size_t newAllocSize =
static_cast<size_t>(exports_a.size());
5958 if (static_cast<size_t>(exports.extent(0)) < newAllocSize) {
5959 const std::string oldLabel = exports.view_device().label();
5960 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
5961 exports = exports_type(newLabel, newAllocSize);
5966 exports.modify_host();
5968 auto exports_h = exports.view_host();
5969 auto exports_h_sub = subview(exports_h, range_type(0, newAllocSize));
// Unmanaged host view over the temporary array's storage.
5973 typedef typename exports_type::t_host::execution_space HES;
5974 typedef Kokkos::Device<HES, HostSpace> host_device_type;
5975 Kokkos::View<const char*, host_device_type>
5976 exports_a_kv(exports_a.getRawPtr(), newAllocSize);
// Take the maximum of the local error flag over all processes.
5983 reduceAll<int, int>(comm, REDUCE_MAX, lclBad, outArg(gblBad));
5986 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error,
5987 "packNew() or pack() threw an exception on "
5988 "one or more participating processes.");
// Throws on the local error flag alone.
5991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lclBad != 0, std::logic_error,
5992 "packNew threw an exception on one "
5993 "or more participating processes. Here is this process' error "
5999 std::ostringstream os;
6000 os << *prefix <<
"packAndPrepare: Done!" << endl
6010 std::cerr << os.str();
// packRow: serialize one row into the byte buffer `exports`, starting at
// byte position `offset`.
//
// Layout written for the row (consecutive byte ranges):
//   [numEntBeg, +numEntLen)  the row's entry count
//   [gidsBeg,   +gidsLen)    numEnt global column indices
//   [valsBeg,   +valsLen)    numEnt values, numBytesPerValue bytes each
//
// Throws std::logic_error if the number of bytes written differs from the
// expected total, and std::runtime_error if PackTraits reports an error.
6014 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6016 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6017 packRow(
char exports[],
6018 const size_t offset,
6019 const size_t numEnt,
6020 const GlobalOrdinal gidsIn[],
6021 const impl_scalar_type valsIn[],
6022 const size_t numBytesPerValue)
const {
6023 using Kokkos::subview;
6026 typedef LocalOrdinal LO;
6027 typedef GlobalOrdinal GO;
6028 typedef impl_scalar_type ST;
// The entry count is stored as a LocalOrdinal.
6036 const LO numEntLO =
static_cast<size_t>(numEnt);
// Byte offsets and lengths of the three sections.
6038 const size_t numEntBeg = offset;
6040 const size_t gidsBeg = numEntBeg + numEntLen;
6041 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount(gid);
6042 const size_t valsBeg = gidsBeg + gidsLen;
6043 const size_t valsLen = numEnt * numBytesPerValue;
6045 char*
const numEntOut = exports + numEntBeg;
6046 char*
const gidsOut = exports + gidsBeg;
6047 char*
const valsOut = exports + valsBeg;
// Running total of bytes written, checked against the expected size below.
6049 size_t numBytesOut = 0;
// packArray returns (error code, bytes written); accumulate both.
6054 Kokkos::pair<int, size_t> p;
6055 p = PackTraits<GO>::packArray(gidsOut, gidsIn, numEnt);
6056 errorCode += p.first;
6057 numBytesOut += p.second;
6059 p = PackTraits<ST>::packArray(valsOut, valsIn, numEnt);
6060 errorCode += p.first;
6061 numBytesOut += p.second;
6064 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6065 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != expectedNumBytes, std::logic_error,
6068 << numBytesOut <<
" != expectedNumBytes = "
6069 << expectedNumBytes <<
".");
6070 TEUCHOS_TEST_FOR_EXCEPTION(errorCode != 0, std::runtime_error,
6072 "PackTraits::packArray returned a nonzero error code");
// unpackRow: deserialize one row from the byte buffer `imports`, starting at
// byte position `offset`, into gidsOut (global column indices) and valsOut
// (values).
//
// numBytes:         number of bytes the caller expects this row to occupy.
// numEnt:           number of entries the caller expects in this row.
// numBytesPerValue: packed size of one value.
//
// The buffer layout mirrors packRow: entry count, then indices, then values.
// Throws std::logic_error on any inconsistency between numBytes, numEnt, and
// the unpacked data, and std::runtime_error if PackTraits reports an error.
6077 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6079 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6080 unpackRow(GlobalOrdinal gidsOut[],
6081 impl_scalar_type valsOut[],
6082 const char imports[],
6083 const size_t offset,
6084 const size_t numBytes,
6085 const size_t numEnt,
6086 const size_t numBytesPerValue) {
6087 using Kokkos::subview;
6090 typedef LocalOrdinal LO;
6091 typedef GlobalOrdinal GO;
6092 typedef impl_scalar_type ST;
6094 Details::ProfilingRegion region_upack_row(
6095 "Tpetra::CrsMatrix::unpackRow",
// Consistency check: zero bytes to unpack must come with zero entries.
6098 if (numBytes == 0) {
6101 const int myRank = this->getMap()->getComm()->getRank();
6102 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6103 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6104 "number of entries to unpack (as reported by numPacketsPerLID) "
6105 "for this row numEnt="
6106 << numEnt <<
" != 0.");
// Consistency check: a nonzero byte count with zero entries is an error.
6111 if (numEnt == 0 && numBytes != 0) {
6112 const int myRank = this->getMap()->getComm()->getRank();
6113 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6114 "unpackRow: The number of entries to unpack (as reported by "
6115 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6117 << numBytes <<
" != 0.");
// Byte offsets and lengths of the three sections, as in packRow.
6123 const size_t numEntBeg = offset;
6125 const size_t gidsBeg = numEntBeg + numEntLen;
6126 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount(gid);
6127 const size_t valsBeg = gidsBeg + gidsLen;
6128 const size_t valsLen = numEnt * numBytesPerValue;
6130 const char*
const numEntIn = imports + numEntBeg;
6131 const char*
const gidsIn = imports + gidsBeg;
6132 const char*
const valsIn = imports + valsBeg;
// Running total of bytes consumed, checked against numBytes below.
6134 size_t numBytesOut = 0;
// The entry count read from the buffer must equal numEnt and be nonzero;
// otherwise build a detailed message and throw.
6138 if (static_cast<size_t>(numEntOut) != numEnt ||
6139 numEntOut == static_cast<LO>(0)) {
6140 const int myRank = this->getMap()->getComm()->getRank();
6141 std::ostringstream os;
6142 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6143 bool firstErrorCondition =
false;
6144 if (static_cast<size_t>(numEntOut) != numEnt) {
6145 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6146 <<
" does not equal number of entries unpacked from imports "
6148 << numEntOut <<
".";
6149 firstErrorCondition =
true;
6151 if (numEntOut == static_cast<LO>(0)) {
6152 if (firstErrorCondition) {
6155 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6156 "but number of bytes to unpack for this row numBytes="
6158 <<
" != 0. This should never happen, since packRow should only "
6159 "ever pack rows with a nonzero number of entries. In this case, "
6160 "the number of entries from numPacketsPerLID is numEnt="
6164 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str());
// unpackArray returns (error code, bytes read); accumulate both.
6168 Kokkos::pair<int, size_t> p;
6169 p = PackTraits<GO>::unpackArray(gidsOut, gidsIn, numEnt);
6170 errorCode += p.first;
6171 numBytesOut += p.second;
6173 p = PackTraits<ST>::unpackArray(valsOut, valsIn, numEnt);
6174 errorCode += p.first;
6175 numBytesOut += p.second;
// The bytes consumed must match both the caller's count and the size
// computed from the layout.
6178 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = " << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6180 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6181 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != expectedNumBytes, std::logic_error,
6184 << numBytesOut <<
" != expectedNumBytes = "
6185 << expectedNumBytes <<
".");
6187 TEUCHOS_TEST_FOR_EXCEPTION(errorCode != 0, std::runtime_error,
6189 "PackTraits::unpackArray returned a nonzero error code");
// allocatePackSpaceNew: count the entries in the rows to be exported and
// make sure `exports` is large enough to hold their packed form.
//
// exports:         byte buffer; replaced by a new, larger DualView when its
//                  current extent is smaller than the computed size.
// totalNumEntries: output; reset to zero, then accumulated from the entry
//                  counts of the rows in exportLIDs.
// exportLIDs:      local row indices to export (host copy must be current).
6194 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6195 void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6196 allocatePackSpaceNew(Kokkos::DualView<char*, buffer_device_type>& exports,
6197 size_t& totalNumEntries,
6198 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const {
6199 using Details::Behavior;
6202 typedef impl_scalar_type IST;
6203 typedef LocalOrdinal LO;
6204 typedef GlobalOrdinal GO;
6210 const bool verbose = Behavior::verbose(
"CrsMatrix");
6211 std::unique_ptr<std::string> prefix;
6213 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6214 std::ostringstream os;
6215 os << *prefix <<
"Before:"
6223 std::cerr << os.str();
6228 const LO numExportLIDs =
static_cast<LO
>(exportLIDs.extent(0));
// The rows are read from the host view, so it must not need a sync.
6230 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
6231 auto exportLIDs_h = exportLIDs.view_host();
// Sum the entry counts of all exported rows.  An invalid() count gets
// special handling inside the check below.
6234 totalNumEntries = 0;
6235 for (LO i = 0; i < numExportLIDs; ++i) {
6236 const LO lclRow = exportLIDs_h[i];
6237 size_t curNumEntries = this->getNumEntriesInLocalRow(lclRow);
6240 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid()) {
6243 totalNumEntries += curNumEntries;
// Buffer size in bytes: one LO per exported row, plus one value (IST) and
// one global index (GO) per entry.
6254 const size_t allocSize =
6255 static_cast<size_t>(numExportLIDs) *
sizeof(LO) +
6256 totalNumEntries * (
sizeof(IST) +
sizeof(GO));
// Reallocate only when the current buffer is too small, keeping the old
// label (or "exports" when the old label is empty).
6257 if (static_cast<size_t>(exports.extent(0)) < allocSize) {
6258 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6260 const std::string oldLabel = exports.view_device().label();
6261 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6262 exports = exports_type(newLabel, allocSize);
6266 std::ostringstream os;
6267 os << *prefix <<
"After:"
6275 std::cerr << os.str();
// Packs the rows listed in exportLIDs into `exports`, selecting the packing
// routine according to whether the matrix has a static graph.
6279 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6281 packNew(
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6282 Kokkos::DualView<char*, buffer_device_type>& exports,
6283 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6284 size_t& constantNumPackets)
const {
// Static-graph case: a packing call ending in constantNumPackets is made here.
6287 if (this->isStaticGraph()) {
6290 constantNumPackets);
// NOTE(review): presumably the non-static-graph alternative -- confirm.
// It forwards the same four arguments to packNonStaticNew.
6292 this->packNonStaticNew(exportLIDs, exports, numPacketsPerLID,
6293 constantNumPackets);
// Packs the rows listed in exportLIDs into the host view of `exports`, one row
// at a time via packRow. The byte count packed for each row is written to the
// host view of numPacketsPerLID, and constantNumPackets is set to zero.
6297 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6299 packNonStaticNew(
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6300 Kokkos::DualView<char*, buffer_device_type>& exports,
6301 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6302 size_t& constantNumPackets)
const {
6309 using LO = LocalOrdinal;
6310 using GO = GlobalOrdinal;
6311 using ST = impl_scalar_type;
6312 const char tfecfFuncName[] =
"packNonStaticNew: ";
6314 const bool verbose = Behavior::verbose(
"CrsMatrix");
6315 std::unique_ptr<std::string> prefix;
6317 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6318 std::ostringstream os;
6319 os << *prefix <<
"Start" << endl;
6320 std::cerr << os.str();
// exportLIDs and numPacketsPerLID must have the same length; otherwise
// std::invalid_argument is thrown.
6323 const size_t numExportLIDs =
static_cast<size_t>(exportLIDs.extent(0));
6324 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != static_cast<size_t>(numPacketsPerLID.extent(0)),
6325 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent(0) <<
".");
// NOTE(review): zero presumably signals a varying number of packets per LID -- confirm.
6330 constantNumPackets = 0;
// Size the exports buffer (reallocating if needed) before any packing.
6335 size_t totalNumEntries = 0;
6336 this->allocatePackSpaceNew(exports, totalNumEntries, exportLIDs);
6337 const size_t bufSize =
static_cast<size_t>(exports.extent(0));
// Packing writes through the host view, so the sync state is reset and the
// host side is marked as modified.
6340 exports.clear_sync_state();
6341 exports.modify_host();
6342 auto exports_h = exports.view_host();
6344 std::ostringstream os;
6345 os << *prefix <<
"After marking exports as modified on host, "
6347 std::cerr << os.str();
6351 auto exportLIDs_h = exportLIDs.view_host();
// numPacketsPerLID arrives by const reference; const_cast is used so its
// sync flags can be updated before its host view is written.
6354 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6355 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6356 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6361 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
// Scratch storage for global column indices, set up only when the matrix is
// locally indexed.
6364 typename global_inds_host_view_type::non_const_type gidsIn_k;
6365 if (this->isLocallyIndexed()) {
6367 typename global_inds_host_view_type::non_const_type(
"packGids",
// Pack each exported row in turn.
6372 for (
size_t i = 0; i < numExportLIDs; ++i) {
6373 const LO lclRow = exportLIDs_h[i];
6375 size_t numBytes = 0;
6376 size_t numEnt = this->getNumEntriesInLocalRow(lclRow);
// NOTE(review): presumably reached only for rows with no entries -- confirm.
6383 numPacketsPerLID_h[i] = 0;
6387 if (this->isLocallyIndexed()) {
6388 typename global_inds_host_view_type::non_const_type gidsIn;
6389 values_host_view_type valsIn;
6393 local_inds_host_view_type lidsIn;
6394 this->getLocalRowView(lclRow, lidsIn, valsIn);
// Convert the row's local column indices to global ones via the column Map.
6395 const map_type& colMap = *(this->getColMap());
6396 for (
size_t k = 0; k < numEnt; ++k) {
6397 gidsIn_k[k] = colMap.getGlobalElement(lidsIn[k]);
6399 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0), GO(numEnt)));
// The per-value byte count is taken from the row's first value.
6401 const size_t numBytesPerValue =
6402 PackTraits<ST>::packValueCount(valsIn[0]);
6403 numBytes = this->packRow(exports_h.data(), offset, numEnt,
6404 gidsIn.data(), valsIn.data(),
6406 }
else if (this->isGloballyIndexed()) {
6407 global_inds_host_view_type gidsIn;
6408 values_host_view_type valsIn;
// Globally indexed rows are fetched by global row index and packed directly.
6414 const map_type& rowMap = *(this->getRowMap());
6415 const GO gblRow = rowMap.getGlobalElement(lclRow);
6416 this->getGlobalRowView(gblRow, gidsIn, valsIn);
6418 const size_t numBytesPerValue =
6419 PackTraits<ST>::packValueCount(valsIn[0]);
6420 numBytes = this->packRow(exports_h.data(), offset, numEnt,
6421 gidsIn.data(), valsIn.data(),
// The packed row must lie entirely within the exports buffer.
6428 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6429 "First invalid offset into 'exports' pack buffer at index i = " << i
6430 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " << bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
".");
// Record the number of bytes packed for this row.
6434 numPacketsPerLID_h[i] = numBytes;
6439 std::ostringstream os;
6440 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6447 std::cerr << os.str();
// Raw-pointer front end to combineGlobalValues. Maps lclRow to a global row
// index through the graph's row Map, wraps cols and vals in ArrayViews of
// length numEnt, and forwards everything to combineGlobalValues.
6451 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6453 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6454 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
6455 const LocalOrdinal numEnt,
6456 const impl_scalar_type vals[],
6457 const GlobalOrdinal cols[],
6459 const char*
const prefix,
6461 const bool verbose) {
6462 using GO = GlobalOrdinal;
6466 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
// When numEnt is zero the views are built from nullptr instead of the
// incoming pointers.
6467 Teuchos::ArrayView<const GO> cols_av(numEnt == 0 ?
nullptr : cols, numEnt);
// vals is reinterpreted from impl_scalar_type to Scalar for the ArrayView.
6468 Teuchos::ArrayView<const Scalar> vals_av(numEnt == 0 ?
nullptr : reinterpret_cast<const Scalar*>(vals), numEnt);
6473 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6474 prefix, debug, verbose);
// Combines the given column indices and values into row globalRowIndex
// according to combineMode. The set of supported modes depends on whether the
// matrix has a static graph.
6478 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6479 void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6480 combineGlobalValues(
6481 const GlobalOrdinal globalRowIndex,
6482 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6483 const Teuchos::ArrayView<const Scalar>& values,
6485 const char*
const prefix,
6487 const bool verbose) {
6488 const char tfecfFuncName[] =
"combineGlobalValues: ";
// Static graph: ADD sums into existing entries, REPLACE overwrites them, and
// ABSMAX goes through transformGlobalValues with the AbsMax functor.
6490 if (isStaticGraph()) {
6494 if (combineMode ==
ADD) {
6495 sumIntoGlobalValues(globalRowIndex, columnIndices, values);
6496 }
else if (combineMode ==
REPLACE) {
6497 replaceGlobalValues(globalRowIndex, columnIndices, values);
6498 }
else if (combineMode ==
ABSMAX) {
6499 using ::Tpetra::Details::AbsMax;
6501 this->
template transformGlobalValues<AbsMax<Scalar>>(globalRowIndex,
6504 }
else if (combineMode ==
INSERT) {
// INSERT with a static graph is rejected with std::invalid_argument.
6505 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph() && combineMode ==
INSERT,
6506 std::invalid_argument,
6507 "INSERT combine mode is forbidden "
6508 "if the matrix has a static (const) graph (i.e., was "
6509 "constructed with the CrsMatrix constructor that takes a "
6510 "const CrsGraph pointer).");
// Unconditional std::logic_error for an unrecognized combine mode.
6512 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error,
6513 "Invalid combine mode; should "
6515 "Please report this bug to the Tpetra developers.");
// NOTE(review): presumably the dynamic-graph branch, since the checks below
// test !isStaticGraph() -- confirm. ADD and INSERT both go through
// insertGlobalValuesFilteredChecked.
6518 if (combineMode ==
ADD || combineMode ==
INSERT) {
6525 insertGlobalValuesFilteredChecked(globalRowIndex,
6526 columnIndices, values, prefix, debug, verbose);
// ABSMAX and REPLACE throw std::logic_error when the graph is not static.
6537 else if (combineMode ==
ABSMAX) {
6538 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6539 !isStaticGraph() && combineMode ==
ABSMAX, std::logic_error,
6540 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6542 }
else if (combineMode ==
REPLACE) {
6543 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6544 !isStaticGraph() && combineMode ==
REPLACE, std::logic_error,
6545 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
// Unconditional std::logic_error for a code path that should be unreachable.
6548 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6549 true, std::logic_error,
6550 "Should never get here! Please report this "
6551 "bug to the Tpetra developers.");
// Checks combineMode and the argument sizes, then hands the received data to
// unpackAndCombineImpl. One of the two call sites wraps the call in a
// try/catch followed by a reduction over the communicator.
6556 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6558 unpackAndCombine(
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6559 Kokkos::DualView<char*, buffer_device_type> imports,
6560 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6561 const size_t constantNumPackets,
6567 const char tfecfFuncName[] =
"unpackAndCombine: ";
6568 ProfilingRegion regionUAC(
"Tpetra::CrsMatrix::unpackAndCombine");
6570 const bool debug = Behavior::debug(
"CrsMatrix");
6571 const bool verbose = Behavior::verbose(
"CrsMatrix");
// Names of the accepted combine modes, used in the error message below.
6572 constexpr
int numValidModes = 5;
6575 const char* validModeNames[numValidModes] =
6576 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6578 std::unique_ptr<std::string> prefix;
6580 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6581 std::ostringstream os;
6582 os << *prefix <<
"Start:" << endl
6592 << *prefix <<
" constantNumPackets: " << constantNumPackets
6596 std::cerr << os.str();
// A combine mode not found in validModes is rejected with
// std::invalid_argument, and the message lists the valid names.
6600 if (std::find(validModes, validModes + numValidModes, combineMode) ==
6601 validModes + numValidModes) {
6602 std::ostringstream os;
6603 os <<
"Invalid combine mode. Valid modes are {";
6604 for (
int k = 0; k < numValidModes; ++k) {
6605 os << validModeNames[k];
6606 if (k < numValidModes - 1) {
6611 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::invalid_argument, os.str());
// importLIDs and numPacketsPerLID must have the same length.
6613 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(importLIDs.extent(0) != numPacketsPerLID.extent(0),
6614 std::invalid_argument,
"importLIDs.extent(0)=" << importLIDs.extent(0) <<
" != numPacketsPerLID.extent(0)=" << numPacketsPerLID.extent(0) <<
".");
// NOTE(review): ZERO is special-cased here; presumably nothing is unpacked -- confirm.
6617 if (combineMode ==
ZERO) {
// NOTE(review): this looks like the debug-mode path -- confirm. Exceptions
// from unpackAndCombineImpl are caught, a failure flag is max-reduced over
// the communicator, and the per-process messages are gathered before a
// std::logic_error is thrown.
6622 using Teuchos::reduceAll;
6623 std::unique_ptr<std::ostringstream> msg(
new std::ostringstream());
6626 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6627 constantNumPackets, combineMode,
6629 }
catch (std::exception& e) {
6634 const Teuchos::Comm<int>& comm = *(this->getComm());
6635 reduceAll<int, int>(comm, Teuchos::REDUCE_MAX,
6636 lclBad, Teuchos::outArg(gblBad));
6642 std::ostringstream os;
6643 os <<
"Proc " << comm.getRank() <<
": " << msg->str() << endl;
6644 msg = std::unique_ptr<std::ostringstream>(
new std::ostringstream());
6645 ::Tpetra::Details::gathervPrint(*msg, os.str(), comm);
6646 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error, std::endl
6647 <<
"unpackAndCombineImpl "
6648 "threw an exception on one or more participating processes: "
// Second call site: unpackAndCombineImpl is invoked without the try/catch.
6653 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6654 constantNumPackets, combineMode,
6659 std::ostringstream os;
6660 os << *prefix <<
"Done!" << endl
6670 std::cerr << os.str();
// Unpacks received data and combines it into the matrix. With a static graph
// the work goes to Details::unpackCrsMatrixAndCombineNew. The other path
// computes and applies padding, then calls unpackAndCombineImplNonStatic.
6674 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6677 const Kokkos::DualView<
const local_ordinal_type*,
6678 buffer_device_type>& importLIDs,
6679 Kokkos::DualView<char*, buffer_device_type> imports,
6680 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6681 const size_t constantNumPackets,
6683 const bool verbose) {
6685 "Tpetra::CrsMatrix::unpackAndCombineImpl",
6688 const char tfecfFuncName[] =
"unpackAndCombineImpl";
6689 std::unique_ptr<std::string> prefix;
6691 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
// Diagnostic output reporting the graph kind and the sizes of the inputs.
6692 std::ostringstream os;
6693 os << *prefix <<
"isStaticGraph(): "
6694 << (isStaticGraph() ?
"true" :
"false")
6695 <<
", importLIDs.extent(0): "
6696 << importLIDs.extent(0)
6697 <<
", imports.extent(0): "
6698 << imports.extent(0)
6699 <<
", numPacketsPerLID.extent(0): "
6700 << numPacketsPerLID.extent(0)
6702 std::cerr << os.str();
// Static graph: delegate to the Details unpack-and-combine routine.
6705 if (isStaticGraph()) {
6706 using Details::unpackCrsMatrixAndCombineNew;
6707 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
6708 importLIDs, constantNumPackets,
// The padding is computed by the graph from the incoming data.
6712 using padding_type =
typename crs_graph_type::padding_type;
6713 std::unique_ptr<padding_type> padding;
6715 padding = myGraph_->computePaddingForCrsMatrixUnpack(
6716 importLIDs, imports, numPacketsPerLID, verbose);
6717 }
catch (std::exception& e) {
// The rethrown std::runtime_error is tagged with this process's rank, or -1
// when the row Map or its communicator is null.
6718 const auto rowMap = getRowMap();
6719 const auto comm = rowMap.is_null() ? Teuchos::null : rowMap->getComm();
6720 const int myRank = comm.is_null() ? -1 : comm->getRank();
6721 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"Proc " << myRank <<
": "
6722 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
6723 "threw an exception: "
6727 std::ostringstream os;
6728 os << *prefix <<
"Call applyCrsPadding" << endl;
6729 std::cerr << os.str();
// Apply the computed padding before unpacking.
6731 applyCrsPadding(*padding, verbose);
6734 std::ostringstream os;
6735 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
6736 std::cerr << os.str();
6738 unpackAndCombineImplNonStatic(importLIDs, imports,
6745 std::ostringstream os;
6746 os << *prefix <<
"Done" << endl;
6747 std::cerr << os.str();
// Host-side unpack-and-combine. Makes two passes over importLIDs: the first
// validates the packed sizes and finds the largest row, and the second
// unpacks each row with unpackRow and merges it via combineGlobalValuesRaw.
6751 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6752 void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6753 unpackAndCombineImplNonStatic(
6754 const Kokkos::DualView<
const local_ordinal_type*,
6755 buffer_device_type>& importLIDs,
6756 Kokkos::DualView<char*, buffer_device_type> imports,
6757 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6758 const size_t constantNumPackets,
6760 using Details::Behavior;
6763 using Details::PackTraits;
6764 using Details::ScalarViewTraits;
6765 using Kokkos::MemoryUnmanaged;
6766 using Kokkos::subview;
6769 using LO = LocalOrdinal;
6770 using GO = GlobalOrdinal;
6771 using ST = impl_scalar_type;
6772 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
6774 typename View<int*, device_type>::host_mirror_type::execution_space;
6775 using pair_type = std::pair<typename View<int*, HES>::size_type,
6776 typename View<int*, HES>::size_type>;
// Unmanaged view types used for subviews of the scratch arrays below.
6777 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
6778 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
6779 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
6781 const bool debug = Behavior::debug(
"CrsMatrix");
6782 const bool verbose = Behavior::verbose(
"CrsMatrix");
6783 std::unique_ptr<std::string> prefix;
6785 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
6786 std::ostringstream os;
6787 os << *prefix << endl;
6788 std::cerr << os.str();
// combineGlobalValuesRaw takes a C-string prefix; nullptr is passed unless
// verbose output is enabled.
6790 const char*
const prefix_raw =
6791 verbose ? prefix.get()->c_str() :
nullptr;
6793 const size_type numImportLIDs = importLIDs.extent(0);
// NOTE(review): ZERO mode or an empty import list presumably returns early -- confirm.
6794 if (combineMode ==
ZERO || numImportLIDs == 0) {
6798 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
6799 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
// The host views are read below, so imports and numPacketsPerLID are synced
// to host when they need it.
6803 if (imports.need_sync_host()) {
6804 imports.sync_host();
6806 auto imports_h = imports.view_host();
6809 if (numPacketsPerLID.need_sync_host()) {
6810 numPacketsPerLID.sync_host();
6812 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
// importLIDs is const here, so it is only asserted to be host-ready.
6814 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
6815 auto importLIDs_h = importLIDs.view_host();
6817 size_t numBytesPerValue;
6828 numBytesPerValue = PackTraits<ST>::packValueCount(val);
// First pass: check each row's packed byte count against the imports buffer
// and record the largest per-row entry count.
6833 size_t maxRowNumEnt = 0;
6834 for (size_type i = 0; i < numImportLIDs; ++i) {
6835 const size_t numBytes = numPacketsPerLID_h[i];
6836 if (numBytes == 0) {
6841 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(offset + numBytes >
size_t(imports_h.extent(0)),
6842 std::logic_error,
": At local row index importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset <<
") + numBytes (=" << numBytes <<
") > "
6843 "imports_h.extent(0)="
6844 << imports_h.extent(0) <<
".");
6849 const size_t theNumBytes =
6851 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(theNumBytes > numBytes, std::logic_error,
": theNumBytes=" << theNumBytes <<
" > numBytes = " << numBytes <<
".");
6853 const char*
const inBuf = imports_h.data() + offset;
6854 const size_t actualNumBytes =
6858 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(actualNumBytes > numBytes, std::logic_error,
": At i=" << i <<
", actualNumBytes=" << actualNumBytes <<
" > numBytes=" << numBytes <<
".");
// A row with a nonzero byte count must not report zero entries.
6859 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numEntLO == 0, std::logic_error,
6860 ": At local row index "
6862 << i <<
"]=" << importLIDs_h[i] <<
", "
6863 "the number of entries read from the packed data is "
6865 << numEntLO <<
", but numBytes=" << numBytes
6869 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
// Scratch arrays, each allocated with maxRowNumEnt elements.
6877 View<GO*, HES> gblColInds;
6878 View<LO*, HES> lclColInds;
6879 View<ST*, HES> vals;
6892 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
6893 gid, maxRowNumEnt,
"gids");
6894 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
6895 lid, maxRowNumEnt,
"lids");
6896 vals = ScalarViewTraits<ST, HES>::allocateArray(
6897 val, maxRowNumEnt,
"vals");
// Second pass: unpack each row into the scratch arrays and combine it.
6901 for (size_type i = 0; i < numImportLIDs; ++i) {
6902 const size_t numBytes = numPacketsPerLID_h[i];
6903 if (numBytes == 0) {
6907 const char*
const inBuf = imports_h.data() + offset;
6910 const size_t numEnt =
static_cast<size_t>(numEntLO);
6912 const LO lclRow = importLIDs_h[i];
// Restrict the scratch arrays to this row's entry count.
6914 gids_out_type gidsOut = subview(gblColInds, pair_type(0, numEnt));
6915 vals_out_type valsOut = subview(vals, pair_type(0, numEnt));
// unpackRow must consume exactly the number of bytes recorded for this row.
6917 const size_t numBytesOut =
6918 unpackRow(gidsOut.data(), valsOut.data(), imports_h.data(),
6919 offset, numBytes, numEnt, numBytesPerValue);
6920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numBytes != numBytesOut, std::logic_error,
": At i=" << i <<
", numBytes=" << numBytes <<
" != numBytesOut=" << numBytesOut <<
".");
6922 const ST*
const valsRaw =
const_cast<const ST*
>(valsOut.data());
6923 const GO*
const gidsRaw =
const_cast<const GO*
>(gidsOut.data());
6924 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
6925 combineMode, prefix_raw, debug, verbose);
6931 std::ostringstream os;
6932 os << *prefix <<
"Done" << endl;
6933 std::cerr << os.str();
// Provides a MultiVector on the matrix's column Map, using the cached
// importMV_ when possible. Requires a column Map and a fill-complete graph;
// either failure throws std::runtime_error.
6937 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6938 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
6941 const bool force)
const {
6942 using Teuchos::null;
6946 TEUCHOS_TEST_FOR_EXCEPTION(
6947 !this->hasColMap(), std::runtime_error,
6948 "Tpetra::CrsMatrix::getColumn"
6949 "MapMultiVector: You may only call this method if the matrix has a "
6950 "column Map. If the matrix does not yet have a column Map, you should "
6951 "first call fillComplete (with domain and range Map if necessary).");
6955 TEUCHOS_TEST_FOR_EXCEPTION(
6956 !this->getGraph()->isFillComplete(), std::runtime_error,
6958 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
6959 "this matrix's graph is fill complete.");
6962 RCP<const import_type> importer = this->getGraph()->getImporter();
6963 RCP<const map_type> colMap = this->getColMap();
// A vector is supplied only when the graph has an Importer or the caller
// forces it. The cached importMV_ is replaced when it is null or its column
// count differs from numVecs.
6976 if (!importer.is_null() || force) {
6977 if (importMV_.is_null() || importMV_->getNumVectors() != numVecs) {
6978 X_colMap = rcp(
new MV(colMap, numVecs));
6981 importMV_ = X_colMap;
// NOTE(review): presumably the cache-hit alternative of the check above -- confirm.
6983 X_colMap = importMV_;
// Provides a MultiVector on the matrix's row Map, using the cached exportMV_
// when possible. Throws std::runtime_error if the graph is not fill complete.
6994 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6995 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
6998 const bool force)
const {
6999 using Teuchos::null;
7005 TEUCHOS_TEST_FOR_EXCEPTION(
7006 !this->getGraph()->isFillComplete(), std::runtime_error,
7008 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7009 "matrix's graph is fill complete.");
7012 RCP<const export_type> exporter = this->getGraph()->getExporter();
7016 RCP<const map_type> rowMap = this->getRowMap();
// A vector is supplied only when the graph has an Exporter or the caller
// forces it. The cached exportMV_ is replaced when it is null or its column
// count differs from numVecs.
7028 if (!exporter.is_null() || force) {
7029 if (exportMV_.is_null() || exportMV_->getNumVectors() != numVecs) {
7030 Y_rowMap = rcp(
new MV(rowMap, numVecs));
7031 exportMV_ = Y_rowMap;
// NOTE(review): presumably the cache-hit alternative of the check above -- confirm.
7033 Y_rowMap = exportMV_;
// Forwards removeEmptyProcessesInPlace(newMap) to the matrix's own graph and
// then refreshes this object's Map and staticGraph_ from the updated graph.
// Throws std::logic_error when myGraph_ is null.
7039 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7042 TEUCHOS_TEST_FOR_EXCEPTION(
7043 myGraph_.is_null(), std::logic_error,
7044 "Tpetra::CrsMatrix::"
7045 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7046 "was created with a constant graph (that is, when it was created using "
7047 "the version of its constructor that takes an RCP<const CrsGraph>). "
7048 "This is because the matrix is not allowed to modify the graph in that "
7049 "case, but removing empty processes requires modifying the graph.");
7050 myGraph_->removeEmptyProcessesInPlace(newMap);
// Re-read the row Map after the graph has been modified.
7054 this->map_ = this->getRowMap();
// Point staticGraph_ at the same graph object as myGraph_.
7058 staticGraph_ = Teuchos::rcp_const_cast<
const Graph>(myGraph_);
// Builds a new matrix C from an input matrix A and this matrix (called B
// below); the verbose message describes the result as C = alpha*A + beta*B.
// Optional domainMap/rangeMap override the Maps passed to fillComplete, and
// params may carry "Call fillComplete", "Constructor parameters" and
// "fillComplete parameters". Invalid Map combinations or differing row Maps
// raise std::invalid_argument.
7061 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7062 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
7067 const Teuchos::RCP<const map_type>& domainMap,
7068 const Teuchos::RCP<const map_type>& rangeMap,
7069 const Teuchos::RCP<Teuchos::ParameterList>& params)
const {
7071 using Teuchos::Array;
7072 using Teuchos::ArrayView;
7073 using Teuchos::ParameterList;
7076 using Teuchos::rcp_implicit_cast;
7077 using Teuchos::sublist;
7080 using crs_matrix_type =
7082 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7086 std::unique_ptr<std::string> prefix;
7088 prefix = this->createPrefix(
"CrsMatrix",
"add");
7089 std::ostringstream os;
7090 os << *prefix <<
"Start" << endl;
7091 std::cerr << os.str();
// B is this matrix; ZERO and ONE are the Scalar additive and multiplicative
// identities.
7094 const crs_matrix_type& B = *
this;
7095 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7096 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7104 RCP<const map_type> B_domainMap = B.getDomainMap();
7105 RCP<const map_type> B_rangeMap = B.getRangeMap();
7107 RCP<const map_type> theDomainMap = domainMap;
7108 RCP<const map_type> theRangeMap = rangeMap;
// When no domain Map is supplied, B's is used if it has one, else A's.
// If neither matrix has one, std::invalid_argument is thrown.
7110 if (domainMap.is_null()) {
7111 if (B_domainMap.is_null()) {
7112 TEUCHOS_TEST_FOR_EXCEPTION(
7113 A_domainMap.is_null(), std::invalid_argument,
7114 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7115 "then you must supply a nonnull domain Map to this method.");
7116 theDomainMap = A_domainMap;
7118 theDomainMap = B_domainMap;
// The same fallback order applies to the range Map.
7121 if (rangeMap.is_null()) {
7122 if (B_rangeMap.is_null()) {
7123 TEUCHOS_TEST_FOR_EXCEPTION(
7124 A_rangeMap.is_null(), std::invalid_argument,
7125 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7126 "then you must supply a nonnull range Map to this method.");
7127 theRangeMap = A_rangeMap;
7129 theRangeMap = B_rangeMap;
// Consistency checks: when both matrices have domain and range Maps they must
// agree (isSameAs), and any supplied Map must match B's.
7137 if (!A_domainMap.is_null() && !A_rangeMap.is_null()) {
7138 if (!B_domainMap.is_null() && !B_rangeMap.is_null()) {
7139 TEUCHOS_TEST_FOR_EXCEPTION(!B_domainMap->isSameAs(*A_domainMap),
7140 std::invalid_argument,
7141 errPfx <<
"The input RowMatrix A must have a domain Map "
7142 "which is the same as (isSameAs) this RowMatrix's "
7144 TEUCHOS_TEST_FOR_EXCEPTION(!B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7145 errPfx <<
"The input RowMatrix A must have a range Map "
7146 "which is the same as (isSameAs) this RowMatrix's range "
7148 TEUCHOS_TEST_FOR_EXCEPTION(!domainMap.is_null() &&
7149 !domainMap->isSameAs(*B_domainMap),
7150 std::invalid_argument,
7151 errPfx <<
"The input domain Map must be the same as "
7152 "(isSameAs) this RowMatrix's domain Map.");
7153 TEUCHOS_TEST_FOR_EXCEPTION(!rangeMap.is_null() &&
7154 !rangeMap->isSameAs(*B_rangeMap),
7155 std::invalid_argument,
7156 errPfx <<
"The input range Map must be the same as "
7157 "(isSameAs) this RowMatrix's range Map.");
7159 }
else if (!B_domainMap.is_null() && !B_rangeMap.is_null()) {
7160 TEUCHOS_TEST_FOR_EXCEPTION(!domainMap.is_null() &&
7161 !domainMap->isSameAs(*B_domainMap),
7162 std::invalid_argument,
7163 errPfx <<
"The input domain Map must be the same as "
7164 "(isSameAs) this RowMatrix's domain Map.");
7165 TEUCHOS_TEST_FOR_EXCEPTION(!rangeMap.is_null() && !rangeMap->isSameAs(*B_rangeMap),
7166 std::invalid_argument,
7167 errPfx <<
"The input range Map must be the same as "
7168 "(isSameAs) this RowMatrix's range Map.");
// With no Maps available from either matrix, the caller must supply both.
7170 TEUCHOS_TEST_FOR_EXCEPTION(domainMap.is_null() || rangeMap.is_null(),
7171 std::invalid_argument, errPfx <<
"If neither A nor B "
7172 "have a domain and range Map, then you must supply a "
7173 "nonnull domain and range Map to this method.");
// Read the optional controls from params; fillComplete is called by default.
7180 bool callFillComplete =
true;
7181 RCP<ParameterList> constructorSublist;
7182 RCP<ParameterList> fillCompleteSublist;
7183 if (!params.is_null()) {
7185 params->get(
"Call fillComplete", callFillComplete);
7186 constructorSublist = sublist(params,
"Constructor parameters");
7187 fillCompleteSublist = sublist(params,
"fillComplete parameters");
// C uses B's row Map.
7190 RCP<const map_type> A_rowMap = A.
getRowMap();
7191 RCP<const map_type> B_rowMap = B.getRowMap();
7192 RCP<const map_type> C_rowMap = B_rowMap;
7193 RCP<crs_matrix_type> C;
// With identical row Maps, each row of C is given an entry bound equal to
// A's count (only when alpha is nonzero) plus B's count for that row.
7199 if (A_rowMap->isSameAs(*B_rowMap)) {
7200 const LO localNumRows =
static_cast<LO
>(A_rowMap->getLocalNumElements());
7201 Array<size_t> C_maxNumEntriesPerRow(localNumRows, 0);
7204 if (alpha != ZERO) {
7205 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7207 C_maxNumEntriesPerRow[localRow] += A_numEntries;
7212 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7213 const size_t B_numEntries = B.getNumEntriesInLocalRow(localRow);
7214 C_maxNumEntriesPerRow[localRow] += B_numEntries;
// Construct C, passing the constructor sublist only when one was given.
7218 if (constructorSublist.is_null()) {
7219 C = rcp(
new crs_matrix_type(C_rowMap, C_maxNumEntriesPerRow()));
7221 C = rcp(
new crs_matrix_type(C_rowMap, C_maxNumEntriesPerRow(),
7222 constructorSublist));
// NOTE(review): presumably reached only when the row Maps differ -- confirm.
7232 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, errPfx <<
"The row maps must "
7233 "be the same for statically allocated matrices, to ensure "
7234 "that there is sufficient space to do the addition.");
7237 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null(), std::logic_error,
7238 errPfx <<
"C should not be null at this point. "
7239 "Please report this bug to the Tpetra developers.");
7242 std::ostringstream os;
7243 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7244 std::cerr << os.str();
7246 using gids_type = nonconst_global_inds_host_view_type;
7247 using vals_type = nonconst_values_host_view_type;
// A's contribution (skipped when alpha is zero): for each row, grow the
// scratch buffers if needed, scale the values by alpha and insert into C.
7251 if (alpha != ZERO) {
7252 const LO A_localNumRows =
static_cast<LO
>(A_rowMap->getLocalNumElements());
7253 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7255 const GO globalRow = A_rowMap->getGlobalElement(localRow);
7256 if (A_numEntries > static_cast<size_t>(ind.size())) {
7257 Kokkos::resize(ind, A_numEntries);
7258 Kokkos::resize(val, A_numEntries);
7260 gids_type indView = Kokkos::subview(ind, std::make_pair((
size_t)0, A_numEntries));
7261 vals_type valView = Kokkos::subview(val, std::make_pair((
size_t)0, A_numEntries));
7265 for (
size_t k = 0; k < A_numEntries; ++k) {
7266 valView[k] *= alpha;
7269 C->insertGlobalValues(globalRow, A_numEntries,
7270 reinterpret_cast<Scalar*>(valView.data()),
// B's contribution: each row is copied with getGlobalRowCopy and inserted
// into C. NOTE(review): presumably the values are scaled by beta in the loop
// below -- confirm.
7276 const LO B_localNumRows =
static_cast<LO
>(B_rowMap->getLocalNumElements());
7277 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7278 size_t B_numEntries = B.getNumEntriesInLocalRow(localRow);
7279 const GO globalRow = B_rowMap->getGlobalElement(localRow);
7280 if (B_numEntries > static_cast<size_t>(ind.size())) {
7281 Kokkos::resize(ind, B_numEntries);
7282 Kokkos::resize(val, B_numEntries);
7284 gids_type indView = Kokkos::subview(ind, std::make_pair((
size_t)0, B_numEntries));
7285 vals_type valView = Kokkos::subview(val, std::make_pair((
size_t)0, B_numEntries));
7286 B.getGlobalRowCopy(globalRow, indView, valView, B_numEntries);
7289 for (
size_t k = 0; k < B_numEntries; ++k) {
7293 C->insertGlobalValues(globalRow, B_numEntries,
7294 reinterpret_cast<Scalar*>(valView.data()),
// Optionally fillComplete C with the resolved domain and range Maps,
// passing the fillComplete sublist when one was given.
7299 if (callFillComplete) {
7301 std::ostringstream os;
7302 os << *prefix <<
"Call fillComplete on C" << endl;
7303 std::cerr << os.str();
7305 if (fillCompleteSublist.is_null()) {
7306 C->fillComplete(theDomainMap, theRangeMap);
7308 C->fillComplete(theDomainMap, theRangeMap, fillCompleteSublist);
7310 }
else if (verbose) {
7311 std::ostringstream os;
7312 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7313 std::cerr << os.str();
7317 std::ostringstream os;
7318 os << *prefix <<
"Done" << endl;
7319 std::cerr << os.str();
7324 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7327 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7328 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>>& domainTransfer,
7329 const Teuchos::RCP<const map_type>& domainMap,
7330 const Teuchos::RCP<const map_type>& rangeMap,
7331 const Teuchos::RCP<Teuchos::ParameterList>& params)
const {
7338 using Teuchos::ArrayRCP;
7339 using Teuchos::ArrayView;
7340 using Teuchos::Comm;
7341 using Teuchos::ParameterList;
7343 typedef LocalOrdinal LO;
7344 typedef GlobalOrdinal GO;
7345 typedef node_type NT;
7350 const bool debug = Behavior::debug(
"CrsMatrix");
7351 const bool verbose = Behavior::verbose(
"CrsMatrix");
7352 int MyPID = getComm()->getRank();
7354 std::unique_ptr<std::string> verbosePrefix;
7357 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7358 std::ostringstream os;
7359 os <<
"Start" << endl;
7360 std::cerr << os.str();
7367 bool reverseMode =
false;
7368 bool restrictComm =
false;
7370 int mm_optimization_core_count =
7371 Behavior::TAFC_OptimizationCoreCount();
7372 RCP<ParameterList> matrixparams;
7373 bool overrideAllreduce =
false;
7374 bool useKokkosPath =
false;
7375 if (!params.is_null()) {
7376 matrixparams = sublist(params,
"CrsMatrix");
7377 reverseMode = params->get(
"Reverse Mode", reverseMode);
7378 useKokkosPath = params->get(
"TAFC: use kokkos path", useKokkosPath);
7379 restrictComm = params->get(
"Restrict Communicator", restrictComm);
7380 auto& slist = params->sublist(
"matrixmatrix: kernel params",
false);
7381 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7382 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
7384 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7385 if (getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7386 if (reverseMode) isMM =
false;
7390 std::shared_ptr<::Tpetra::Details::CommRequest> iallreduceRequest;
7392 int reduced_mismatch = 0;
7393 if (isMM && !overrideAllreduce) {
7395 const bool source_vals = !getGraph()->getImporter().is_null();
7396 const bool target_vals = !(rowTransfer.getExportLIDs().size() == 0 ||
7397 rowTransfer.getRemoteLIDs().size() == 0);
7398 mismatch = (source_vals != target_vals) ? 1 : 0;
7401 Teuchos::REDUCE_MAX, *(getComm()));
7404 #ifdef HAVE_TPETRA_MMM_TIMINGS
7405 using Teuchos::TimeMonitor;
7407 if (!params.is_null())
7408 label = params->get(
"Timer Label", label);
7409 std::string prefix = std::string(
"Tpetra ") + label + std::string(
": ");
7412 std::ostringstream os;
7420 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") + tlstr));
7428 const import_type* xferAsImport =
dynamic_cast<const import_type*
>(&rowTransfer);
7429 const export_type* xferAsExport =
dynamic_cast<const export_type*
>(&rowTransfer);
7430 TEUCHOS_TEST_FOR_EXCEPTION(
7431 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7432 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7433 "argument must be either an Import or an Export, and its template "
7434 "parameters must match the corresponding template parameters of the "
7442 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<
const import_type>(domainTransfer);
7443 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<
const export_type>(domainTransfer);
7445 if (!domainTransfer.is_null()) {
7446 TEUCHOS_TEST_FOR_EXCEPTION(
7447 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7448 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7449 "argument must be either an Import or an Export, and its template "
7450 "parameters must match the corresponding template parameters of the "
7453 TEUCHOS_TEST_FOR_EXCEPTION(
7454 (xferAsImport !=
nullptr || !xferDomainAsImport.is_null()) &&
7455 ((xferAsImport !=
nullptr && xferDomainAsImport.is_null()) ||
7456 (xferAsImport ==
nullptr && !xferDomainAsImport.is_null())),
7457 std::invalid_argument,
7458 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7459 "arguments must be of the same type (either Import or Export).");
7461 TEUCHOS_TEST_FOR_EXCEPTION(
7462 (xferAsExport !=
nullptr || !xferDomainAsExport.is_null()) &&
7463 ((xferAsExport !=
nullptr && xferDomainAsExport.is_null()) ||
7464 (xferAsExport ==
nullptr && !xferDomainAsExport.is_null())),
7465 std::invalid_argument,
7466 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7467 "arguments must be of the same type (either Import or Export).");
7472 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
7476 RCP<const map_type> MyRowMap = reverseMode ? rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
7477 RCP<const map_type> MyColMap;
7478 RCP<const map_type> MyDomainMap = !domainMap.is_null() ? domainMap : getDomainMap();
7479 RCP<const map_type> MyRangeMap = !rangeMap.is_null() ? rangeMap : getRangeMap();
7480 RCP<const map_type> BaseRowMap = MyRowMap;
7481 RCP<const map_type> BaseDomainMap = MyDomainMap;
7489 if (!destMat.is_null()) {
7500 const bool NewFlag = !destMat->getGraph()->isLocallyIndexed() &&
7501 !destMat->getGraph()->isGloballyIndexed();
7502 TEUCHOS_TEST_FOR_EXCEPTION(
7503 !NewFlag, std::invalid_argument,
7504 "Tpetra::CrsMatrix::"
7505 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7506 "to be nonnull, if its graph is empty (neither locally nor globally "
7515 TEUCHOS_TEST_FOR_EXCEPTION(
7516 !destMat->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
7517 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7518 "input argument 'destMat' is not the same as the (row) Map specified "
7519 "by the input argument 'rowTransfer'.");
7520 TEUCHOS_TEST_FOR_EXCEPTION(
7521 !destMat->checkSizes(*
this), std::invalid_argument,
7522 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7523 "destination matrix, but checkSizes() indicates that it is not a legal "
7524 "legal target for redistribution from the source matrix (*this). This "
7525 "may mean that they do not have the same dimensions.");
7539 TEUCHOS_TEST_FOR_EXCEPTION(
7540 !(reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
7541 std::invalid_argument,
7542 "Tpetra::CrsMatrix::transferAndFillComplete: "
7543 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7544 TEUCHOS_TEST_FOR_EXCEPTION(
7545 !(!reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
7546 std::invalid_argument,
7547 "Tpetra::CrsMatrix::transferAndFillComplete: "
7548 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7551 TEUCHOS_TEST_FOR_EXCEPTION(
7552 !xferDomainAsImport.is_null() && !xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7553 std::invalid_argument,
7554 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7555 "argument must be the same as the rebalanced domain map 'domainMap'");
7557 TEUCHOS_TEST_FOR_EXCEPTION(
7558 !xferDomainAsExport.is_null() && !xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7559 std::invalid_argument,
7560 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7561 "argument must be the same as the rebalanced domain map 'domainMap'");
7574 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7575 ArrayView<const LO> ExportLIDs = reverseMode ? rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
7576 auto RemoteLIDs = reverseMode ? rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
7577 auto PermuteToLIDs = reverseMode ? rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
7578 auto PermuteFromLIDs = reverseMode ? rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
7579 Distributor& Distor = rowTransfer.getDistributor();
7582 Teuchos::Array<int> SourcePids;
7585 RCP<const map_type> ReducedRowMap, ReducedColMap,
7586 ReducedDomainMap, ReducedRangeMap;
7587 RCP<const Comm<int>> ReducedComm;
7591 if (destMat.is_null()) {
7592 destMat = rcp(
new this_CRS_type(MyRowMap, 0, matrixparams));
7599 #ifdef HAVE_TPETRA_MMM_TIMINGS
7600 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
7602 ReducedRowMap = MyRowMap->removeEmptyProcesses();
7603 ReducedComm = ReducedRowMap.is_null() ? Teuchos::null : ReducedRowMap->getComm();
7604 destMat->removeEmptyProcessesInPlace(ReducedRowMap);
7606 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ? ReducedRowMap : MyDomainMap->replaceCommWithSubset(ReducedComm);
7607 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ? ReducedRowMap : MyRangeMap->replaceCommWithSubset(ReducedComm);
7610 MyRowMap = ReducedRowMap;
7611 MyDomainMap = ReducedDomainMap;
7612 MyRangeMap = ReducedRangeMap;
7615 if (!ReducedComm.is_null()) {
7616 MyPID = ReducedComm->getRank();
7621 ReducedComm = MyRowMap->getComm();
7628 RCP<const import_type> MyImporter = getGraph()->getImporter();
7631 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
7633 if (!restrictComm && !MyImporter.is_null() && bSameDomainMap) {
7634 #ifdef HAVE_TPETRA_MMM_TIMINGS
7635 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
7643 Import_Util::getPids(*MyImporter, SourcePids,
false);
7644 }
else if (restrictComm && !MyImporter.is_null() && bSameDomainMap) {
7647 #ifdef HAVE_TPETRA_MMM_TIMINGS
7648 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
7650 IntVectorType SourceDomain_pids(getDomainMap(),
true);
7651 IntVectorType SourceCol_pids(getColMap());
7653 SourceDomain_pids.putScalar(MyPID);
7655 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
7656 SourcePids.resize(getColMap()->getLocalNumElements());
7657 SourceCol_pids.get1dCopy(SourcePids());
7658 }
else if (MyImporter.is_null()) {
7660 #ifdef HAVE_TPETRA_MMM_TIMINGS
7661 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
7663 SourcePids.resize(getColMap()->getLocalNumElements());
7664 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
7665 }
else if (!MyImporter.is_null() &&
7666 !domainTransfer.is_null()) {
7671 #ifdef HAVE_TPETRA_MMM_TIMINGS
7672 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
7676 IntVectorType TargetDomain_pids(domainMap);
7677 TargetDomain_pids.putScalar(MyPID);
7680 IntVectorType SourceDomain_pids(getDomainMap());
7683 IntVectorType SourceCol_pids(getColMap());
7685 if (!reverseMode && !xferDomainAsImport.is_null()) {
7686 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
7687 }
else if (reverseMode && !xferDomainAsExport.is_null()) {
7688 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
7689 }
else if (!reverseMode && !xferDomainAsExport.is_null()) {
7690 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
7691 }
else if (reverseMode && !xferDomainAsImport.is_null()) {
7692 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
7694 TEUCHOS_TEST_FOR_EXCEPTION(
7695 true, std::logic_error,
7696 "Tpetra::CrsMatrix::"
7697 "transferAndFillComplete: Should never get here! "
7698 "Please report this bug to a Tpetra developer.");
7700 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
7701 SourcePids.resize(getColMap()->getLocalNumElements());
7702 SourceCol_pids.get1dCopy(SourcePids());
7703 }
else if (!MyImporter.is_null() &&
7704 BaseDomainMap->isSameAs(*BaseRowMap) &&
7705 getDomainMap()->isSameAs(*getRowMap())) {
7707 #ifdef HAVE_TPETRA_MMM_TIMINGS
7708 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
7711 IntVectorType TargetRow_pids(domainMap);
7712 IntVectorType SourceRow_pids(getRowMap());
7713 IntVectorType SourceCol_pids(getColMap());
7715 TargetRow_pids.putScalar(MyPID);
7716 if (!reverseMode && xferAsImport !=
nullptr) {
7717 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport,
INSERT);
7718 }
else if (reverseMode && xferAsExport !=
nullptr) {
7719 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport,
INSERT);
7720 }
else if (!reverseMode && xferAsExport !=
nullptr) {
7721 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport,
INSERT);
7722 }
else if (reverseMode && xferAsImport !=
nullptr) {
7723 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport,
INSERT);
7725 TEUCHOS_TEST_FOR_EXCEPTION(
7726 true, std::logic_error,
7727 "Tpetra::CrsMatrix::"
7728 "transferAndFillComplete: Should never get here! "
7729 "Please report this bug to a Tpetra developer.");
7732 SourceCol_pids.doImport(SourceRow_pids, *MyImporter,
INSERT);
7733 SourcePids.resize(getColMap()->getLocalNumElements());
7734 SourceCol_pids.get1dCopy(SourcePids());
7736 TEUCHOS_TEST_FOR_EXCEPTION(
7737 true, std::invalid_argument,
7738 "Tpetra::CrsMatrix::"
7739 "transferAndFillComplete: This method only allows either domainMap == "
7740 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
7741 "getDomainMap () == getRowMap ()).");
7745 size_t constantNumPackets = destMat->constantNumberOfPackets();
7747 #ifdef HAVE_TPETRA_MMM_TIMINGS
7748 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
7750 if (constantNumPackets == 0) {
7751 destMat->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7752 RemoteLIDs.view_host().size());
7758 const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
7759 destMat->reallocImportsIfNeeded(rbufLen,
false,
nullptr);
7765 #ifdef HAVE_TPETRA_MMM_TIMINGS
7766 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
7771 using Teuchos::outArg;
7772 using Teuchos::REDUCE_MAX;
7773 using Teuchos::reduceAll;
7774 RCP<const Teuchos::Comm<int>> comm = this->getComm();
7775 const int myRank = comm->getRank();
7777 std::ostringstream errStrm;
7781 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
7784 destMat->numExportPacketsPerLID_.modify_host();
7785 numExportPacketsPerLID =
7787 }
catch (std::exception& e) {
7788 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
7789 << e.what() << std::endl;
7792 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
7793 "an exception not a subclass of std::exception"
7798 if (!comm.is_null()) {
7799 reduceAll<int, int>(*comm, REDUCE_MAX, lclErr, outArg(gblErr));
7802 ::Tpetra::Details::gathervPrint(cerr, errStrm.str(), *comm);
7803 TEUCHOS_TEST_FOR_EXCEPTION(
7804 true, std::runtime_error,
7805 "getArrayViewFromDualView threw an "
7806 "exception on at least one process.");
7810 std::ostringstream os;
7811 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
7813 std::cerr << os.str();
7818 numExportPacketsPerLID,
7821 constantNumPackets);
7822 }
catch (std::exception& e) {
7823 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
7824 << e.what() << std::endl;
7827 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
7828 "an exception not a subclass of std::exception"
7834 std::ostringstream os;
7835 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
7837 std::cerr << os.str();
7840 if (!comm.is_null()) {
7841 reduceAll<int, int>(*comm, REDUCE_MAX, lclErr, outArg(gblErr));
7844 ::Tpetra::Details::gathervPrint(cerr, errStrm.str(), *comm);
7845 TEUCHOS_TEST_FOR_EXCEPTION(
7846 true, std::runtime_error,
7847 "packCrsMatrixWithOwningPIDs threw an "
7848 "exception on at least one process.");
7852 destMat->numExportPacketsPerLID_.modify_host();
7853 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7856 std::ostringstream os;
7857 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
7859 std::cerr << os.str();
7863 numExportPacketsPerLID,
7866 constantNumPackets);
7868 std::ostringstream os;
7869 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
7871 std::cerr << os.str();
7878 #ifdef HAVE_TPETRA_MMM_TIMINGS
7879 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
7881 if (!communication_needed) {
7883 std::ostringstream os;
7884 os << *verbosePrefix <<
"Communication not needed" << std::endl;
7885 std::cerr << os.str();
7889 if (constantNumPackets == 0) {
7891 std::ostringstream os;
7892 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
7894 std::cerr << os.str();
7899 destMat->numExportPacketsPerLID_.sync_host();
7900 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7902 destMat->numImportPacketsPerLID_.sync_host();
7903 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7907 std::ostringstream os;
7908 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
7910 std::cerr << os.str();
7912 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
7913 destMat->numImportPacketsPerLID_.view_host());
7915 std::ostringstream os;
7916 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
7918 std::cerr << os.str();
7921 size_t totalImportPackets = 0;
7923 totalImportPackets += numImportPacketsPerLID[i];
7928 destMat->reallocImportsIfNeeded(totalImportPackets, verbose,
7929 verbosePrefix.get());
7930 destMat->imports_.modify_host();
7931 auto hostImports = destMat->imports_.view_host();
7934 destMat->exports_.sync_host();
7935 auto hostExports = destMat->exports_.view_host();
7937 std::ostringstream os;
7938 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
7940 std::cerr << os.str();
7942 Distor.doReversePostsAndWaits(hostExports,
7943 numExportPacketsPerLID,
7945 numImportPacketsPerLID);
7947 std::ostringstream os;
7948 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
7950 std::cerr << os.str();
7954 std::ostringstream os;
7955 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
7957 std::cerr << os.str();
7959 destMat->imports_.modify_host();
7960 auto hostImports = destMat->imports_.view_host();
7963 destMat->exports_.sync_host();
7964 auto hostExports = destMat->exports_.view_host();
7966 std::ostringstream os;
7967 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
7969 std::cerr << os.str();
7971 Distor.doReversePostsAndWaits(hostExports,
7975 std::ostringstream os;
7976 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
7978 std::cerr << os.str();
7982 if (constantNumPackets == 0) {
7984 std::ostringstream os;
7985 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
7987 std::cerr << os.str();
7992 destMat->numExportPacketsPerLID_.sync_host();
7993 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7995 destMat->numImportPacketsPerLID_.sync_host();
7996 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7999 std::ostringstream os;
8000 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8002 std::cerr << os.str();
8004 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8005 destMat->numImportPacketsPerLID_.view_host());
8007 std::ostringstream os;
8008 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8010 std::cerr << os.str();
8013 size_t totalImportPackets = 0;
8015 totalImportPackets += numImportPacketsPerLID[i];
8020 destMat->reallocImportsIfNeeded(totalImportPackets, verbose,
8021 verbosePrefix.get());
8022 destMat->imports_.modify_host();
8023 auto hostImports = destMat->imports_.view_host();
8026 destMat->exports_.sync_host();
8027 auto hostExports = destMat->exports_.view_host();
8029 std::ostringstream os;
8030 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8032 std::cerr << os.str();
8034 Distor.doPostsAndWaits(hostExports,
8035 numExportPacketsPerLID,
8037 numImportPacketsPerLID);
8039 std::ostringstream os;
8040 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8042 std::cerr << os.str();
8046 std::ostringstream os;
8047 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8049 std::cerr << os.str();
8051 destMat->imports_.modify_host();
8052 auto hostImports = destMat->imports_.view_host();
8055 destMat->exports_.sync_host();
8056 auto hostExports = destMat->exports_.view_host();
8058 std::ostringstream os;
8059 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8061 std::cerr << os.str();
8063 Distor.doPostsAndWaits(hostExports,
8067 std::ostringstream os;
8068 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8070 std::cerr << os.str();
8081 bool runOnHost = std::is_same_v<typename device_type::memory_space, Kokkos::HostSpace> && !useKokkosPath;
8083 Teuchos::Array<int> RemotePids;
8085 Teuchos::Array<int> TargetPids;
8091 destMat->numImportPacketsPerLID_.modify_host();
8093 #ifdef HAVE_TPETRA_MMM_TIMINGS
8094 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8096 ArrayRCP<size_t> CSR_rowptr;
8097 ArrayRCP<GO> CSR_colind_GID;
8098 ArrayRCP<LO> CSR_colind_LID;
8099 ArrayRCP<Scalar> CSR_vals;
8101 destMat->imports_.sync_device();
8102 destMat->numImportPacketsPerLID_.sync_device();
8104 size_t N = BaseRowMap->getLocalNumElements();
8106 auto RemoteLIDs_d = RemoteLIDs.view_device();
8107 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8108 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8113 destMat->imports_.view_device(),
8114 destMat->numImportPacketsPerLID_.view_device(),
8128 if (
typeid(LO) ==
typeid(GO)) {
8129 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
8131 CSR_colind_LID.resize(CSR_colind_GID.size());
8133 CSR_colind_LID.resize(CSR_colind_GID.size());
8138 for (
size_t i = 0; i < static_cast<size_t>(TargetPids.size()); i++) {
8139 if (TargetPids[i] == -1) TargetPids[i] = MyPID;
8141 #ifdef HAVE_TPETRA_MMM_TIMINGS
8142 tmCopySPRdata = Teuchos::null;
8151 std::ostringstream os;
8152 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8154 std::cerr << os.str();
8157 #ifdef HAVE_TPETRA_MMM_TIMINGS
8158 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8160 Import_Util::lowCommunicationMakeColMapAndReindexSerial(CSR_rowptr(),
8170 std::ostringstream os;
8171 os << *verbosePrefix <<
"restrictComm="
8172 << (restrictComm ?
"true" :
"false") << std::endl;
8173 std::cerr << os.str();
8180 #ifdef HAVE_TPETRA_MMM_TIMINGS
8181 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8184 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ? ReducedRowMap : MyColMap->replaceCommWithSubset(ReducedComm);
8185 MyColMap = ReducedColMap;
8190 std::ostringstream os;
8191 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8192 std::cerr << os.str();
8194 destMat->replaceColMap(MyColMap);
8201 if (ReducedComm.is_null()) {
8203 std::ostringstream os;
8204 os << *verbosePrefix <<
"I am no longer in the communicator; "
8207 std::cerr << os.str();
8216 if ((!reverseMode && xferAsImport !=
nullptr) ||
8217 (reverseMode && xferAsExport !=
nullptr)) {
8219 std::ostringstream os;
8220 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8221 std::cerr << os.str();
8223 #ifdef HAVE_TPETRA_MMM_TIMINGS
8224 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8226 Import_Util::sortCrsEntries(CSR_rowptr(),
8229 }
else if ((!reverseMode && xferAsExport !=
nullptr) ||
8230 (reverseMode && xferAsImport !=
nullptr)) {
8232 std::ostringstream os;
8233 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8235 std::cerr << os.str();
8237 #ifdef HAVE_TPETRA_MMM_TIMINGS
8238 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8240 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
8243 if (CSR_rowptr[N] != static_cast<size_t>(CSR_vals.size())) {
8244 CSR_colind_LID.resize(CSR_rowptr[N]);
8245 CSR_vals.resize(CSR_rowptr[N]);
8248 TEUCHOS_TEST_FOR_EXCEPTION(
8249 true, std::logic_error,
8250 "Tpetra::CrsMatrix::"
8251 "transferAndFillComplete: Should never get here! "
8252 "Please report this bug to a Tpetra developer.");
8259 std::ostringstream os;
8260 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8261 std::cerr << os.str();
8270 #ifdef HAVE_TPETRA_MMM_TIMINGS
8271 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8273 destMat->setAllValues(CSR_rowptr, CSR_colind_LID, CSR_vals);
8284 destMat->numImportPacketsPerLID_.modify_host();
8286 #ifdef HAVE_TPETRA_MMM_TIMINGS
8287 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8289 ArrayRCP<size_t> CSR_rowptr;
8290 ArrayRCP<GO> CSR_colind_GID;
8291 ArrayRCP<LO> CSR_colind_LID;
8292 ArrayRCP<Scalar> CSR_vals;
8294 destMat->imports_.sync_device();
8295 destMat->numImportPacketsPerLID_.sync_device();
8297 size_t N = BaseRowMap->getLocalNumElements();
8299 auto RemoteLIDs_d = RemoteLIDs.view_device();
8300 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8301 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8303 Kokkos::View<size_t*, device_type> CSR_rowptr_d;
8304 Kokkos::View<GO*, device_type> CSR_colind_GID_d;
8305 Kokkos::View<LO*, device_type> CSR_colind_LID_d;
8306 Kokkos::View<impl_scalar_type*, device_type> CSR_vals_d;
8307 Kokkos::View<int*, device_type> TargetPids_d;
8312 destMat->imports_.view_device(),
8313 destMat->numImportPacketsPerLID_.view_device(),
8325 Kokkos::resize(CSR_colind_LID_d, CSR_colind_GID_d.size());
8327 #ifdef HAVE_TPETRA_MMM_TIMINGS
8328 tmCopySPRdata = Teuchos::null;
8337 std::ostringstream os;
8338 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8340 std::cerr << os.str();
8343 #ifdef HAVE_TPETRA_MMM_TIMINGS
8344 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8346 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr_d,
8356 std::ostringstream os;
8357 os << *verbosePrefix <<
"restrictComm="
8358 << (restrictComm ?
"true" :
"false") << std::endl;
8359 std::cerr << os.str();
8366 #ifdef HAVE_TPETRA_MMM_TIMINGS
8367 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8370 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ? ReducedRowMap : MyColMap->replaceCommWithSubset(ReducedComm);
8371 MyColMap = ReducedColMap;
8376 std::ostringstream os;
8377 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8378 std::cerr << os.str();
8380 destMat->replaceColMap(MyColMap);
8387 if (ReducedComm.is_null()) {
8389 std::ostringstream os;
8390 os << *verbosePrefix <<
"I am no longer in the communicator; "
8393 std::cerr << os.str();
8403 if ((!reverseMode && xferAsImport !=
nullptr) ||
8404 (reverseMode && xferAsExport !=
nullptr)) {
8406 std::ostringstream os;
8407 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8408 std::cerr << os.str();
8410 #ifdef HAVE_TPETRA_MMM_TIMINGS
8411 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8413 Import_Util::sortCrsEntries(CSR_rowptr_d,
8416 }
else if ((!reverseMode && xferAsExport !=
nullptr) ||
8417 (reverseMode && xferAsImport !=
nullptr)) {
8419 std::ostringstream os;
8420 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8422 std::cerr << os.str();
8424 #ifdef HAVE_TPETRA_MMM_TIMINGS
8425 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8427 Import_Util::sortAndMergeCrsEntries(CSR_rowptr_d,
8431 TEUCHOS_TEST_FOR_EXCEPTION(
8432 true, std::logic_error,
8433 "Tpetra::CrsMatrix::"
8434 "transferAndFillComplete: Should never get here! "
8435 "Please report this bug to a Tpetra developer.");
8443 std::ostringstream os;
8444 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8445 std::cerr << os.str();
8449 #ifdef HAVE_TPETRA_MMM_TIMINGS
8450 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8452 destMat->setAllValues(CSR_rowptr_d, CSR_colind_LID_d, CSR_vals_d);
8460 #ifdef HAVE_TPETRA_MMM_TIMINGS
8461 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8464 Teuchos::ParameterList esfc_params;
8466 RCP<import_type> MyImport;
8469 if (iallreduceRequest.get() !=
nullptr) {
8471 std::ostringstream os;
8472 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8474 std::cerr << os.str();
8476 iallreduceRequest->wait();
8477 if (reduced_mismatch != 0) {
8483 #ifdef HAVE_TPETRA_MMM_TIMINGS
8484 Teuchos::TimeMonitor MMisMM(*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8489 std::ostringstream os;
8490 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8491 std::cerr << os.str();
8494 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8495 Teuchos::ArrayRCP<int> type3PIDs;
8496 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8497 auto colind = getCrsGraph()->getLocalIndicesHost();
8500 std::ostringstream os;
8501 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8502 std::cerr << os.str();
8506 #ifdef HAVE_TPETRA_MMM_TIMINGS
8507 TimeMonitor tm_rnd(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8509 Import_Util::reverseNeighborDiscovery(*
this,
8521 std::ostringstream os;
8522 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8523 std::cerr << os.str();
8526 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8527 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8529 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8530 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8532 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8534 std::vector<bool> IsOwned(numCols,
true);
8535 std::vector<int> SentTo(numCols, -1);
8536 if (!MyImporter.is_null()) {
8537 for (
auto&& rlid : MyImporter->getRemoteLIDs()) {
8538 IsOwned[rlid] =
false;
8542 std::vector<std::pair<int, GO>> usrtg;
8543 usrtg.reserve(TEPID2.size());
8546 const auto& colMap = *(this->getColMap());
8548 const LO row = TELID2[i];
8549 const int pid = TEPID2[i];
8550 for (
auto j = rowptr[row]; j < rowptr[row + 1]; ++j) {
8551 const int col = colind[j];
8552 if (IsOwned[col] && SentTo[col] != pid) {
8554 GO gid = colMap.getGlobalElement(col);
8555 usrtg.push_back(std::pair<int, GO>(pid, gid));
8563 auto eopg = std ::unique(usrtg.begin(), usrtg.end());
8565 usrtg.erase(eopg, usrtg.end());
8568 Teuchos::ArrayRCP<int> EPID2 = Teuchos::arcp(
new int[type2_us_size], 0, type2_us_size,
true);
8569 Teuchos::ArrayRCP<LO> ELID2 = Teuchos::arcp(
new LO[type2_us_size], 0, type2_us_size,
true);
8572 for (
auto&& p : usrtg) {
8573 EPID2[pos] = p.first;
8574 ELID2[pos] = this->getDomainMap()->getLocalElement(p.second);
8578 Teuchos::ArrayView<int> EPID3 = type3PIDs();
8579 Teuchos::ArrayView<LO> ELID3 = type3LIDs();
8580 GO InfGID = std::numeric_limits<GO>::max();
8581 int InfPID = INT_MAX;
8584 #endif // TPETRA_MIN3
8585 #define TPETRA_MIN3(x, y, z) ((x) < (y) ? (std::min(x, z)) : (std::min(y, z)))
8586 int i1 = 0, i2 = 0, i3 = 0;
8587 int Len1 = EPID1.size();
8588 int Len2 = EPID2.size();
8589 int Len3 = EPID3.size();
8591 int MyLen = Len1 + Len2 + Len3;
8592 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen], 0, MyLen,
true);
8593 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen], 0, MyLen,
true);
8596 while (i1 < Len1 || i2 < Len2 || i3 < Len3) {
8597 int PID1 = (i1 < Len1) ? (EPID1[i1]) : InfPID;
8598 int PID2 = (i2 < Len2) ? (EPID2[i2]) : InfPID;
8599 int PID3 = (i3 < Len3) ? (EPID3[i3]) : InfPID;
8601 GO GID1 = (i1 < Len1) ? getDomainMap()->getGlobalElement(ELID1[i1]) : InfGID;
8602 GO GID2 = (i2 < Len2) ? getDomainMap()->getGlobalElement(ELID2[i2]) : InfGID;
8603 GO GID3 = (i3 < Len3) ? getDomainMap()->getGlobalElement(ELID3[i3]) : InfGID;
8605 int MIN_PID = TPETRA_MIN3(PID1, PID2, PID3);
8606 GO MIN_GID = TPETRA_MIN3(((PID1 == MIN_PID) ? GID1 : InfGID), ((PID2 == MIN_PID) ? GID2 : InfGID), ((PID3 == MIN_PID) ? GID3 : InfGID));
8609 #endif // TPETRA_MIN3
8610 bool added_entry =
false;
8612 if (PID1 == MIN_PID && GID1 == MIN_GID) {
8613 userExportLIDs[iloc] = ELID1[i1];
8614 userExportPIDs[iloc] = EPID1[i1];
8619 if (PID2 == MIN_PID && GID2 == MIN_GID) {
8621 userExportLIDs[iloc] = ELID2[i2];
8622 userExportPIDs[iloc] = EPID2[i2];
8628 if (PID3 == MIN_PID && GID3 == MIN_GID) {
8630 userExportLIDs[iloc] = ELID3[i3];
8631 userExportPIDs[iloc] = EPID3[i3];
8639 std::ostringstream os;
8640 os << *verbosePrefix <<
"Create Import" << std::endl;
8641 std::cerr << os.str();
8644 #ifdef HAVE_TPETRA_MMM_TIMINGS
8645 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
8647 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
8649 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
8650 MyImport = rcp(
new import_type(MyDomainMap,
8653 userExportLIDs.view(0, iloc).getConst(),
8654 userExportPIDs.view(0, iloc).getConst(),
8658 std::ostringstream os;
8659 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
8660 std::cerr << os.str();
8664 #ifdef HAVE_TPETRA_MMM_TIMINGS
8665 TimeMonitor esfc(*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
8666 esfc_params.set(
"Timer Label", label + std::string(
"isMM eSFC"));
8668 if (!params.is_null())
8669 esfc_params.set(
"compute global constants", params->get(
"compute global constants",
true));
8670 destMat->expertStaticFillComplete(MyDomainMap, MyRangeMap, MyImport, Teuchos::null, rcp(
new Teuchos::ParameterList(esfc_params)));
8675 #ifdef HAVE_TPETRA_MMM_TIMINGS
8676 TimeMonitor MMnotMMblock(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
8679 std::ostringstream os;
8680 os << *verbosePrefix <<
"Create Import" << std::endl;
8681 std::cerr << os.str();
8684 #ifdef HAVE_TPETRA_MMM_TIMINGS
8685 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
8687 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
8688 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
8689 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
8690 MyImport = rcp(
new import_type(MyDomainMap, MyColMap, RemotePids, mypars));
8693 std::ostringstream os;
8694 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
8695 std::cerr << os.str();
8698 #ifdef HAVE_TPETRA_MMM_TIMINGS
8699 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
8700 esfc_params.set(
"Timer Label", prefix + std::string(
"notMM eSFC"));
8702 esfc_params.set(
"Timer Label", std::string(
"notMM eSFC"));
8705 if (!params.is_null()) {
8706 esfc_params.set(
"compute global constants",
8707 params->get(
"compute global constants",
true));
8709 destMat->expertStaticFillComplete(MyDomainMap, MyRangeMap,
8710 MyImport, Teuchos::null,
8711 rcp(
new Teuchos::ParameterList(esfc_params)));
8714 #ifdef HAVE_TPETRA_MMM_TIMINGS
8715 tmIESFC = Teuchos::null;
8719 std::ostringstream os;
8720 os << *verbosePrefix <<
"Done" << endl;
8721 std::cerr << os.str();
// importAndFillComplete (single-Import overload).
//
// Thin const wrapper: hands every argument straight to
// transferAndFillComplete, using 'importer' as the row transfer.
//
// destMatrix : RCP taken by non-const reference.  transferAndFillComplete
//              allocates a new matrix when it is null; when it is nonnull,
//              its graph has to be empty (neither locally nor globally
//              indexed) or std::invalid_argument is thrown there.
// importer   : Import passed as the row transfer.
// domainMap  : when null, transferAndFillComplete falls back to
//              this->getDomainMap().
// rangeMap   : when null, transferAndFillComplete falls back to
//              this->getRangeMap().
// params     : forwarded unchanged; options such as "Reverse Mode" and
//              "Restrict Communicator" are read by transferAndFillComplete.
8725 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8727 importAndFillComplete(Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& destMatrix,
8728 const import_type& importer,
8729 const Teuchos::RCP<const map_type>& domainMap,
8730 const Teuchos::RCP<const map_type>& rangeMap,
8731 const Teuchos::RCP<Teuchos::ParameterList>& params)
const {
// Teuchos::null in the third slot: no separate domain transfer is supplied
// (presumably the 'domainTransfer' argument -- confirm against the
// transferAndFillComplete declaration).
8732 transferAndFillComplete(destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
// CrsMatrix::importAndFillComplete, overload taking both a row importer and
// a domain importer.
//
// Delegates to transferAndFillComplete, supplying rowImporter as the row
// transfer and domainImporter (wrapped in an RCP) as the third argument.
//
// destMatrix     : destination matrix handle, passed through by reference.
// rowImporter    : Import object used as the row transfer.
// domainImporter : Import object handed on as the domain transfer.
// domainMap      : domain Map forwarded unchanged.
// rangeMap       : range Map forwarded unchanged.
// params         : optional parameter list forwarded unchanged.
//
// NOTE(review): the return-type / class-qualifier line and the closing brace
// of this definition are not present in this listing.
8735 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8737 importAndFillComplete(Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& destMatrix,
8738 const import_type& rowImporter,
8739 const import_type& domainImporter,
8740 const Teuchos::RCP<const map_type>& domainMap,
8741 const Teuchos::RCP<const map_type>& rangeMap,
8742 const Teuchos::RCP<Teuchos::ParameterList>& params)
const {
// domainImporter arrives by reference; Teuchos::rcpFromRef wraps it in an RCP
// (non-owning per the Teuchos documentation -- confirm), so the caller's
// object is presumably expected to stay alive for the duration of the call.
8743 transferAndFillComplete(destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
// CrsMatrix::exportAndFillComplete, single-exporter overload.
//
// Exports from this matrix into destMatrix and leaves the result fill
// complete.  The work is delegated entirely to transferAndFillComplete.
//
// destMatrix : destination matrix handle, passed through by reference.
// exporter   : Export object used as the row transfer.
// domainMap  : domain Map forwarded unchanged.
// rangeMap   : range Map forwarded unchanged.
// params     : optional parameter list forwarded unchanged.
//
// NOTE(review): the return-type / class-qualifier line and the closing brace
// of this definition are not present in this listing.
8746 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8748 exportAndFillComplete(Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& destMatrix,
8749 const export_type& exporter,
8750 const Teuchos::RCP<const map_type>& domainMap,
8751 const Teuchos::RCP<const map_type>& rangeMap,
8752 const Teuchos::RCP<Teuchos::ParameterList>& params)
const {
// This overload has no domain exporter, so Teuchos::null is passed in the
// third argument position.
8753 transferAndFillComplete(destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
// CrsMatrix::exportAndFillComplete, overload taking both a row exporter and
// a domain exporter.
//
// Delegates to transferAndFillComplete, supplying rowExporter as the row
// transfer and domainExporter (wrapped in an RCP) as the third argument.
//
// destMatrix     : destination matrix handle, passed through by reference.
// rowExporter    : Export object used as the row transfer.
// domainExporter : Export object handed on as the domain transfer.
// domainMap      : domain Map forwarded unchanged.
// rangeMap       : range Map forwarded unchanged.
// params         : optional parameter list forwarded unchanged.
//
// NOTE(review): the return-type / class-qualifier line and the closing brace
// of this definition are not present in this listing.
8756 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8758 exportAndFillComplete(Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& destMatrix,
8759 const export_type& rowExporter,
8760 const export_type& domainExporter,
8761 const Teuchos::RCP<const map_type>& domainMap,
8762 const Teuchos::RCP<const map_type>& rangeMap,
8763 const Teuchos::RCP<Teuchos::ParameterList>& params)
const {
// domainExporter arrives by reference; Teuchos::rcpFromRef wraps it in an RCP
// (non-owning per the Teuchos documentation -- confirm), so the caller's
// object is presumably expected to stay alive for the duration of the call.
8764 transferAndFillComplete(destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
// Explicit instantiation of the CrsMatrix class template for one
// (SCALAR, LO, GO, NODE) combination.
8775 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
8777 template class CrsMatrix<SCALAR, LO, GO, NODE>;
// Explicit instantiation of the member template
// CrsMatrix<SI, LO, GO, NODE>::convert<SO>(), whose return type is an RCP to
// a CrsMatrix with scalar type SO.  Note the parameter order: SO (output
// scalar) comes before SI (input scalar).
8779 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO, SI, LO, GO, NODE) \
8781 template Teuchos::RCP<CrsMatrix<SO, LO, GO, NODE>> \
8782 CrsMatrix<SI, LO, GO, NODE>::convert<SO>() const;
// Expands to a declaration of the nonmember function
// importAndFillCompleteCrsMatrix(sourceMatrix, importer, domainMap, rangeMap,
// params) for one (SCALAR, LO, GO, NODE) combination.  The Import and Map
// template arguments are spelled through CrsMatrix's nested
// local_ordinal_type / global_ordinal_type / node_type typedefs rather than
// through LO / GO / NODE directly.
//
// NOTE(review): the line between the #define and the return type is missing
// from this listing; presumably it carries the template introducer that makes
// this an explicit instantiation or specialization -- confirm in the header.
8784 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8786 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>> \
8787 importAndFillCompleteCrsMatrix(const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE>>& sourceMatrix, \
8788 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8789 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8790 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
8791 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8792 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8793 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& domainMap, \
8794 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8795 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8796 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& rangeMap, \
8797 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Two-importer counterpart of the import instantiation macro: expands to a
// declaration of importAndFillCompleteCrsMatrix(sourceMatrix, rowImporter,
// domainImporter, domainMap, rangeMap, params) for one
// (SCALAR, LO, GO, NODE) combination.
//
// NOTE(review): the line between the #define and the return type is missing
// from this listing; presumably it carries the template introducer that makes
// this an explicit instantiation or specialization -- confirm in the header.
8799 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8801 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>> \
8802 importAndFillCompleteCrsMatrix(const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE>>& sourceMatrix, \
8803 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8804 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8805 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
8806 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8807 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8808 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
8809 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8810 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8811 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& domainMap, \
8812 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8813 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8814 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& rangeMap, \
8815 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of the nonmember function
// exportAndFillCompleteCrsMatrix(sourceMatrix, exporter, domainMap, rangeMap,
// params) for one (SCALAR, LO, GO, NODE) combination.  The Export and Map
// template arguments are spelled through CrsMatrix's nested
// local_ordinal_type / global_ordinal_type / node_type typedefs.
//
// NOTE(review): the line between the #define and the return type is missing
// from this listing; presumably it carries the template introducer that makes
// this an explicit instantiation or specialization -- confirm in the header.
8817 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8819 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>> \
8820 exportAndFillCompleteCrsMatrix(const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE>>& sourceMatrix, \
8821 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8822 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8823 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
8824 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8825 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8826 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& domainMap, \
8827 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8828 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8829 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& rangeMap, \
8830 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Two-exporter counterpart of the export instantiation macro: expands to a
// declaration of exportAndFillCompleteCrsMatrix(sourceMatrix, rowExporter,
// domainExporter, domainMap, rangeMap, params) for one
// (SCALAR, LO, GO, NODE) combination.
//
// NOTE(review): the line between the #define and the return type is missing
// from this listing; presumably it carries the template introducer that makes
// this an explicit instantiation or specialization -- confirm in the header.
8832 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8834 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>> \
8835 exportAndFillCompleteCrsMatrix(const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE>>& sourceMatrix, \
8836 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8837 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8838 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
8839 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8840 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8841 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
8842 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8843 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8844 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& domainMap, \
8845 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8846 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8847 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& rangeMap, \
8848 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Umbrella macro for one (SCALAR, LO, GO, NODE) combination: expands to the
// CrsMatrix class instantiation followed by the four import/export
// fill-complete macros (single- and two-transfer variants).  It does not
// expand TPETRA_CRSMATRIX_CONVERT_INSTANT; that macro takes two scalar types
// and has to be invoked separately.
8850 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO, NODE) \
8851 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
8852 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8853 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8854 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8855 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
8857 #endif // TPETRA_CRSMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Functor for the ABSMAX CombineMode of Import and Export operations.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual LocalOrdinal replaceGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceGlobalValues.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
mag_type getNormInf() const
Compute and return the infinity norm of the matrix.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
static size_t mergeRowIndicesAndValues(size_t rowLen, local_ordinal_type *cols, impl_scalar_type *vals)
Merge duplicate row indices in the given row, along with their corresponding values.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
typename device_type::execution_space execution_space
The Kokkos execution space.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets) const
Pack this object's data for an Import or Export.
virtual void insertGlobalValuesImpl(crs_graph_type &graph, RowInfo &rowInfo, const GlobalOrdinal gblColInds[], const impl_scalar_type vals[], const size_t numInputEnt)
Common implementation detail of insertGlobalValues and insertGlobalValuesFiltered.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
mag_type getNorm1(bool assumeSymmetric=false) const
Compute and return the 1-norm of the matrix.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Traits class for packing / unpacking data of type T.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given objects.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getLocalNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
Declaration of Tpetra::Details::EquilibrationInfo.
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
bool isDistributed() const
Whether this is a globally distributed object.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
static bool debug()
Whether Tpetra is in debug mode.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator...
Kokkos::View< size_t *, Kokkos::LayoutLeft, device_type >::host_mirror_type num_row_entries_type
Row offsets for "1-D" storage.
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
Scalar scalar_type
The type of each entry in the matrix.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data from *this with the data and maps from crsMatrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void leftScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Left-scale a KokkosSparse::CrsMatrix.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms ("row sums") of the input sparse matrix A, in a way suitable for one-sid...
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a copy of the diagonal entries of the matrix.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
virtual LocalOrdinal sumIntoLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoLocalValues.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A...
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
bool isFillActive() const
Whether the matrix is not fill complete.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply().
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
size_t global_size_t
Global size_t object.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
values_dualv_type::t_dev::const_type getValuesViewDevice(const RowInfo &rowinfo) const
Get a const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow)...
bool isFillComplete() const override
Whether the matrix is fill complete.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< char *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode CM) override
Unpack the imported column indices and values, and combine into matrix.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
bool fillComplete_
Whether the matrix is fill complete.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using global indices.
Replace old value with maximum of magnitudes of old and new values.
virtual LocalOrdinal sumIntoGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoGlobalValues.
Abstract base class for objects that can be the source of an Import or Export operation.
local_ordinal_type sumIntoLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using local row and column indices.
typename Node::device_type device_type
The Kokkos device type.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process...
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
std::string description() const override
A one-line description of this object.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
size_t getLocalNumEntries() const override
The local number of entries in this matrix.
Replace existing values with new values.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Replace old values with zero.
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
bool isLocallyComplete() const
Is this Export or Import locally complete?
virtual LocalOrdinal replaceLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceLocalValues.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
values_dualv_type::t_dev getValuesViewDeviceNonConst(const RowInfo &rowinfo)
Get a non-const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myR...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
A parallel distribution of indices over processes.
void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const CombineMode CM=ADD)
Insert one or more entries into the matrix, using local column indices.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
A read-only, row-oriented interface to a sparse matrix.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given Map.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
values_dualv_type::t_host::const_type getValuesViewHost(const RowInfo &rowinfo) const
Get a const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow).
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Declaration of Tpetra::Details::iallreduce.
void reduce()
Sum values of a locally replicated multivector across all processes.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas, const bool verbose)
Allocate values (and optionally indices) using the Node.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process...
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
void setAllValues(const typename local_graph_device_type::row_map_type &ptr, const typename local_graph_device_type::entries_type::non_const_type &ind, const typename local_matrix_device_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using local row and column indices.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given Map.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Accumulate new values into existing values (may not be supported in all classes)
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
void rightScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Right-scale a KokkosSparse::CrsMatrix.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Description of Tpetra's behavior.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
values_dualv_type::t_host getValuesViewHostNonConst(const RowInfo &rowinfo)
Get a non-const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
Declaration of Tpetra::computeRowAndColumnOneNorms.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.