40 #ifndef TPETRA_CRSMATRIX_DEF_HPP
41 #define TPETRA_CRSMATRIX_DEF_HPP
53 #include "Tpetra_RowMatrix.hpp"
54 #include "Tpetra_LocalCrsMatrixOperator.hpp"
62 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
70 #include "Tpetra_Details_packCrsMatrix.hpp"
71 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
73 #include "Teuchos_FancyOStream.hpp"
74 #include "Teuchos_RCP.hpp"
75 #include "Teuchos_DataAccess.hpp"
76 #include "Teuchos_SerialDenseMatrix.hpp"
77 #include "KokkosBlas1_scal.hpp"
78 #include "KokkosSparse_getDiagCopy.hpp"
79 #include "KokkosSparse_spmv.hpp"
// Atomically update the value stored at dest using a binary function f.
// The visible body is a compare-and-swap retry loop: a candidate value is
// computed from the value assumed to be stored at dest and inputVal, the
// exchange is attempted, and the loop repeats while the value observed at
// dest differs from the assumed one.
91 template<
class T,
class BinaryFunction>
92 T atomic_binary_function_update (
volatile T*
const dest,
// Candidate result: combine the assumed current value with the input.
106 T newVal = f (assume, inputVal);
// Try to store newVal at dest, expecting it to still hold `assume`;
// oldVal is the value returned by the exchange.
107 oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
108 }
// A mismatch means dest changed underneath us, so retry.
while (assume != oldVal);
// Helper returning the larger of the magnitudes of x and y, where the
// magnitude is taken with Teuchos::ScalarTraits<Scalar>::magnitude.
128 template<
class Scalar>
132 typedef Teuchos::ScalarTraits<Scalar> STS;
133 return std::max (STS::magnitude (x), STS::magnitude (y));
// Constructor taking a row Map, a maximum number of entries per row, and
// a parameter list.  It creates a new crs_graph_type from rowMap and
// maxNumEntriesPerRow.
142 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
143 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
144 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
145 size_t maxNumEntriesPerRow,
146 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Name of this overload, used as a prefix in exception messages.
149 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, size_t "
150 "[, RCP<ParameterList>]): ";
151 Teuchos::RCP<crs_graph_type> graph;
153 graph = Teuchos::rcp (
new crs_graph_type (rowMap, maxNumEntriesPerRow,
// A std::exception is reported as std::runtime_error with a message
// naming the CrsGraph constructor that was being called.
156 catch (std::exception& e) {
157 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
158 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
159 "size_t [, RCP<ParameterList>]) threw an exception: "
// staticGraph_ refers to the same graph as myGraph_.
166 staticGraph_ = myGraph_;
// Constructor overload described by its own message prefix as
// CrsMatrix(RCP<const Map>, ArrayView<const size_t>[, RCP<ParameterList>]):
// the per-row allocation counts are supplied in numEntPerRowToAlloc.
171 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
174 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
175 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Name of this overload, used as a prefix in exception messages.
178 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
179 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
180 Teuchos::RCP<crs_graph_type> graph;
// A std::exception is reported as std::runtime_error with a message
// naming the CrsGraph constructor that was being called.
186 catch (std::exception& e) {
187 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
188 (
true, std::runtime_error,
"CrsGraph constructor "
189 "(RCP<const Map>, ArrayView<const size_t>"
190 "[, RCP<ParameterList>]) threw an exception: "
// The matrix uses the graph it just created.
197 staticGraph_ = graph;
// Constructor overload described by its own message prefix as
// CrsMatrix(RCP<const Map>, RCP<const Map>, size_t[, RCP<ParameterList>]):
// a column Map and a single maximum entry count per row are supplied.
202 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
205 const Teuchos::RCP<const map_type>& colMap,
206 const size_t maxNumEntPerRow,
207 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Name of this overload, used as a prefix in exception messages.
210 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
211 "RCP<const Map>, size_t[, RCP<ParameterList>]): ";
// Appended to messages of internal-consistency (logic_error) failures.
212 const char suffix[] =
213 " Please report this bug to the Tpetra developers.";
// Internal sanity checks: neither graph pointer may already be set when
// the constructor body starts.
216 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
217 (! staticGraph_.is_null (), std::logic_error,
218 "staticGraph_ is not null at the beginning of the constructor."
220 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
221 (! myGraph_.is_null (), std::logic_error,
222 "myGraph_ is not null at the beginning of the constructor."
224 Teuchos::RCP<crs_graph_type> graph;
// A std::exception is reported as std::runtime_error; the original
// message (e.what()) is appended.
230 catch (std::exception& e) {
231 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
232 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
233 "RCP<const Map>, size_t[, RCP<ParameterList>]) threw an "
234 "exception: " << e.what ());
// staticGraph_ refers to the same graph as myGraph_.
240 staticGraph_ = myGraph_;
// Constructor overload described by its own message prefix as
// CrsMatrix(RCP<const Map>, RCP<const Map>, ArrayView<const size_t>
// [, RCP<ParameterList>]): a column Map and per-row allocation counts
// are supplied.
245 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
248 const Teuchos::RCP<const map_type>& colMap,
249 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
250 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Name of this overload, used as a prefix in exception messages.
253 const char tfecfFuncName[] =
254 "CrsMatrix(RCP<const Map>, RCP<const Map>, "
255 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
256 Teuchos::RCP<crs_graph_type> graph;
// A std::exception is reported as std::runtime_error; the original
// message (e.what()) is appended.
262 catch (std::exception& e) {
263 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
264 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
265 "RCP<const Map>, ArrayView<const size_t>[, "
266 "RCP<ParameterList>]) threw an exception: " << e.what ());
// The matrix uses the graph it just created.
272 staticGraph_ = graph;
// Constructor taking an existing const graph and an (unnamed, hence unused
// in this body) parameter list.  The graph must be non-null and fill
// complete; the values array is allocated with one slot per packed local
// index of the graph.
278 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
280 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
281 const Teuchos::RCP<Teuchos::ParameterList>& ) :
283 staticGraph_ (graph),
284 storageStatus_ (Details::STORAGE_1D_PACKED)
287 typedef typename local_matrix_device_type::values_type values_type;
// Name of this overload, used as a prefix in exception messages.
288 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>[, "
289 "RCP<ParameterList>]): ";
// Diagnostic trace to std::cerr.
// NOTE(review): presumably guarded by a verbose flag -- the guard is not
// shown here; confirm.
292 std::unique_ptr<std::string> prefix;
294 prefix = this->createPrefix(
"CrsMatrix",
"CrsMatrix(graph,params)");
295 std::ostringstream os;
296 os << *prefix <<
"Start" << endl;
297 std::cerr << os.str ();
// Argument validation: reject a null graph and a graph that is not
// fill complete.
300 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
301 (graph.is_null (), std::runtime_error,
"Input graph is null.");
302 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
303 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
304 "is not fill complete. You must call fillComplete on the "
305 "graph before using it to construct a CrsMatrix. Note that "
306 "calling resumeFill on the graph makes it not fill complete, "
307 "even if you had previously called fillComplete. In that "
308 "case, you must call fillComplete on the graph again.");
// Number of values to allocate = length of the graph's packed local
// index array.
316 const size_t numEnt = graph->lclIndsPacked_wdv.extent (0);
318 std::ostringstream os;
319 os << *prefix <<
"Allocate values: " << numEnt << endl;
320 std::cerr << os.str ();
323 values_type val (
"Tpetra::CrsMatrix::values", numEnt);
// The unpacked values are assigned from the packed values.
325 valuesUnpacked_wdv = valuesPacked_wdv;
330 std::ostringstream os;
331 os << *prefix <<
"Done" << endl;
332 std::cerr << os.str ();
// Constructor that takes a source matrix (referred to as `matrix` below)
// and a const graph.  The storage status is copied from the source matrix,
// and both value containers are constructed from the source matrix's
// corresponding containers, with lengths taken from the graph's packed and
// unpacked local index arrays.
336 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
339 const Teuchos::RCP<const crs_graph_type>& graph,
340 const Teuchos::RCP<Teuchos::ParameterList>& params) :
342 staticGraph_ (graph),
343 storageStatus_ (matrix.storageStatus_)
// NOTE(review): this message prefix is identical to the one used by the
// (graph, values) constructor and mentions values_type rather than a
// source matrix -- confirm whether that is intended for this overload.
345 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
346 "local_matrix_device_type::values_type, "
347 "[,RCP<ParameterList>]): ";
// Argument validation: reject a null graph and a graph that is not
// fill complete.
348 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
349 (graph.is_null (), std::runtime_error,
"Input graph is null.");
350 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
351 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
352 "is not fill complete. You must call fillComplete on the "
353 "graph before using it to construct a CrsMatrix. Note that "
354 "calling resumeFill on the graph makes it not fill complete, "
355 "even if you had previously called fillComplete. In that "
356 "case, you must call fillComplete on the graph again.");
// Packed values: built from the source's packed values, starting at
// offset 0, with as many entries as the graph has packed local indices.
358 size_t numValuesPacked = graph->lclIndsPacked_wdv.extent(0);
359 valuesPacked_wdv =
values_wdv_type(matrix.valuesPacked_wdv, 0, numValuesPacked);
// Unpacked values: same construction, sized by the graph's unpacked
// local indices.
361 size_t numValuesUnpacked = graph->lclIndsUnpacked_wdv.extent(0);
362 valuesUnpacked_wdv =
values_wdv_type(matrix.valuesUnpacked_wdv, 0, numValuesUnpacked);
// Constructor taking an existing const graph, a values array, and an
// (unnamed, hence unused in this body) parameter list.  The graph must be
// non-null and fill complete.
368 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
370 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
371 const typename local_matrix_device_type::values_type& values,
372 const Teuchos::RCP<Teuchos::ParameterList>& ) :
374 staticGraph_ (graph),
375 storageStatus_ (Details::STORAGE_1D_PACKED)
// Name of this overload, used as a prefix in exception messages.
377 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
378 "local_matrix_device_type::values_type, "
379 "[,RCP<ParameterList>]): ";
// Argument validation: reject a null graph and a graph that is not
// fill complete.
380 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
381 (graph.is_null (), std::runtime_error,
"Input graph is null.");
382 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
383 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
384 "is not fill complete. You must call fillComplete on the "
385 "graph before using it to construct a CrsMatrix. Note that "
386 "calling resumeFill on the graph makes it not fill complete, "
387 "even if you had previously called fillComplete. In that "
388 "case, you must call fillComplete on the graph again.");
// The unpacked values are assigned from the packed values.
397 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor taking row and column Maps plus the three CRS arrays
// (row offsets, column indices, values) as Kokkos views.  It validates the
// array lengths against each other, builds a CrsGraph from the Maps and
// the two index arrays, and then checks that the graph's local graph has
// the same extents as the inputs.
408 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
411 const Teuchos::RCP<const map_type>& colMap,
412 const typename local_graph_device_type::row_map_type& rowPointers,
413 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
414 const typename local_matrix_device_type::values_type& values,
415 const Teuchos::RCP<Teuchos::ParameterList>& params) :
417 storageStatus_ (Details::STORAGE_1D_PACKED)
419 using Details::getEntryOnHost;
// Name of this overload, used as a prefix in exception messages.
422 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
423 "RCP<const Map>, ptr, ind, val[, params]): ";
// NOTE(review): unlike the other constructors' suffix, this one starts
// with a period; where it follows a message that already ends in "."
// (see the first local-graph check below) the output contains "..".
424 const char suffix[] =
425 ". Please report this bug to the Tpetra developers.";
// Diagnostic trace to std::cerr.
// NOTE(review): presumably guarded by a verbose flag -- the guard is not
// shown here; confirm.
429 std::unique_ptr<std::string> prefix;
431 prefix = this->createPrefix(
432 "CrsMatrix",
"CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
433 std::ostringstream os;
434 os << *prefix <<
"Start" << endl;
435 std::cerr << os.str ();
// There must be exactly one value per column index.
442 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
443 (values.extent(0) != columnIndices.extent(0),
444 std::invalid_argument,
"values.extent(0)=" << values.extent(0)
445 <<
" != columnIndices.extent(0) = " << columnIndices.extent(0)
// Debug-only check: the last row offset (read back on the host) is the
// total entry count and must equal the lengths of both columnIndices
// and values.
447 if (debug && rowPointers.extent(0) != 0) {
448 const size_t numEnt =
449 getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
450 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
451 (numEnt !=
size_t(columnIndices.extent(0)) ||
452 numEnt !=
size_t(values.extent(0)),
453 std::invalid_argument,
"Last entry of rowPointers says that "
454 "the matrix has " << numEnt <<
" entr"
455 << (numEnt != 1 ?
"ies" :
"y") <<
", but the dimensions of "
456 "columnIndices and values don't match this. "
457 "columnIndices.extent(0)=" << columnIndices.extent (0)
458 <<
" and values.extent(0)=" << values.extent (0) <<
".");
// Build the graph from the Maps and the two index arrays.
461 RCP<crs_graph_type> graph;
463 graph = Teuchos::rcp (
new crs_graph_type (rowMap, colMap, rowPointers,
464 columnIndices, params));
// A std::exception is reported as std::runtime_error with a message
// naming the CrsGraph constructor that was being called.
466 catch (std::exception& e) {
467 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
468 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
469 "RCP<const Map>, ptr, ind[, params]) threw an exception: "
// Internal consistency checks on the graph that was just built: its
// local row offsets / entries must have the same extents as the inputs,
// and the entry count must match the number of values.
477 auto lclGraph = graph->getLocalGraphDevice ();
478 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
479 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
480 lclGraph.entries.extent (0) != columnIndices.extent (0),
481 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, "
482 "ind[, params]) did not set the local graph correctly." << suffix);
483 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
484 (lclGraph.entries.extent (0) != values.extent (0),
485 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
486 "params]) did not set the local graph correctly. "
487 "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
488 <<
" != values.extent(0) = " << values.extent (0) << suffix);
// The matrix uses the graph it just created.
494 staticGraph_ = graph;
// The unpacked values are assigned from the packed values.
504 valuesUnpacked_wdv = valuesPacked_wdv;
513 std::ostringstream os;
514 os << *prefix <<
"Done" << endl;
515 std::cerr << os.str();
// Constructor taking row and column Maps plus the three CRS arrays as
// Teuchos::ArrayRCP (ptr, ind, val).  It builds a CrsGraph, checks that
// the graph's local graph has the same sizes as ptr and ind, and
// converts val into a Kokkos view on device_type.
519 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
522 const Teuchos::RCP<const map_type>& colMap,
523 const Teuchos::ArrayRCP<size_t>& ptr,
524 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
525 const Teuchos::ArrayRCP<Scalar>& val,
526 const Teuchos::RCP<Teuchos::ParameterList>& params) :
528 storageStatus_ (Details::STORAGE_1D_PACKED)
530 using Kokkos::Compat::getKokkosViewDeepCopy;
531 using Teuchos::av_reinterpret_cast;
533 using values_type =
typename local_matrix_device_type::values_type;
// Name of this overload, used as a prefix in exception messages.
535 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
536 "RCP<const Map>, ptr, ind, val[, params]): ";
538 RCP<crs_graph_type> graph;
// A std::exception is reported as std::runtime_error; the original
// message (e.what()) is appended.
543 catch (std::exception& e) {
544 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
545 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
546 "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
547 "RCP<ParameterList>]) threw an exception: " << e.what ());
// The matrix uses the graph it just created.
553 staticGraph_ = graph;
// Internal consistency check: the local graph's row offsets / entries
// must have the same lengths as the ptr / ind inputs.
566 auto lclGraph = staticGraph_->getLocalGraphDevice ();
567 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
568 (
size_t (lclGraph.row_map.extent (0)) !=
size_t (ptr.size ()) ||
569 size_t (lclGraph.entries.extent (0)) !=
size_t (ind.size ()),
570 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, "
571 "ptr, ind[, params]) did not set the local graph correctly. "
572 "Please report this bug to the Tpetra developers.");
// Reinterpret the Scalar values as IST and turn them into a Kokkos view
// on device_type via getKokkosViewDeepCopy.
575 getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
// The unpacked values are assigned from the packed values.
577 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor overload described by its own message prefix as
// Tpetra::CrsMatrix(RCP<const Map>, RCP<const Map>,
// local_matrix_device_type[, RCP<ParameterList>]).  The graph is built
// from lclMatrix.graph and is required to come out fill complete; the
// final checks assert that the matrix itself ends up fill complete.
587 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
590 const Teuchos::RCP<const map_type>& colMap,
592 const Teuchos::RCP<Teuchos::ParameterList>& params) :
594 storageStatus_ (Details::STORAGE_1D_PACKED),
// Name of this overload, used as a prefix in exception messages.
597 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
598 "RCP<const Map>, local_matrix_device_type[, RCP<ParameterList>]): ";
// Appended to messages of internal-consistency (logic_error) failures.
599 const char suffix[] =
600 " Please report this bug to the Tpetra developers.";
602 Teuchos::RCP<crs_graph_type> graph;
605 lclMatrix.graph, params));
// A std::exception is reported as std::runtime_error; the original
// message (e.what()) is appended.
607 catch (std::exception& e) {
608 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
609 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
610 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) threw an "
611 "exception: " << e.what ());
// The graph constructor used here is expected to return a fill-complete
// graph; anything else is treated as an internal bug.
613 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
614 (!graph->isFillComplete (), std::logic_error,
"CrsGraph constructor (RCP"
615 "<const Map>, RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) "
616 "did not produce a fill-complete graph. Please report this bug to the "
617 "Tpetra developers.");
// The matrix uses the graph it just created.
622 staticGraph_ = graph;
// The unpacked values are assigned from the packed values.
625 valuesUnpacked_wdv = valuesPacked_wdv;
// Postcondition checks; per their messages the matrix must not be in the
// fill-active state and must be fill complete at this point.
627 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
629 "At the end of a CrsMatrix constructor that should produce "
630 "a fillComplete matrix, isFillActive() is true." << suffix);
631 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
633 "CrsMatrix constructor that should produce a fillComplete "
634 "matrix, isFillComplete() is false." << suffix);
// Constructor taking a local matrix together with row, column, domain and
// range Maps.  The graph is built from lclMatrix.graph and the four Maps
// and is required to come out fill complete; the final checks assert that
// the matrix itself ends up fill complete.
638 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
641 const Teuchos::RCP<const map_type>& rowMap,
642 const Teuchos::RCP<const map_type>& colMap,
643 const Teuchos::RCP<const map_type>& domainMap,
644 const Teuchos::RCP<const map_type>& rangeMap,
645 const Teuchos::RCP<Teuchos::ParameterList>& params) :
647 storageStatus_ (Details::STORAGE_1D_PACKED),
// Name of this overload, used as a prefix in exception messages.
650 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
651 "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
652 "local_matrix_device_type[, RCP<ParameterList>]): ";
// Appended to messages of internal-consistency (logic_error) failures.
653 const char suffix[] =
654 " Please report this bug to the Tpetra developers.";
656 Teuchos::RCP<crs_graph_type> graph;
658 graph = Teuchos::rcp (
new crs_graph_type (lclMatrix.graph, rowMap, colMap,
659 domainMap, rangeMap, params));
// A std::exception is reported as std::runtime_error; the original
// message (e.what()) is appended.
661 catch (std::exception& e) {
662 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
663 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
664 "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_device_type[, "
665 "RCP<ParameterList>]) threw an exception: " << e.what ());
// The graph constructor used here is expected to return a fill-complete
// graph; anything else is treated as an internal bug.
667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
668 (! graph->isFillComplete (), std::logic_error,
"CrsGraph "
669 "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
670 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) did "
671 "not produce a fillComplete graph." << suffix);
// The matrix uses the graph it just created.
676 staticGraph_ = graph;
// The unpacked values are assigned from the packed values.
679 valuesUnpacked_wdv = valuesPacked_wdv;
// Postcondition checks; per their messages the matrix must not be in the
// fill-active state and must be fill complete at this point.
681 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
683 "At the end of a CrsMatrix constructor that should produce "
684 "a fillComplete matrix, isFillActive() is true." << suffix);
685 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
687 "CrsMatrix constructor that should produce a fillComplete "
688 "matrix, isFillComplete() is false." << suffix);
// Constructor taking a local matrix together with row, column, domain and
// range Maps and precomputed Import / Export objects (per its message
// prefix: (lclMat,Map,Map,Map,Map,Import,Export,params)).  The graph it
// builds is required to come out fill complete; the final checks assert
// that the matrix itself ends up fill complete.
692 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
695 const Teuchos::RCP<const map_type>& rowMap,
696 const Teuchos::RCP<const map_type>& colMap,
697 const Teuchos::RCP<const map_type>& domainMap,
698 const Teuchos::RCP<const map_type>& rangeMap,
699 const Teuchos::RCP<const import_type>& importer,
700 const Teuchos::RCP<const export_type>& exporter,
701 const Teuchos::RCP<Teuchos::ParameterList>& params) :
703 storageStatus_ (Details::STORAGE_1D_PACKED),
// Name of this overload, used as a prefix in exception messages.
707 const char tfecfFuncName[] =
"Tpetra::CrsMatrix"
708 "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
// Appended to messages of internal-consistency (logic_error) failures.
709 const char suffix[] =
710 " Please report this bug to the Tpetra developers.";
712 Teuchos::RCP<crs_graph_type> graph;
715 domainMap, rangeMap, importer,
// A std::exception is reported as std::runtime_error; the original
// message (e.what()) is appended.
718 catch (std::exception& e) {
719 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
720 (
true, std::runtime_error,
"CrsGraph constructor "
721 "(local_graph_device_type, Map, Map, Map, Map, Import, Export, "
722 "params) threw: " << e.what ());
// The graph constructor used here is expected to return a fill-complete
// graph; anything else is treated as an internal bug.
724 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
725 (!graph->isFillComplete (), std::logic_error,
"CrsGraph "
726 "constructor (local_graph_device_type, Map, Map, Map, Map, Import, "
727 "Export, params) did not produce a fill-complete graph. "
728 "Please report this bug to the Tpetra developers.");
// The matrix uses the graph it just created.
733 staticGraph_ = graph;
// The unpacked values are assigned from the packed values.
736 valuesUnpacked_wdv = valuesPacked_wdv;
// Postcondition checks; per their messages the matrix must not be in the
// fill-active state and must be fill complete at this point.
738 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
740 "At the end of a CrsMatrix constructor that should produce "
741 "a fillComplete matrix, isFillActive() is true." << suffix);
742 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
744 "CrsMatrix constructor that should produce a fillComplete "
745 "matrix, isFillComplete() is false." << suffix);
// Constructor from another CrsMatrix (`source`) and a Teuchos::DataAccess
// mode.  The graph and storage status are taken from the source.  The
// Teuchos::Copy branch allocates a new, uninitialized values view; the
// Teuchos::View branch is handled separately; the trailing exception
// reports any other value of copyOrView as std::invalid_argument.
749 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
752 const Teuchos::DataAccess copyOrView):
754 staticGraph_ (source.getCrsGraph()),
755 storageStatus_ (source.storageStatus_)
// Name of this overload, used as a prefix in exception messages.
757 const char tfecfFuncName[] =
"Tpetra::CrsMatrix("
758 "const CrsMatrix&, const Teuchos::DataAccess): ";
// Precondition (per the message): the source must be fill complete.
759 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
761 "Source graph must be fillComplete().");
// Copy mode: allocate a fresh values view without initializing it.
763 if (copyOrView == Teuchos::Copy) {
764 using values_type =
typename local_matrix_device_type::values_type;
766 using Kokkos::view_alloc;
767 using Kokkos::WithoutInitializing;
768 values_type newvals (view_alloc (
"val", WithoutInitializing),
// The unpacked values are assigned from the packed values.
773 valuesUnpacked_wdv = valuesPacked_wdv;
// View mode.
776 else if (copyOrView == Teuchos::View) {
// Any other DataAccess value is rejected; the message lists the numeric
// values of the two accepted enumerators.
782 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
783 (
true, std::invalid_argument,
"Second argument 'copyOrView' "
784 "has an invalid value " << copyOrView <<
". Valid values "
785 "include Teuchos::Copy = " << Teuchos::Copy <<
" and "
786 "Teuchos::View = " << Teuchos::View <<
".");
// Exchanges this matrix's state with crs_matrix member by member using
// std::swap: the import/export multivector caches, both graph pointers,
// both value containers, and the nonlocals_ container.
791 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
796 std::swap(crs_matrix.
importMV_, this->importMV_);
797 std::swap(crs_matrix.
exportMV_, this->exportMV_);
798 std::swap(crs_matrix.staticGraph_, this->staticGraph_);
799 std::swap(crs_matrix.myGraph_, this->myGraph_);
800 std::swap(crs_matrix.valuesPacked_wdv, this->valuesPacked_wdv);
801 std::swap(crs_matrix.valuesUnpacked_wdv, this->valuesUnpacked_wdv);
804 std::swap(crs_matrix.
nonlocals_, this->nonlocals_);
// Returns the communicator of the matrix's graph.
807 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
808 Teuchos::RCP<const Teuchos::Comm<int> >
811 return getCrsGraphRef ().getComm ();
// Returns the matrix's own fillComplete_ flag.
814 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
818 return fillComplete_;
// Returns the negation of the matrix's fillComplete_ flag.
821 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
825 return ! fillComplete_;
// Forwards to the graph's isStorageOptimized().
828 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
832 return this->getCrsGraphRef ().isStorageOptimized ();
// Forwards to the graph's isLocallyIndexed().
835 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
839 return getCrsGraphRef ().isLocallyIndexed ();
// Forwards to the graph's isGloballyIndexed().
842 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
846 return getCrsGraphRef ().isGloballyIndexed ();
// Forwards to the graph's hasColMap().
849 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
853 return getCrsGraphRef ().hasColMap ();
// Forwards to the graph's getGlobalNumEntries().
856 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
860 return getCrsGraphRef ().getGlobalNumEntries ();
// Forwards to the graph's getLocalNumEntries().
863 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
867 return getCrsGraphRef ().getLocalNumEntries ();
// Forwards to the graph's getGlobalNumRows().
870 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
874 return getCrsGraphRef ().getGlobalNumRows ();
// Forwards to the graph's getGlobalNumCols().
877 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
881 return getCrsGraphRef ().getGlobalNumCols ();
// Forwards to the graph's getLocalNumRows().
884 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
888 return getCrsGraphRef ().getLocalNumRows ();
// Forwards to the graph's getLocalNumCols().
892 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
896 return getCrsGraphRef ().getLocalNumCols ();
// Forwards to the graph's getNumEntriesInGlobalRow() for the given
// globalRow.
900 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
904 return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
// Forwards to the graph's getNumEntriesInLocalRow() for the given
// localRow.
907 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
911 return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
// Forwards to the graph's getGlobalMaxNumRowEntries().
914 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
918 return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
// Forwards to the graph's getLocalMaxNumRowEntries().
921 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
925 return getCrsGraphRef ().getLocalMaxNumRowEntries ();
// Returns the index base of the matrix's row Map.
928 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
932 return getRowMap ()->getIndexBase ();
// Returns the graph's row Map.
935 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
936 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
939 return getCrsGraphRef ().getRowMap ();
// Returns the graph's column Map.
942 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
943 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
946 return getCrsGraphRef ().getColMap ();
// Returns the graph's domain Map.
949 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
950 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
953 return getCrsGraphRef ().getDomainMap ();
// Returns the graph's range Map.
956 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
957 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
960 return getCrsGraphRef ().getRangeMap ();
// Returns the matrix's graph as an RCP to const RowGraph.  The visible
// logic first tests whether staticGraph_ is non-null.
// NOTE(review): presumably falls back to myGraph_ otherwise -- confirm.
963 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
964 Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
967 if (staticGraph_ != Teuchos::null) {
// Returns the matrix's graph as an RCP to const CrsGraph.  The visible
// logic first tests whether staticGraph_ is non-null.
// NOTE(review): presumably falls back to myGraph_ otherwise -- confirm.
973 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
974 Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
977 if (staticGraph_ != Teuchos::null) {
// Returns a reference to the matrix's graph: *staticGraph_ when that
// pointer is non-null, otherwise *myGraph_.  A null myGraph_ on the
// fallback path is reported as std::logic_error.
983 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Compile-time debug switch driven by HAVE_TPETRA_DEBUG.
988 #ifdef HAVE_TPETRA_DEBUG
989 constexpr
bool debug =
true;
991 constexpr
bool debug =
false;
992 #endif // HAVE_TPETRA_DEBUG
// Preferred source: the static graph.
994 if (! this->staticGraph_.is_null ()) {
995 return * (this->staticGraph_);
999 const char tfecfFuncName[] =
"getCrsGraphRef: ";
// Both pointers being null is an internal error.
// NOTE(review): presumably this check is only made when `debug` is
// true -- the guard is not shown here; confirm.
1000 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1001 (this->myGraph_.is_null (), std::logic_error,
1002 "Both staticGraph_ and myGraph_ are null. "
1003 "Please report this bug to the Tpetra developers.");
1005 return * (this->myGraph_);
// Produces a local_matrix_device_type.  The visible pieces are the column
// count (number of local elements of the static graph's column Map), the
// packed values' device view requested with ReadWrite access, and the
// static graph's device local graph.
1009 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1010 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
1014 auto numCols = staticGraph_->getColMap()->getLocalNumElements();
1017 valuesPacked_wdv.getDeviceView(Access::ReadWrite),
1018 staticGraph_->getLocalGraphDevice());
// Returns a local_matrix_host_type built from the column count (number of
// local elements of the static graph's column Map), the packed values'
// host view requested with ReadWrite access, and the static graph's host
// local graph.
1021 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1022 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type
1026 auto numCols = staticGraph_->
getColMap()->getLocalNumElements();
1027 return local_matrix_host_type(
"Tpetra::CrsMatrix::lclMatrixHost", numCols,
1028 valuesPacked_wdv.getHostView(Access::ReadWrite),
1029 staticGraph_->getLocalGraphHost());
1032 #if KOKKOSKERNELS_VERSION < 40299
// Returns a shared_ptr to a local_multiply_op_type wrapping a copy of the
// device local matrix.  When one of the cuSPARSE / rocSPARSE / MKL TPLs is
// enabled and the local entry count fits in LocalOrdinal, the operator is
// additionally given a LocalOrdinal-typed copy of the row offsets, which
// is created on first use and kept in this->ordinalRowptrs.
1034 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1035 std::shared_ptr<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_multiply_op_type>
1039 auto localMatrix = getLocalMatrixDevice();
1040 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
// Only usable if every row offset is representable as a LocalOrdinal.
1041 if(this->getLocalNumEntries() <=
size_t(Teuchos::OrdinalTraits<LocalOrdinal>::max()))
// Build the converted row offsets only if they do not exist yet.
1043 if(this->ordinalRowptrs.data() ==
nullptr)
1045 auto originalRowptrs = localMatrix.graph.row_map;
1048 this->ordinalRowptrs = ordinal_rowptrs_type(
1049 Kokkos::ViewAllocateWithoutInitializing(
"CrsMatrix::ordinalRowptrs"), originalRowptrs.extent(0));
// Local copy of the view handle for capture by the device lambda.
1050 auto ordinalRowptrs_ = this->ordinalRowptrs;
// Element-wise copy of the row offsets into the LocalOrdinal-typed view.
1051 Kokkos::parallel_for(
"CrsMatrix::getLocalMultiplyOperator::convertRowptrs",
1052 Kokkos::RangePolicy<execution_space>(0, originalRowptrs.extent(0)),
1053 KOKKOS_LAMBDA(LocalOrdinal i)
1055 ordinalRowptrs_(i) = originalRowptrs(i);
// Operator constructed with the converted row offsets.
1059 return std::make_shared<local_multiply_op_type>(
1060 std::make_shared<local_matrix_device_type>(localMatrix), this->ordinalRowptrs);
// Operator constructed from the local matrix alone.
1064 return std::make_shared<local_multiply_op_type>(
1065 std::make_shared<local_matrix_device_type>(localMatrix));
// Returns true exactly when myGraph_ is null.
1069 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1073 return myGraph_.is_null ();
1076 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1083 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Allocates the matrix's values storage to match the graph.  Inputs
// referenced in the body are `lg` (LocalIndices vs. global), `gas`
// (GraphAlreadyAllocated vs. GraphNotYetAllocated) and `verbose`.  When
// gas is GraphNotYetAllocated the owned graph's indices are allocated
// first; the values array is then sized by the static graph's local
// allocation size.
1090 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Name of this method, used as a prefix in exception messages.
1099 const char tfecfFuncName[] =
"allocateValues: ";
// Appended to messages of internal-consistency (logic_error) failures.
1100 const char suffix[] =
1101 " Please report this bug to the Tpetra developers.";
1102 ProfilingRegion region(
"Tpetra::CrsMatrix::allocateValues");
// Diagnostic trace to std::cerr describing the lg / gas arguments.
// NOTE(review): presumably guarded by `verbose` -- the guard is not shown
// here; confirm.
1104 std::unique_ptr<std::string> prefix;
1106 prefix = this->createPrefix(
"CrsMatrix",
"allocateValues");
1107 std::ostringstream os;
1108 os << *prefix <<
"lg: "
1109 << (lg == LocalIndices ?
"Local" :
"Global") <<
"Indices"
1111 << (gas == GraphAlreadyAllocated ?
"Already" :
"NotYet")
1112 <<
"Allocated" << endl;
1113 std::cerr << os.str();
// Run-time debug switch for the "CrsMatrix" category.
1116 const bool debug = Behavior::debug(
"CrsMatrix");
1118 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1119 (this->staticGraph_.is_null (), std::logic_error,
1120 "staticGraph_ is null." << suffix);
// The caller's claim (gas) must agree with what the graph reports about
// its own index allocation; the two throws below produce the message
// for each direction of disagreement.
1125 if ((gas == GraphAlreadyAllocated) !=
1126 staticGraph_->indicesAreAllocated ()) {
1127 const char err1[] =
"The caller has asserted that the graph "
1129 const char err2[] =
"already allocated, but the static graph "
1130 "says that its indices are ";
1131 const char err3[] =
"already allocated. ";
1132 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1133 (gas == GraphAlreadyAllocated &&
1134 ! staticGraph_->indicesAreAllocated (), std::logic_error,
1135 err1 << err2 <<
"not " << err3 << suffix);
1136 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1137 (gas != GraphAlreadyAllocated &&
1138 staticGraph_->indicesAreAllocated (), std::logic_error,
1139 err1 <<
"not " << err2 << err3 << suffix);
// A graph with unallocated indices must be owned by the matrix
// (myGraph_ non-null), since only then can the matrix allocate them.
1147 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1148 (! this->staticGraph_->indicesAreAllocated () &&
1149 this->myGraph_.is_null (), std::logic_error,
1150 "The static graph says that its indices are not allocated, "
1151 "but the graph is not owned by the matrix." << suffix);
// Allocate the owned graph's indices on request.
1154 if (gas == GraphNotYetAllocated) {
1156 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1157 (this->myGraph_.is_null (), std::logic_error,
1158 "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1161 this->myGraph_->allocateIndices (lg, verbose);
// Failures in allocateIndices are reported as std::runtime_error, with
// e.what() appended when the exception derives from std::exception.
1163 catch (std::exception& e) {
1164 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1165 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1166 "threw an exception: " << e.what ());
1169 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1170 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1171 "threw an exception not a subclass of std::exception.");
// Number of value slots to allocate.
1176 const size_t lclTotalNumEntries = this->staticGraph_->getLocalAllocationSize();
// Consistency check: the last unpacked row offset (host copy) must
// equal the allocation size.
// NOTE(review): the message below spells "rowPtrsUnapcked_host";
// likely meant "rowPtrsUnpacked_host".
1178 const size_t lclNumRows = this->staticGraph_->getLocalNumRows ();
1179 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1180 (this->staticGraph_->getRowPtrsUnpackedHost()(lclNumRows) != lclTotalNumEntries, std::logic_error,
1181 "length of staticGraph's lclIndsUnpacked does not match final entry of rowPtrsUnapcked_host." << suffix);
1185 using values_type =
typename local_matrix_device_type::values_type;
// Trace of the values array size before and after allocation.
1187 std::ostringstream os;
1188 os << *prefix <<
"Allocate values_wdv: Pre "
1189 << valuesUnpacked_wdv.extent(0) <<
", post "
1190 << lclTotalNumEntries << endl;
1191 std::cerr << os.str();
// New values view with lclTotalNumEntries entries.
1195 values_type(
"Tpetra::CrsMatrix::values",
1196 lclTotalNumEntries));
// fillLocalGraphAndMatrix -- per the tfecfFuncName string below, called from
// fillComplete or expertStaticFillComplete.
//
// Requires that the matrix own a mutable graph (myGraph_ non-null); throws
// std::logic_error otherwise.  Two cases follow:
//   * The graph's local entry count differs from its allocation size
//     ("unpacked"): packed row offsets, packed local column indices and
//     packed values are allocated, and the unpacked data are copied into
//     them by two pack_functor kernels.
//   * Otherwise ("already packed"): the packed members simply alias the
//     unpacked ones.
// If optimized storage is requested (ParameterList entry "Optimize Storage",
// or its default), the unpacked members are then replaced by the packed
// ones and both storage statuses become STORAGE_1D_PACKED.
//
// NOTE(review): the function's signature line and several brace / `if`
// lines are not visible in this excerpt.
1200 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1206 using ::Tpetra::Details::getEntryOnHost;
1207 using Teuchos::arcp_const_cast;
1208 using Teuchos::Array;
1209 using Teuchos::ArrayRCP;
1210 using Teuchos::null;
1214 using row_map_type =
typename local_graph_device_type::row_map_type;
1215 using lclinds_1d_type =
typename Graph::local_graph_device_type::entries_type::non_const_type;
1216 using values_type =
typename local_matrix_device_type::values_type;
1218 (
"Tpetra::CrsMatrix::fillLocalGraphAndMatrix");
1220 const char tfecfFuncName[] =
"fillLocalGraphAndMatrix (called from "
1221 "fillComplete or expertStaticFillComplete): ";
1222 const char suffix[] =
1223 " Please report this bug to the Tpetra developers.";
1227 std::unique_ptr<std::string> prefix;
1229 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalGraphAndMatrix");
1230 std::ostringstream os;
1231 os << *prefix << endl;
1232 std::cerr << os.str ();
// Precondition: the matrix must own its graph (non-static graph).
1238 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1239 (myGraph_.is_null (), std::logic_error,
"The nonconst graph "
1240 "(myGraph_) is null. This means that the matrix has a "
1241 "const (a.k.a. \"static\") graph. fillComplete or "
1242 "expertStaticFillComplete should never call "
1243 "fillLocalGraphAndMatrix in that case." << suffix);
1246 const size_t lclNumRows = this->getLocalNumRows ();
// Current (unpacked) row offsets, as stored on device by the graph.
1261 typename Graph::local_graph_device_type::row_map_type curRowOffsets =
1262 myGraph_->rowPtrsUnpacked_dev_;
1265 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1266 (curRowOffsets.extent (0) == 0, std::logic_error,
1267 "curRowOffsets.extent(0) == 0.");
1268 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1269 (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1270 "curRowOffsets.extent(0) = "
1271 << curRowOffsets.extent (0) <<
" != lclNumRows + 1 = "
1272 << (lclNumRows + 1) <<
".");
1273 const size_t numOffsets = curRowOffsets.extent (0);
1274 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
// The last row offset must equal the length of the unpacked column-index
// array.  The message reports the index actually read (numOffsets - 1).
1275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1277 myGraph_->lclIndsUnpacked_wdv.extent (0) != valToCheck,
1278 std::logic_error,
"numOffsets = " <<
1279 numOffsets <<
" != 0 and myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1280 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
" != curRowOffsets("
1281 << (numOffsets - 1) <<
") = " << valToCheck <<
".");
// Unpacked case: fewer entries stored than slots allocated, so build
// packed copies of the offsets, column indices and values.
1284 if (myGraph_->getLocalNumEntries() !=
1285 myGraph_->getLocalAllocationSize()) {
1289 typename row_map_type::non_const_type k_ptrs;
1290 row_map_type k_ptrs_const;
1291 lclinds_1d_type k_inds;
1295 std::ostringstream os;
1296 const auto numEnt = myGraph_->getLocalNumEntries();
1297 const auto allocSize = myGraph_->getLocalAllocationSize();
1298 os << *prefix <<
"Unpacked 1-D storage: numEnt=" << numEnt
1299 <<
", allocSize=" << allocSize << endl;
1300 std::cerr << os.str ();
// Debug-only consistency checks on the unpacked arrays before packing.
1308 if (debug && curRowOffsets.extent (0) != 0) {
1309 const size_t numOffsets =
1310 static_cast<size_t> (curRowOffsets.extent (0));
1311 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1312 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1313 (static_cast<size_t> (valToCheck) !=
1314 static_cast<size_t> (valuesUnpacked_wdv.extent (0)),
1315 std::logic_error,
"(unpacked branch) Before "
1316 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1317 <<
") = " << valToCheck <<
" != valuesUnpacked_wdv.extent(0)"
1318 " = " << valuesUnpacked_wdv.extent (0) <<
".");
1319 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1320 (static_cast<size_t> (valToCheck) !=
1321 static_cast<size_t> (myGraph_->lclIndsUnpacked_wdv.extent (0)),
1322 std::logic_error,
"(unpacked branch) Before "
1323 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1324 <<
") = " << valToCheck
1325 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1326 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
".");
1334 size_t lclTotalNumEntries = 0;
1340 std::ostringstream os;
1341 os << *prefix <<
"Allocate packed row offsets: "
1342 << (lclNumRows+1) << endl;
1343 std::cerr << os.str ();
// Packed row offsets (lclNumRows + 1 entries) and the per-row entry counts.
// NOTE(review): the call that fills packedRowOffsets and yields
// lclTotalNumEntries is not visible in this excerpt.
1345 typename row_map_type::non_const_type
1346 packedRowOffsets (
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
1347 typename row_entries_type::const_type numRowEnt_h =
1348 myGraph_->k_numRowEntries_;
1351 lclTotalNumEntries =
1355 k_ptrs = packedRowOffsets;
1356 k_ptrs_const = k_ptrs;
1360 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1361 (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1363 "(unpacked branch) After packing k_ptrs, "
1364 "k_ptrs.extent(0) = " << k_ptrs.extent (0) <<
" != "
1365 "lclNumRows+1 = " << (lclNumRows+1) <<
".");
1366 const auto valToCheck = getEntryOnHost (k_ptrs, lclNumRows);
1367 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1368 (valToCheck != lclTotalNumEntries, std::logic_error,
1369 "(unpacked branch) After filling k_ptrs, "
1370 "k_ptrs(lclNumRows=" << lclNumRows <<
") = " << valToCheck
1371 <<
" != total number of entries on the calling process = "
1372 << lclTotalNumEntries <<
".");
1377 std::ostringstream os;
1378 os << *prefix <<
"Allocate packed local column indices: "
1379 << lclTotalNumEntries << endl;
1380 std::cerr << os.str ();
1382 k_inds = lclinds_1d_type (
"Tpetra::CrsGraph::lclInds", lclTotalNumEntries);
1384 std::ostringstream os;
1385 os << *prefix <<
"Allocate packed values: "
1386 << lclTotalNumEntries << endl;
1387 std::cerr << os.str ();
1389 k_vals = values_type (
"Tpetra::CrsMatrix::values", lclTotalNumEntries);
// Pack the column indices: copy from the unpacked array (indexed by
// curRowOffsets) into k_inds (indexed by k_ptrs), one row per iteration.
1401 using inds_packer_type = pack_functor<
1402 typename Graph::local_graph_device_type::entries_type::non_const_type,
1403 typename Graph::local_inds_dualv_type::t_dev::const_type,
1404 typename Graph::local_graph_device_type::row_map_type::non_const_type,
1405 typename Graph::local_graph_device_type::row_map_type>;
1406 inds_packer_type indsPacker (
1408 myGraph_->lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
1409 k_ptrs, curRowOffsets);
1411 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1412 Kokkos::parallel_for
1413 (
"Tpetra::CrsMatrix pack column indices",
1414 range_type (0, lclNumRows), indsPacker);
// Pack the values with the same functor template and the same offsets.
1418 using vals_packer_type = pack_functor<
1419 typename values_type::non_const_type,
1420 typename values_type::const_type,
1421 typename row_map_type::non_const_type,
1422 typename row_map_type::const_type>;
1423 vals_packer_type valsPacker (
1425 this->valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1426 k_ptrs, curRowOffsets);
1427 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1428 range_type (0, lclNumRows), valsPacker);
1431 const char myPrefix[] =
"(\"Optimize Storage\""
1432 "=true branch) After packing, ";
1433 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1434 (k_ptrs.extent (0) == 0, std::logic_error, myPrefix
1435 <<
"k_ptrs.extent(0) = 0. This probably means that "
1436 "rowPtrsUnpacked_ was never allocated.");
1437 if (k_ptrs.extent (0) != 0) {
1438 const size_t numOffsets (k_ptrs.extent (0));
1439 const auto valToCheck =
1440 getEntryOnHost (k_ptrs, numOffsets - 1);
1441 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1442 (
size_t (valToCheck) != k_vals.extent (0),
1443 std::logic_error, myPrefix <<
1444 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1445 " != k_vals.extent(0) = " << k_vals.extent (0) <<
".");
1446 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1447 (
size_t (valToCheck) != k_inds.extent (0),
1448 std::logic_error, myPrefix <<
1449 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1450 " != k_inds.extent(0) = " << k_inds.extent (0) <<
".");
// Hand the packed offsets (and, on lines not visible here, the packed
// indices and values) over to the graph / matrix.
1454 myGraph_->setRowPtrsPacked(k_ptrs_const);
1455 myGraph_->lclIndsPacked_wdv =
// Already-packed case: packed members alias the unpacked ones (no copy).
1462 myGraph_->rowPtrsPacked_dev_ = myGraph_->rowPtrsUnpacked_dev_;
1463 myGraph_->rowPtrsPacked_host_ = myGraph_->rowPtrsUnpacked_host_;
1464 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1465 myGraph_->lclIndsPacked_wdv = myGraph_->lclIndsUnpacked_wdv;
1466 valuesPacked_wdv = valuesUnpacked_wdv;
1469 std::ostringstream os;
1470 os << *prefix <<
"Storage already packed: rowPtrsUnpacked_: "
1471 << myGraph_->getRowPtrsUnpackedHost().extent(0) <<
", lclIndsUnpacked_wdv: "
1472 << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
", valuesUnpacked_wdv: "
1473 << valuesUnpacked_wdv.extent(0) << endl;
1474 std::cerr << os.str();
1478 const char myPrefix[] =
1479 "(\"Optimize Storage\"=false branch) ";
1480 auto rowPtrsUnpackedHost = myGraph_->getRowPtrsUnpackedHost();
1481 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1482 (myGraph_->rowPtrsUnpacked_dev_.extent (0) == 0, std::logic_error, myPrefix
1483 <<
"myGraph->rowPtrsUnpacked_dev_.extent(0) = 0. This probably means "
1484 "that rowPtrsUnpacked_ was never allocated.");
1485 if (myGraph_->rowPtrsUnpacked_dev_.extent (0) != 0) {
1486 const size_t numOffsets = rowPtrsUnpackedHost.extent (0);
1487 const auto valToCheck = rowPtrsUnpackedHost(numOffsets - 1);
// NOTE(review): the two messages below label the value "k_ptrs_const(...)"
// although it is read from rowPtrsUnpackedHost.
1488 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1489 (
size_t (valToCheck) != valuesPacked_wdv.extent (0),
1490 std::logic_error, myPrefix <<
1491 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1492 <<
" != valuesPacked_wdv.extent(0) = "
1493 << valuesPacked_wdv.extent (0) <<
".");
1494 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1495 (
size_t (valToCheck) != myGraph_->lclIndsPacked_wdv.extent (0),
1496 std::logic_error, myPrefix <<
1497 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1498 <<
" != myGraph_->lclIndsPacked.extent(0) = "
1499 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// Checks on the packed arrays, whichever branch produced them.
1505 const char myPrefix[] =
"After packing, ";
1506 auto rowPtrsPackedHost = myGraph_->getRowPtrsPackedHost();
1507 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1508 (
size_t (rowPtrsPackedHost.extent (0)) !=
size_t (lclNumRows + 1),
1509 std::logic_error, myPrefix <<
"myGraph_->rowPtrsPacked_host_.extent(0) = "
1510 << rowPtrsPackedHost.extent (0) <<
" != lclNumRows+1 = " <<
1511 (lclNumRows+1) <<
".");
1512 if (rowPtrsPackedHost.extent (0) != 0) {
1513 const size_t numOffsets (rowPtrsPackedHost.extent (0));
1514 const size_t valToCheck = rowPtrsPackedHost(numOffsets-1);
1515 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1516 (valToCheck !=
size_t (valuesPacked_wdv.extent (0)),
1517 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1518 (numOffsets-1) <<
") = " << valToCheck
1519 <<
" != valuesPacked_wdv.extent(0) = "
1520 << valuesPacked_wdv.extent (0) <<
".");
1521 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1522 (valToCheck !=
size_t (myGraph_->lclIndsPacked_wdv.extent (0)),
1523 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1524 (numOffsets-1) <<
") = " << valToCheck
1525 <<
" != myGraph_->lclIndsPacked_wdv.extent(0) = "
1526 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// "Optimize Storage" defaults to true unless the graph is static and not
// itself storage-optimized; an explicit ParameterList entry overrides it.
1534 const bool defaultOptStorage =
1535 ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1536 const bool requestOptimizedStorage =
1537 (! params.is_null () &&
1538 params->get (
"Optimize Storage", defaultOptStorage)) ||
1539 (params.is_null () && defaultOptStorage);
1544 if (requestOptimizedStorage) {
1549 std::ostringstream os;
1550 os << *prefix <<
"Optimizing storage: free k_numRowEntries_: "
1551 << myGraph_->k_numRowEntries_.extent(0) << endl;
1552 std::cerr << os.str();
// Release the per-row entry counts by assigning an empty view.
1555 myGraph_->k_numRowEntries_ = row_entries_type ();
// Make the unpacked members alias the packed ones.
1560 myGraph_->rowPtrsUnpacked_dev_ = myGraph_->rowPtrsPacked_dev_;
1561 myGraph_->rowPtrsUnpacked_host_ = myGraph_->rowPtrsPacked_host_;
1562 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1563 myGraph_->lclIndsUnpacked_wdv = myGraph_->lclIndsPacked_wdv;
1564 valuesUnpacked_wdv = valuesPacked_wdv;
1566 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1567 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1571 std::ostringstream os;
1572 os << *prefix <<
"User requested NOT to optimize storage"
1574 std::cerr << os.str();
// fillLocalMatrix (name taken from the ProfilingRegion / createPrefix
// strings below).
//
// Packs only the matrix values; row offsets and per-row entry counts are
// read from staticGraph_.  If the graph's local entry count differs from
// its allocation size, packed row offsets (tmpk_ptrs) and a packed values
// array (k_vals) are allocated and a pack_functor kernel copies the values
// across; otherwise valuesPacked_wdv simply aliases valuesUnpacked_wdv.
// When optimized storage is requested, valuesUnpacked_wdv is replaced by
// valuesPacked_wdv and storageStatus_ becomes STORAGE_1D_PACKED.
//
// NOTE(review): the signature line and several brace / `if` lines are not
// visible in this excerpt.
1579 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1584 using ::Tpetra::Details::ProfilingRegion;
1585 using Teuchos::ArrayRCP;
1586 using Teuchos::Array;
1587 using Teuchos::null;
1591 using row_map_type =
typename Graph::local_graph_device_type::row_map_type;
1592 using non_const_row_map_type =
typename row_map_type::non_const_type;
1593 using values_type =
typename local_matrix_device_type::values_type;
1594 ProfilingRegion regionFLM(
"Tpetra::CrsMatrix::fillLocalMatrix");
1595 const size_t lclNumRows = getLocalNumRows();
1598 std::unique_ptr<std::string> prefix;
1600 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalMatrix");
1601 std::ostringstream os;
1602 os << *prefix <<
"lclNumRows: " << lclNumRows << endl;
1603 std::cerr << os.str ();
// Entry count vs. allocation size decides below whether packing is needed.
1615 size_t nodeNumEntries = staticGraph_->getLocalNumEntries ();
1616 size_t nodeNumAllocated = staticGraph_->getLocalAllocationSize ();
1617 row_map_type k_rowPtrs = staticGraph_->rowPtrsPacked_dev_;
1619 row_map_type k_ptrs;
// Optimized storage is requested unless the ParameterList says otherwise;
// the default passed to params->get is true unless the graph is static and
// not storage-optimized.
1625 bool requestOptimizedStorage =
true;
1626 const bool default_OptimizeStorage =
1627 ! isStaticGraph() || staticGraph_->isStorageOptimized();
1628 if (! params.is_null() &&
1629 ! params->get(
"Optimize Storage", default_OptimizeStorage)) {
1630 requestOptimizedStorage =
false;
// The matrix cannot optimize storage if its graph did not; the request is
// dropped after the macro invocation below.
// NOTE(review): the macro name on the line before "(" is not visible here.
1637 if (! staticGraph_->isStorageOptimized () &&
1638 requestOptimizedStorage) {
1640 (
true, std::runtime_error,
"You requested optimized storage "
1641 "by setting the \"Optimize Storage\" flag to \"true\" in "
1642 "the ParameterList, or by virtue of default behavior. "
1643 "However, the associated CrsGraph was filled separately and "
1644 "requested not to optimize storage. Therefore, the "
1645 "CrsMatrix cannot optimize storage.");
1646 requestOptimizedStorage =
false;
// Unpacked case: allocate packed offsets and values, then pack.
1671 if (nodeNumEntries != nodeNumAllocated) {
1673 std::ostringstream os;
1674 os << *prefix <<
"Unpacked 1-D storage: numEnt="
1675 << nodeNumEntries <<
", allocSize=" << nodeNumAllocated
1677 std::cerr << os.str();
1682 std::ostringstream os;
1683 os << *prefix <<
"Allocate packed row offsets: "
1684 << (lclNumRows+1) << endl;
1685 std::cerr << os.str();
1687 non_const_row_map_type tmpk_ptrs (
"Tpetra::CrsGraph::ptr",
1692 size_t lclTotalNumEntries = 0;
1695 typename row_entries_type::const_type numRowEnt_h =
1696 staticGraph_->k_numRowEntries_;
// NOTE(review): the call that fills tmpk_ptrs and yields
// lclTotalNumEntries is not visible in this excerpt.
1698 lclTotalNumEntries =
1705 std::ostringstream os;
1706 os << *prefix <<
"Allocate packed values: "
1707 << lclTotalNumEntries << endl;
1708 std::cerr << os.str ();
1710 k_vals = values_type (
"Tpetra::CrsMatrix::val", lclTotalNumEntries);
// Functor copying values from the unpacked layout (k_rowPtrs offsets) into
// k_vals (tmpk_ptrs offsets); launched over all local rows.
1714 typename values_type::non_const_type,
1715 typename values_type::const_type,
1716 typename row_map_type::non_const_type,
1717 typename row_map_type::const_type> valsPacker
1718 (k_vals, valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1719 tmpk_ptrs, k_rowPtrs);
1722 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1723 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1724 range_type (0, lclNumRows), valsPacker);
// Already-packed case: packed values alias the unpacked values.
1728 valuesPacked_wdv = valuesUnpacked_wdv;
1730 std::ostringstream os;
1731 os << *prefix <<
"Storage already packed: "
1732 <<
"valuesUnpacked_wdv: " << valuesUnpacked_wdv.extent(0) << endl;
1733 std::cerr << os.str();
1738 if (requestOptimizedStorage) {
1741 valuesUnpacked_wdv = valuesPacked_wdv;
1743 this->storageStatus_ = Details::STORAGE_1D_PACKED;
// Inserts new column indices into one row of the graph and copies the
// matching values into the row's value storage.
//
// graph.insertIndices reports how many indices it inserted (numInserted);
// that many values are then copied from the front of newRowVals into
// oldRowVals, starting at the row's previous entry count.
//
// NOTE(review): the function name and its leading parameters (graph,
// rowInfo) are not visible in this excerpt.
1747 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1752 const typename crs_graph_type::SLocalGlobalViews& newInds,
1753 const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1754 const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1755 const ELocalGlobal lg,
1756 const ELocalGlobal I)
// Remember the entry count before insertion; new values are appended there.
1758 const size_t oldNumEnt = rowInfo.numEntries;
1759 const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
1765 if (numInserted > 0) {
1766 const size_t startOffset = oldNumEnt;
// Raw byte copy of numInserted values.
// NOTE(review): assumes impl_scalar_type is trivially copyable -- confirm.
1767 memcpy ((
void*) &oldRowVals[startOffset], &newRowVals[0],
1768 numInserted *
sizeof (impl_scalar_type));
// insertLocalValues (ArrayView overload; name from tfecfFuncName).
//
// Inserts (local column index, value) pairs into local row lclRow.
// Throws std::runtime_error if fill is not active, if the graph is static,
// if the graph has no column Map, if values and indices differ in length,
// or if lclRow is not owned by this process.  If the graph's indices are
// not yet allocated, values are allocated first (local indices).  In debug
// builds, indices not in the column Map are collected and reported through
// std::invalid_argument.
// The values are written through a callback passed to
// graph.insertLocalIndicesImpl: one branch accumulates (+=), the INSERT
// branch overwrites (=); any other CombineMode throws std::invalid_argument.
//
// NOTE(review): the signature line and several brace / `if` lines are not
// visible in this excerpt.
1772 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1776 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1777 const Teuchos::ArrayView<const Scalar>& values,
1781 const char tfecfFuncName[] =
"insertLocalValues: ";
1783 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1784 (! this->isFillActive (), std::runtime_error,
1785 "Fill is not active. After calling fillComplete, you must call "
1786 "resumeFill before you may insert entries into the matrix again.");
1787 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1788 (this->isStaticGraph (), std::runtime_error,
1789 "Cannot insert indices with static graph; use replaceLocalValues() "
1793 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1794 (graph.
colMap_.is_null (), std::runtime_error,
1795 "Cannot insert local indices without a column map.");
1796 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1798 std::runtime_error,
"Graph indices are global; use "
1799 "insertGlobalValues().");
1800 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1801 (values.size () != indices.size (), std::runtime_error,
1802 "values.size() = " << values.size ()
1803 <<
" != indices.size() = " << indices.size () <<
".");
1804 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1805 ! graph.
rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1806 "Local row index " << lclRow <<
" does not belong to this process.");
// First insertion into this matrix: allocate value storage for local indices.
1808 if (! graph.indicesAreAllocated ()) {
1812 this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose);
1815 #ifdef HAVE_TPETRA_DEBUG
1816 const size_t numEntriesToAdd =
static_cast<size_t> (indices.size ());
1821 using Teuchos::toString;
// Debug only: collect every input index that fails the column-Map test.
// NOTE(review): the test itself is on a line not visible in this excerpt.
1824 Teuchos::Array<LocalOrdinal> badColInds;
1825 bool allInColMap =
true;
1826 for (
size_t k = 0; k < numEntriesToAdd; ++k) {
1828 allInColMap =
false;
1829 badColInds.push_back (indices[k]);
1832 if (! allInColMap) {
1833 std::ostringstream os;
1834 os <<
"You attempted to insert entries in owned row " << lclRow
1835 <<
", at the following column indices: " << toString (indices)
1837 os <<
"Of those, the following indices are not in the column Map on "
1838 "this process: " << toString (badColInds) <<
"." << endl <<
"Since "
1839 "the matrix has a column Map already, it is invalid to insert "
1840 "entries at those locations.";
1841 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1842 (
true, std::invalid_argument, os.str ());
1845 #endif // HAVE_TPETRA_DEBUG
1849 auto valsView = this->getValuesViewHostNonConst(rowInfo);
// Accumulating branch: the callback adds values[k] at the given offset.
1851 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1852 valsView[offset] += values[k]; };
1853 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1854 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1855 }
else if (CM ==
INSERT) {
// INSERT branch: the callback overwrites the value at the given offset.
1856 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1857 valsView[offset] = values[k]; };
1858 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1859 graph.insertLocalIndicesImpl(lclRow, indices, cb);
// Any other CombineMode is rejected.
1861 std::ostringstream os;
1862 os <<
"You attempted to use insertLocalValues with CombineMode " <<
combineModeToString(CM)
1863 <<
" but this has not been implemented." << endl;
1864 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1865 (
true, std::invalid_argument, os.str ());
// Raw-array convenience overload: wraps cols[0..numEnt) and vals[0..numEnt)
// in non-owning Teuchos::ArrayViews and forwards to the ArrayView overload
// of insertLocalValues with the same combine mode CM.
//
// NOTE(review): the signature's first line (function name, localRow
// parameter) and the CM parameter line are not visible in this excerpt.
1869 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1873 const LocalOrdinal numEnt,
1874 const Scalar vals[],
1875 const LocalOrdinal cols[],
1878 Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1879 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1880 this->insertLocalValues (localRow, colsT, valsT, CM);
// insertGlobalValuesImpl (name from tfecfFuncName, defined in debug builds).
//
// Inserts numInputEnt (global column index, value) pairs into the row
// described by rowInfo.  If the graph's indices are not yet allocated,
// value storage is allocated (global indices) and rowInfo is re-fetched.
// Values are written through a callback that accumulates (+=) vals[k] at
// the given offset of the row's value view.
// In debug builds, the row's entry count after insertion is checked
// against curNumEnt + numInserted; on mismatch a detailed dump of inputs
// and the row's current contents is thrown as std::logic_error.
//
// NOTE(review): the signature's first lines, the graph call that consumes
// the callback, and several brace / `if` lines are not visible here.
1883 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1888 const GlobalOrdinal gblColInds[],
1890 const size_t numInputEnt)
1892 #ifdef HAVE_TPETRA_DEBUG
1893 const char tfecfFuncName[] =
"insertGlobalValuesImpl: ";
1895 const size_t curNumEnt = rowInfo.numEntries;
1896 #endif // HAVE_TPETRA_DEBUG
// Lazy allocation on first insertion.
1898 if (! graph.indicesAreAllocated ()) {
1901 using ::Tpetra::Details::Behavior;
1902 const bool verbose = Behavior::verbose(
"CrsMatrix");
1903 this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose);
// Allocation may have changed the row's bookkeeping; refresh rowInfo.
1908 rowInfo = graph.
getRowInfo (rowInfo.localRow);
1911 auto valsView = this->getValuesViewHostNonConst(rowInfo);
// Callback: accumulate input value k into the row at the given offset.
1912 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset){
1913 valsView[offset] += vals[k];
1915 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1916 #ifdef HAVE_TPETRA_DEBUG
1922 #ifdef HAVE_TPETRA_DEBUG
// Debug-only postcondition: expected vs. actual number of entries in the row.
1923 size_t newNumEnt = curNumEnt + numInserted;
1924 const size_t chkNewNumEnt =
1926 if (chkNewNumEnt != newNumEnt) {
1927 std::ostringstream os;
1928 os << std::endl <<
"newNumEnt = " << newNumEnt
1929 <<
" != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1930 <<
") = " << chkNewNumEnt <<
"." << std::endl
1931 <<
"\torigNumEnt: " << origNumEnt << std::endl
1932 <<
"\tnumInputEnt: " << numInputEnt << std::endl
1933 <<
"\tgblColInds: [";
1934 for (
size_t k = 0; k < numInputEnt; ++k) {
1935 os << gblColInds[k];
1936 if (k +
size_t (1) < numInputEnt) {
1940 os <<
"]" << std::endl
1942 for (
size_t k = 0; k < numInputEnt; ++k) {
1944 if (k +
size_t (1) < numInputEnt) {
1948 os <<
"]" << std::endl;
// If row views are available, also dump the row's current contents.
1950 if (this->supportsRowViews ()) {
1951 values_host_view_type vals2;
1952 if (this->isGloballyIndexed ()) {
1953 global_inds_host_view_type gblColInds2;
1954 const GlobalOrdinal gblRow =
1955 graph.
rowMap_->getGlobalElement (rowInfo.localRow);
1957 Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
1958 os <<
"Local row index " << rowInfo.localRow <<
" is invalid!"
// Getting the view may itself throw; record that rather than propagate.
1962 bool getViewThrew =
false;
1964 this->getGlobalRowView (gblRow, gblColInds2, vals2);
1966 catch (std::exception& e) {
1967 getViewThrew =
true;
1968 os <<
"getGlobalRowView threw exception:" << std::endl
1969 << e.what () << std::endl;
1971 if (! getViewThrew) {
1972 os <<
"\tNew global column indices: ";
1973 for (
size_t jjj = 0; jjj < gblColInds2.extent(0); jjj++)
1974 os << gblColInds2[jjj] <<
" ";
1976 os <<
"\tNew values: ";
1977 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1978 os << vals2[jjj] <<
" ";
1983 else if (this->isLocallyIndexed ()) {
1984 local_inds_host_view_type lclColInds2;
1985 this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
1986 os <<
"\tNew local column indices: ";
1987 for (
size_t jjj = 0; jjj < lclColInds2.extent(0); jjj++)
1988 os << lclColInds2[jjj] <<
" ";
1990 os <<
"\tNew values: ";
1991 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1992 os << vals2[jjj] <<
" ";
1997 os <<
"Please report this bug to the Tpetra developers.";
1998 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1999 (
true, std::logic_error, os.str ());
2001 #endif // HAVE_TPETRA_DEBUG
// insertGlobalValues (ArrayView overload; name from tfecfFuncName).
//
// Inserts (global column index, value) pairs into global row gblRow.
//   * Debug builds throw std::runtime_error if values and indices differ
//     in length.
//   * If gblRow has no local index in the row Map, the entries are handed
//     to insertNonownedGlobalValues.
//   * For an owned row, a static graph is an error (std::runtime_error).
//   * If the graph has a column Map, input indices that fail the
//     column-Map test cause std::invalid_argument; debug builds list the
//     offending indices.
//   * Otherwise the raw pointers are forwarded to insertGlobalValuesImpl.
//
// NOTE(review): the signature's first lines, the computation of lclRow,
// and several brace / `if` / `else` lines are not visible in this excerpt.
2004 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2008 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2009 const Teuchos::ArrayView<const Scalar>& values)
2011 using Teuchos::toString;
2014 typedef LocalOrdinal LO;
2015 typedef GlobalOrdinal GO;
2016 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2017 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
2018 const char tfecfFuncName[] =
"insertGlobalValues: ";
2020 #ifdef HAVE_TPETRA_DEBUG
2021 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2022 (values.size () != indices.size (), std::runtime_error,
2023 "values.size() = " << values.size () <<
" != indices.size() = "
2024 << indices.size () <<
".");
2025 #endif // HAVE_TPETRA_DEBUG
2029 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
// Row not owned by this process: take the nonowned-insert path.
2032 if (lclRow == OTLO::invalid ()) {
2039 this->insertNonownedGlobalValues (gblRow, indices, values);
// Owned row with a static graph: inserting new entries is not allowed.
2042 if (this->isStaticGraph ()) {
2044 const int myRank = rowMap.getComm ()->getRank ();
2045 const int numProcs = rowMap.getComm ()->getSize ();
2046 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2047 (
true, std::runtime_error,
2048 "The matrix was constructed with a constant (\"static\") graph, "
2049 "yet the given global row index " << gblRow <<
" is in the row "
2050 "Map on the calling process (with rank " << myRank <<
", of " <<
2051 numProcs <<
" process(es)). In this case, you may not insert "
2052 "new entries into rows owned by the calling process.");
// View the Scalar values as the internal scalar type (IST).
2056 const IST*
const inputVals =
2057 reinterpret_cast<const IST*
> (values.getRawPtr ());
2058 const GO*
const inputGblColInds = indices.getRawPtr ();
2059 const size_t numInputEnt = indices.size ();
// With a column Map present, every input column index must pass the
// column-Map test (the test itself is on a line not visible here).
2068 if (! graph.
colMap_.is_null ()) {
2074 #ifdef HAVE_TPETRA_DEBUG
2075 Teuchos::Array<GO> badColInds;
2076 #endif // HAVE_TPETRA_DEBUG
2077 const size_type numEntriesToInsert = indices.size ();
2078 bool allInColMap =
true;
2079 for (size_type k = 0; k < numEntriesToInsert; ++k) {
2081 allInColMap =
false;
2082 #ifdef HAVE_TPETRA_DEBUG
2083 badColInds.push_back (indices[k]);
2086 #endif // HAVE_TPETRA_DEBUG
2089 if (! allInColMap) {
2090 std::ostringstream os;
2091 os <<
"You attempted to insert entries in owned row " << gblRow
2092 <<
", at the following column indices: " << toString (indices)
2094 #ifdef HAVE_TPETRA_DEBUG
2095 os <<
"Of those, the following indices are not in the column Map "
2096 "on this process: " << toString (badColInds) <<
"." << endl
2097 <<
"Since the matrix has a column Map already, it is invalid "
2098 "to insert entries at those locations.";
2100 os <<
"At least one of those indices is not in the column Map "
2101 "on this process." << endl <<
"It is invalid to insert into "
2102 "columns not in the column Map on the process that owns the "
2104 #endif // HAVE_TPETRA_DEBUG
2105 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2106 (
true, std::invalid_argument, os.str ());
// All checks passed: do the actual insertion.
2110 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2111 inputVals, numInputEnt);
// Raw-array convenience overload: wraps inds[0..numEnt) and
// vals[0..numEnt) in non-owning Teuchos::ArrayViews and forwards to the
// ArrayView overload of insertGlobalValues.
//
// NOTE(review): the signature's first line (function name, globalRow
// parameter) is not visible in this excerpt.
2116 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2120 const LocalOrdinal numEnt,
2121 const Scalar vals[],
2122 const GlobalOrdinal inds[])
2124 Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2125 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2126 this->insertGlobalValues (globalRow, indsT, valsT);
// insertGlobalValuesFiltered (name from tfecfFuncName).
//
// Like insertGlobalValues, but when the graph has a column Map, input
// column indices that the column Map cannot translate to a local index are
// skipped rather than reported as an error.
//   * values and indices must have equal length (std::runtime_error).
//   * Rows not in the row Map go to insertNonownedGlobalValues.
//   * Owned rows with a static graph throw std::runtime_error.
//   * Column Map + locally indexed graph: runs of consecutive valid
//     indices are converted to local indices and inserted with
//     insertLocalValues.
//   * Column Map, not locally indexed: runs of consecutive valid indices
//     are inserted with insertGlobalValuesImpl.
//   * No column Map: everything is inserted with insertGlobalValuesImpl.
//
// NOTE(review): the signature's first lines, the trailing parameter after
// `values`, and several brace / `if` / `else` lines are not visible here.
2130 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2134 const GlobalOrdinal gblRow,
2135 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2136 const Teuchos::ArrayView<const Scalar>& values,
2139 typedef impl_scalar_type IST;
2140 typedef LocalOrdinal LO;
2141 typedef GlobalOrdinal GO;
2142 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2143 const char tfecfFuncName[] =
"insertGlobalValuesFiltered: ";
2146 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2147 (values.size () != indices.size (), std::runtime_error,
2148 "values.size() = " << values.size () <<
" != indices.size() = "
2149 << indices.size () <<
".");
2154 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2155 const LO lclRow = rowMap.getLocalElement (gblRow);
// Row not owned by this process: take the nonowned-insert path.
2156 if (lclRow == OTLO::invalid ()) {
2163 this->insertNonownedGlobalValues (gblRow, indices, values);
2166 if (this->isStaticGraph ()) {
2168 const int myRank = rowMap.getComm ()->getRank ();
2169 const int numProcs = rowMap.getComm ()->getSize ();
2170 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2171 (
true, std::runtime_error,
2172 "The matrix was constructed with a constant (\"static\") graph, "
2173 "yet the given global row index " << gblRow <<
" is in the row "
2174 "Map on the calling process (with rank " << myRank <<
", of " <<
2175 numProcs <<
" process(es)). In this case, you may not insert "
2176 "new entries into rows owned by the calling process.");
2179 crs_graph_type& graph = * (this->myGraph_);
2180 const IST*
const inputVals =
2181 reinterpret_cast<const IST*
> (values.getRawPtr ());
2182 const GO*
const inputGblColInds = indices.getRawPtr ();
2183 const size_t numInputEnt = indices.size ();
2184 RowInfo rowInfo = graph.getRowInfo (lclRow);
// Case 1: column Map present and graph locally indexed -- global indices
// must be converted to local ones before insertion.
2186 if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2193 const map_type& colMap = * (graph.colMap_);
2194 size_t curOffset = 0;
2195 while (curOffset < numInputEnt) {
// Collect local indices for a run of inputs starting at curOffset.
// NOTE(review): the branch taken for an invalid index (presumably ending
// the run) is on lines not visible in this excerpt.
2199 Teuchos::Array<LO> lclIndices;
2200 size_t endOffset = curOffset;
2201 for ( ; endOffset < numInputEnt; ++endOffset) {
2202 auto lclIndex = colMap.getLocalElement(inputGblColInds[endOffset]);
2203 if (lclIndex != OTLO::invalid())
2204 lclIndices.push_back(lclIndex);
2211 const LO numIndInSeq = (endOffset - curOffset);
2212 if (numIndInSeq != 0) {
2213 this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
// Sanity check: the run ended at the end of input or at an invalid index.
2219 const bool invariant = endOffset == numInputEnt ||
2220 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2221 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2222 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
// Resume just past the index at endOffset, i.e. skip the invalid one.
2224 curOffset = endOffset + 1;
// Case 2: column Map present, graph not locally indexed -- insert runs of
// valid global indices directly.
2227 else if (! graph.colMap_.is_null ()) {
2228 const map_type& colMap = * (graph.colMap_);
2229 size_t curOffset = 0;
2230 while (curOffset < numInputEnt) {
2234 size_t endOffset = curOffset;
2235 for ( ; endOffset < numInputEnt &&
2236 colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2242 const LO numIndInSeq = (endOffset - curOffset);
2243 if (numIndInSeq != 0) {
// Re-fetch rowInfo before each insertion into this row.
2244 rowInfo = graph.getRowInfo(lclRow);
2245 this->insertGlobalValuesImpl (graph, rowInfo,
2246 inputGblColInds + curOffset,
2247 inputVals + curOffset,
2254 const bool invariant = endOffset == numInputEnt ||
2255 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2256 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2257 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
2259 curOffset = endOffset + 1;
// Case 3: no column Map -- nothing to filter, insert all inputs.
2263 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2264 inputVals, numInputEnt);
// insertGlobalValuesFilteredChecked: calls insertGlobalValuesFiltered and,
// if that throws a std::exception, rethrows a std::runtime_error whose
// message includes the original what() text.  One of the two message forms
// below also includes the caller-supplied prefix and the global row index.
//
// prefix is a C string (const char*), so it is streamed as a whole string;
// dereferencing it would stream only its first character.
//
// NOTE(review): the return type, the parameters after `prefix`, and
// several brace / `try` / `if` / `else` lines are not visible here.
2269 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2271 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2272 insertGlobalValuesFilteredChecked(
2273 const GlobalOrdinal gblRow,
2274 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2275 const Teuchos::ArrayView<const Scalar>& values,
2276 const char*
const prefix,
2284 insertGlobalValuesFiltered(gblRow, indices, values, debug);
2286 catch(std::exception& e) {
2287 std::ostringstream os;
2289 const size_t maxNumToPrint =
// Detailed message form: prefix, original error, and the global row index.
2291 os << prefix <<
": insertGlobalValuesFiltered threw an "
2292 "exception: " << e.what() << endl
2293 <<
"Global row index: " << gblRow << endl;
// Short message form (no prefix).
2301 os <<
": insertGlobalValuesFiltered threw an exception: "
2304 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str());
// Replaces existing values in one row, given column indices inds[] and
// new values newVals[] (numElts of each).
//
// Two loops are visible.  The first looks up each input local column index
// among the row's stored indices with KokkosSparse::findRelOffset and, if
// found (offset != rowInfo.numEntries), overwrites rowVals[offset].  The
// second handles stored global indices: it returns the invalid LO value if
// the graph has no column Map, and otherwise looks up gblColInd (when
// valid) and overwrites the matching value.  Inputs with no matching
// stored entry are skipped.
//
// NOTE(review): the function name, its leading parameters, the branch
// conditions selecting each loop, the local-to-global conversion, and the
// return statement are not visible in this excerpt.
2308 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2314 const LocalOrdinal inds[],
2316 const LocalOrdinal numElts)
2318 typedef LocalOrdinal LO;
2319 typedef GlobalOrdinal GO;
// Whether the graph reports its rows as sorted; passed to findRelOffset.
2320 const bool sorted = graph.
isSorted ();
2330 for (LO j = 0; j < numElts; ++j) {
2331 const LO lclColInd = inds[j];
2332 const size_t offset =
2333 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2334 lclColInd, hint, sorted);
// offset == numEntries means "not found"; such inputs are ignored.
2335 if (offset != rowInfo.numEntries) {
2336 rowVals[offset] = newVals[j];
// Without a column Map the second path cannot proceed.
2343 if (graph.
colMap_.is_null ()) {
2344 return Teuchos::OrdinalTraits<LO>::invalid ();
2352 for (LO j = 0; j < numElts; ++j) {
2354 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2355 const size_t offset =
2356 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2357 gblColInd, hint, sorted);
2358 if (offset != rowInfo.numEntries) {
2359 rowVals[offset] = newVals[j];
// ArrayView convenience overload: returns the invalid LocalOrdinal value
// if lclCols and vals differ in length; otherwise forwards the raw
// pointers and the entry count to the raw-array replaceLocalValues
// overload and returns its result.
//
// NOTE(review): the signature's first line (function name, localRow
// parameter) is not visible in this excerpt.
2378 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2382 const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2383 const Teuchos::ArrayView<const Scalar>& vals)
2385 typedef LocalOrdinal LO;
2387 const LO numInputEnt =
static_cast<LO
> (lclCols.size ());
// Length mismatch is reported through the return value, not an exception.
2388 if (static_cast<LO> (vals.size ()) != numInputEnt) {
2389 return Teuchos::OrdinalTraits<LO>::invalid ();
2391 const LO*
const inputInds = lclCols.getRawPtr ();
2392 const Scalar*
const inputVals = vals.getRawPtr ();
2393 return this->replaceLocalValues (localRow, numInputEnt,
2394 inputVals, inputInds);
// Kokkos::View convenience overload: returns the invalid LocalOrdinal
// value if inputInds and inputVals differ in extent; otherwise forwards
// the views' raw data pointers and the entry count to the raw-array
// replaceLocalValues overload and returns its result.
//
// NOTE(review): the signature's first line (function name, localRow
// parameter) and the LO typedef are not visible in this excerpt.
2397 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2403 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2404 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2407 const LO numInputEnt = inputInds.extent(0);
2408 if (numInputEnt != static_cast<LO>(inputVals.extent(0))) {
2409 return Teuchos::OrdinalTraits<LO>::invalid();
// The raw-array overload takes Scalar, so the impl_scalar_type data
// pointer is reinterpreted.
2411 const Scalar*
const inVals =
2412 reinterpret_cast<const Scalar*
>(inputVals.data());
2413 return this->replaceLocalValues(localRow, numInputEnt,
2414 inVals, inputInds.data());
// Raw-array overload (the line naming the function is not visible in this
// extract; presumably replaceLocalValues, which the ArrayView and Kokkos::View
// overloads call with this argument order -- confirm).
//
// Visible behavior:
//  - returns Teuchos::OrdinalTraits<LO>::invalid () if fill is not active or
//    staticGraph_ is null;
//  - returns 0 if rowInfo.localRow is the invalid size_t value (the lines that
//    obtain graph and rowInfo are not visible here);
//  - otherwise takes a non-const host view of the row's values, reinterprets
//    inputVals as const IST*, and returns the result of replaceLocalValuesImpl.
2417 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2421 const LocalOrdinal numEnt,
2422 const Scalar inputVals[],
2423 const LocalOrdinal inputCols[])
2426 typedef LocalOrdinal LO;
2428 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2430 return Teuchos::OrdinalTraits<LO>::invalid ();
2435 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2438 return static_cast<LO
> (0);
2440 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2441 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2442 return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2443 inputCols, inVals, numEnt);
// NOTE(review): the return type, function name and several statements of this
// definition are missing from this extract. Presumably this is
// replaceGlobalValuesImpl (the raw-array replaceGlobalValues overload in this
// file forwards matching arguments to it) -- confirm.
//
// Visible behavior: wraps inds/numElts in a Teuchos::ArrayView, defines a
// lambda that stores newVals[k] into rowVals[offset], and wraps that lambda
// (by reference, via std::ref) in a std::function named cb. The call that
// consumes cb is not visible here.
2446 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2452 const GlobalOrdinal inds[],
2454 const LocalOrdinal numElts)
2456 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
// Callback arguments: k indexes the input arrays, the second (unnamed)
// argument is unused, and offset indexes rowVals.
2458 [&](
size_t const k,
size_t const ,
size_t const offset) {
2459 rowVals[offset] = newVals[k];
2461 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
// ArrayView convenience overload (the line naming the function is not visible
// in this extract; the body forwards to this->replaceGlobalValues).
//
// Returns Teuchos::OrdinalTraits<LO>::invalid () when inputGblColInds and
// inputVals have different lengths; otherwise forwards the raw pointers and
// the entry count to the (globalRow, numEnt, vals[], cols[]) overload.
2465 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2469 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2470 const Teuchos::ArrayView<const Scalar>& inputVals)
2472 typedef LocalOrdinal LO;
2474 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2475 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2476 return Teuchos::OrdinalTraits<LO>::invalid ();
2478 return this->replaceGlobalValues (globalRow, numInputEnt,
2479 inputVals.getRawPtr (),
2480 inputGblColInds.getRawPtr ());
// Raw-array overload (the line naming the function is not visible in this
// extract; presumably replaceGlobalValues, which the ArrayView overload calls
// with this argument order -- confirm).
//
// Visible behavior:
//  - returns Teuchos::OrdinalTraits<LO>::invalid () if fill is not active or
//    staticGraph_ is null;
//  - returns 0 if rowInfo.localRow is the invalid size_t value (the lines that
//    obtain graph and rowInfo are not visible here);
//  - otherwise takes a non-const host view of the row's values, reinterprets
//    inputVals as const IST*, and returns the result of replaceGlobalValuesImpl.
2483 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2487 const LocalOrdinal numEnt,
2488 const Scalar inputVals[],
2489 const GlobalOrdinal inputGblColInds[])
2492 typedef LocalOrdinal LO;
2494 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2496 return Teuchos::OrdinalTraits<LO>::invalid ();
2501 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2504 return static_cast<LO
> (0);
2507 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2508 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2509 return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2510 inputGblColInds, inVals, numEnt);
// Kokkos::View convenience overload (the line naming the function is not
// visible in this extract; the body forwards to this->replaceGlobalValues).
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when inputInds and inputVals
// have different extents. Otherwise the impl_scalar_type data pointer is
// reinterpreted as const Scalar* and forwarded; the final argument of the
// forwarding call is not visible in this extract.
2513 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2519 const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2520 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2529 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
2530 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2531 return Teuchos::OrdinalTraits<LO>::invalid();
2533 const Scalar*
const inVals =
2534 reinterpret_cast<const Scalar*
>(inputVals.data());
2535 return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
// NOTE(review): the return type, function name, leading parameters and the
// branch conditions of this definition are missing from this extract.
// Presumably this is sumIntoGlobalValuesImpl (the raw-array sumIntoGlobalValues
// overload in this file forwards matching arguments to it) -- confirm.
//
// Visible behavior: two loops over the numElts input entries. Each one asks
// KokkosSparse::findRelOffset for the entry's position in the row and, when the
// returned offset differs from rowInfo.numEntries, adds newVals[j] into
// rowVals[offset]. Both a Kokkos::atomic_add form and a plain += form appear;
// the condition choosing between them is not visible here (presumably the
// trailing parameter of the signature -- confirm).
2539 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2545 const GlobalOrdinal inds[],
2547 const LocalOrdinal numElts,
2550 typedef LocalOrdinal LO;
2551 typedef GlobalOrdinal GO;
2553 const bool sorted = graph.
isSorted ();
2562 if (graph.
colMap_.is_null ()) {
2573 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
// First visible loop: searches for lclColInd (its definition is not visible
// in this extract) and skips entries whose lclColInd equals LINV.
2575 for (LO j = 0; j < numElts; ++j) {
2577 if (lclColInd != LINV) {
2578 const size_t offset =
2579 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2580 lclColInd, hint, sorted);
2581 if (offset != rowInfo.numEntries) {
2583 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2586 rowVals[offset] += newVals[j];
// Second visible loop: inds[j] is used directly as the column index to search for.
2599 for (LO j = 0; j < numElts; ++j) {
2600 const GO gblColInd = inds[j];
2601 const size_t offset =
2602 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2603 gblColInd, hint, sorted);
2604 if (offset != rowInfo.numEntries) {
2606 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2609 rowVals[offset] += newVals[j];
// ArrayView convenience overload (the line naming the function is not visible
// in this extract; the body forwards to this->sumIntoGlobalValues).
//
// Returns Teuchos::OrdinalTraits<LO>::invalid () when inputGblColInds and
// inputVals have different lengths; otherwise forwards the raw pointers and
// the entry count to the raw-array overload. The final argument of the
// forwarding call is not visible in this extract.
2623 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2627 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2628 const Teuchos::ArrayView<const Scalar>& inputVals,
2631 typedef LocalOrdinal LO;
2633 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2634 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2635 return Teuchos::OrdinalTraits<LO>::invalid ();
2637 return this->sumIntoGlobalValues (gblRow, numInputEnt,
2638 inputVals.getRawPtr (),
2639 inputGblColInds.getRawPtr (),
// Raw-array overload (the line naming the function is not visible in this
// extract; presumably sumIntoGlobalValues, which the ArrayView overload calls
// with this argument order -- confirm).
//
// Visible behavior:
//  - returns Teuchos::OrdinalTraits<LO>::invalid () if fill is not active or
//    staticGraph_ is null;
//  - when rowInfo.localRow is the invalid size_t value, the inputs are wrapped
//    in ArrayViews and passed to insertNonownedGlobalValues (the remaining
//    arguments and the value returned on this path are not visible here);
//  - otherwise takes a non-const host view of the row's values, reinterprets
//    inputVals as const IST*, and returns the result of sumIntoGlobalValuesImpl.
2643 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2647 const LocalOrdinal numInputEnt,
2648 const Scalar inputVals[],
2649 const GlobalOrdinal inputGblColInds[],
2653 typedef LocalOrdinal LO;
2654 typedef GlobalOrdinal GO;
2656 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2658 return Teuchos::OrdinalTraits<LO>::invalid ();
2663 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2668 using Teuchos::ArrayView;
// A null pointer is substituted for the data pointer when numInputEnt is zero.
2669 ArrayView<const GO> inputGblColInds_av(
2670 numInputEnt == 0 ?
nullptr : inputGblColInds,
2672 ArrayView<const Scalar> inputVals_av(
2673 numInputEnt == 0 ?
nullptr :
2674 inputVals, numInputEnt);
2679 this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2690 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2691 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2692 return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2693 inputGblColInds, inVals,
2694 numInputEnt, atomic);
// Row-index entry point that applies the binary function f to entries of a
// local row (the line naming the function is not visible in this extract;
// the body forwards to this->transformLocalValues).
//
// Visible behavior:
//  - returns Teuchos::OrdinalTraits<LO>::invalid () if fill is not active or
//    staticGraph_ is null;
//  - returns 0 if graph.getRowInfo (lclRow) reports an invalid localRow;
//  - otherwise takes a non-const host view of the row's values and forwards
//    to the row-storage overload of transformLocalValues, returning its result.
2698 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2702 const LocalOrdinal numInputEnt,
2703 const impl_scalar_type inputVals[],
2704 const LocalOrdinal inputCols[],
2705 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2708 using Tpetra::Details::OrdinalTraits;
2709 typedef LocalOrdinal LO;
2711 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2713 return Teuchos::OrdinalTraits<LO>::invalid ();
2715 const crs_graph_type& graph = * (this->staticGraph_);
2716 const RowInfo rowInfo = graph.getRowInfo (lclRow);
2718 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2721 return static_cast<LO
> (0);
2723 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2724 return this->transformLocalValues (curRowVals.data (), graph,
2725 rowInfo, inputCols, inputVals,
2726 numInputEnt, f, atomic);
// Row-index entry point that applies the binary function f to entries of the
// row with global index gblRow.
//
// Visible behavior:
//  - returns OrdinalTraits<LO>::invalid () if fill is not active or
//    staticGraph_ is null;
//  - returns 0 if graph.getRowInfoFromGlobalRowIndex (gblRow) reports an
//    invalid localRow;
//  - otherwise takes a non-const host view of the row's values and forwards
//    to the row-storage overload of transformGlobalValues, returning its result.
// The return type line and the last parameter line are not visible in this
// extract.
2729 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2731 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2732 transformGlobalValues (
const GlobalOrdinal gblRow,
2733 const LocalOrdinal numInputEnt,
2734 const impl_scalar_type inputVals[],
2735 const GlobalOrdinal inputCols[],
2736 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2739 using Tpetra::Details::OrdinalTraits;
2740 typedef LocalOrdinal LO;
2742 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2744 return OrdinalTraits<LO>::invalid ();
2746 const crs_graph_type& graph = * (this->staticGraph_);
2747 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2749 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2752 return static_cast<LO
> (0);
2754 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2755 return this->transformGlobalValues (curRowVals.data (), graph,
2756 rowInfo, inputCols, inputVals,
2757 numInputEnt, f, atomic);
// Row-storage overload: applies f to entries of rowVals selected by the local
// column indices in inds, combining each with the matching newVals[j].
//
// Visible behavior:
//  - locally indexed graph: inds[j] is searched for directly in the row's
//    local indices;
//  - globally indexed graph: inds[j] is first converted with
//    colMap.getGlobalElement; entries that convert to GINV are skipped. The
//    handling of a null graph.colMap_ is not visible in this extract.
// In both cases an entry is updated only when findRelOffset returns an offset
// different from rowInfo.numEntries. Both an atomic_binary_function_update
// form and a plain assignment form appear; the condition choosing between them
// is not visible here (presumably the trailing parameter -- confirm).
// The return type, last parameter and return statements are also not visible.
2760 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2762 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2763 transformLocalValues (impl_scalar_type rowVals[],
2764 const crs_graph_type& graph,
2765 const RowInfo& rowInfo,
2766 const LocalOrdinal inds[],
2767 const impl_scalar_type newVals[],
2768 const LocalOrdinal numElts,
2769 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2772 typedef impl_scalar_type ST;
2773 typedef LocalOrdinal LO;
2774 typedef GlobalOrdinal GO;
2781 const bool sorted = graph.isSorted ();
2786 if (graph.isLocallyIndexed ()) {
2789 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2791 for (LO j = 0; j < numElts; ++j) {
2792 const LO lclColInd = inds[j];
2793 const size_t offset =
2794 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2795 lclColInd, hint, sorted);
2796 if (offset != rowInfo.numEntries) {
2805 volatile ST*
const dest = &rowVals[offset];
2806 (void) atomic_binary_function_update (dest, newVals[j], f);
2810 rowVals[offset] = f (rowVals[offset], newVals[j]);
2817 else if (graph.isGloballyIndexed ()) {
2821 if (graph.colMap_.is_null ()) {
2828 const map_type& colMap = * (graph.colMap_);
2831 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2833 const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2834 for (LO j = 0; j < numElts; ++j) {
2835 const GO gblColInd = colMap.getGlobalElement (inds[j]);
2836 if (gblColInd != GINV) {
2837 const size_t offset =
2838 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2839 gblColInd, hint, sorted);
2840 if (offset != rowInfo.numEntries) {
2849 volatile ST*
const dest = &rowVals[offset];
2850 (void) atomic_binary_function_update (dest, newVals[j], f);
2854 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Row-storage overload: applies f to entries of rowVals selected by the global
// column indices in inds, combining each with the matching newVals[j].
//
// Visible behavior:
//  - globally indexed graph: inds[j] is searched for directly in the row's
//    global indices;
//  - locally indexed graph: inds[j] is first converted with
//    colMap.getLocalElement; entries that convert to LINV are skipped. The
//    handling of a null graph.colMap_ is not visible in this extract.
// In both cases an entry is updated only when findRelOffset returns an offset
// different from rowInfo.numEntries. Both an atomic_binary_function_update
// form and a plain assignment form appear; the condition choosing between them
// is not visible here (presumably the trailing parameter -- confirm).
// The return type, last parameter and return statements are also not visible.
2869 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2871 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2872 transformGlobalValues (impl_scalar_type rowVals[],
2873 const crs_graph_type& graph,
2874 const RowInfo& rowInfo,
2875 const GlobalOrdinal inds[],
2876 const impl_scalar_type newVals[],
2877 const LocalOrdinal numElts,
2878 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2881 typedef impl_scalar_type ST;
2882 typedef LocalOrdinal LO;
2883 typedef GlobalOrdinal GO;
2890 const bool sorted = graph.isSorted ();
2895 if (graph.isGloballyIndexed ()) {
2898 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2900 for (LO j = 0; j < numElts; ++j) {
2901 const GO gblColInd = inds[j];
2902 const size_t offset =
2903 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2904 gblColInd, hint, sorted);
2905 if (offset != rowInfo.numEntries) {
2914 volatile ST*
const dest = &rowVals[offset];
2915 (void) atomic_binary_function_update (dest, newVals[j], f);
2919 rowVals[offset] = f (rowVals[offset], newVals[j]);
2926 else if (graph.isLocallyIndexed ()) {
2930 if (graph.colMap_.is_null ()) {
2936 const map_type& colMap = * (graph.colMap_);
2939 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2941 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2942 for (LO j = 0; j < numElts; ++j) {
2943 const LO lclColInd = colMap.getLocalElement (inds[j]);
2944 if (lclColInd != LINV) {
2945 const size_t offset =
2946 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2947 lclColInd, hint, sorted);
2948 if (offset != rowInfo.numEntries) {
2957 volatile ST*
const dest = &rowVals[offset];
2958 (void) atomic_binary_function_update (dest, newVals[j], f);
2962 rowVals[offset] = f (rowVals[offset], newVals[j]);
// NOTE(review): the return type, function name, leading parameters and the
// branch conditions of this definition are missing from this extract.
// Presumably this is sumIntoLocalValuesImpl (the raw-array sumIntoLocalValues
// overload in this file forwards matching arguments to it) -- confirm.
//
// Visible behavior: two loops over the numElts input entries. Each one asks
// KokkosSparse::findRelOffset for the entry's position in the row and, when the
// returned offset differs from rowInfo.numEntries, adds newVals[j] into
// rowVals[offset]. Both a Kokkos::atomic_add form and a plain += form appear;
// the condition choosing between them is not visible here. A null
// graph.colMap_ makes the function return Teuchos::OrdinalTraits<LO>::invalid ().
2977 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2983 const LocalOrdinal inds[],
2985 const LocalOrdinal numElts,
2988 typedef LocalOrdinal LO;
2989 typedef GlobalOrdinal GO;
2991 const bool sorted = graph.
isSorted ();
// First visible loop: inds[j] is used directly as the column index to search for.
3001 for (LO j = 0; j < numElts; ++j) {
3002 const LO lclColInd = inds[j];
3003 const size_t offset =
3004 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3005 lclColInd, hint, sorted);
3006 if (offset != rowInfo.numEntries) {
3008 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3011 rowVals[offset] += newVals[j];
3019 if (graph.
colMap_.is_null ()) {
3020 return Teuchos::OrdinalTraits<LO>::invalid ();
// Second visible loop: searches for gblColInd (its definition is not visible
// in this extract) and skips entries whose gblColInd is the invalid GO value.
3028 for (LO j = 0; j < numElts; ++j) {
3030 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
3031 const size_t offset =
3032 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3033 gblColInd, hint, sorted);
3034 if (offset != rowInfo.numEntries) {
3036 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3039 rowVals[offset] += newVals[j];
// ArrayView convenience overload (the line naming the function is not visible
// in this extract; the body forwards to this->sumIntoLocalValues).
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when indices and values have
// different lengths; otherwise forwards the raw pointers, the entry count and
// the atomic argument to the raw-array overload and returns its result.
3059 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3063 const Teuchos::ArrayView<const LocalOrdinal>& indices,
3064 const Teuchos::ArrayView<const Scalar>& values,
3068 const LO numInputEnt =
static_cast<LO
>(indices.size());
3069 if (static_cast<LO>(values.size()) != numInputEnt) {
3070 return Teuchos::OrdinalTraits<LO>::invalid();
3072 const LO*
const inputInds = indices.getRawPtr();
3073 const scalar_type*
const inputVals = values.getRawPtr();
3074 return this->sumIntoLocalValues(localRow, numInputEnt,
3075 inputVals, inputInds, atomic);
// Kokkos::View convenience overload (the line naming the function is not
// visible in this extract; the body forwards to this->sumIntoLocalValues).
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when inputInds and inputVals
// have different extents. Otherwise the impl_scalar_type data pointer is
// reinterpreted as const scalar_type* (the declaration of inVals is only
// partly visible here) and forwarded to the raw-array overload.
3078 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3084 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
3085 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
3089 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
3090 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
3091 return Teuchos::OrdinalTraits<LO>::invalid();
3094 reinterpret_cast<const scalar_type*
>(inputVals.data());
3095 return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
3096 inputInds.data(), atomic);
// Raw-array overload (the line naming the function is not visible in this
// extract; presumably sumIntoLocalValues, which the ArrayView and Kokkos::View
// overloads call with this argument order -- confirm).
//
// Visible behavior:
//  - returns Teuchos::OrdinalTraits<LO>::invalid () if fill is not active or
//    staticGraph_ is null;
//  - returns 0 if rowInfo.localRow is the invalid size_t value (the lines that
//    obtain graph and rowInfo are not visible here);
//  - otherwise takes a non-const host view of the row's values, reinterprets
//    vals as const IST*, and returns the result of sumIntoLocalValuesImpl.
3099 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3103 const LocalOrdinal numEnt,
3104 const Scalar vals[],
3105 const LocalOrdinal cols[],
3109 typedef LocalOrdinal LO;
3111 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3113 return Teuchos::OrdinalTraits<LO>::invalid ();
3118 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3121 return static_cast<LO
> (0);
3123 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
3124 const IST*
const inputVals =
reinterpret_cast<const IST*
> (vals);
3125 return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3126 cols, inputVals, numEnt, atomic);
// Returns a read-only host view of one row's values (the function name is not
// visible in this extract; presumably getValuesViewHost -- confirm).
// A default-constructed (empty) view is returned when rowinfo.allocSize is 0
// or valuesUnpacked_wdv has extent 0; otherwise a host subview of
// valuesUnpacked_wdv starting at rowinfo.offset1D (remaining subview
// arguments are not visible here).
3129 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3131 values_dualv_type::t_host::const_type
3135 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3136 return typename values_dualv_type::t_host::const_type ();
3138 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a writable host view of one row's values (the function name is not
// visible in this extract; presumably getValuesViewHostNonConst -- confirm).
// A default-constructed (empty) view is returned when rowinfo.allocSize is 0
// or valuesUnpacked_wdv has extent 0; otherwise a host subview of
// valuesUnpacked_wdv starting at rowinfo.offset1D (remaining subview
// arguments are not visible here).
3143 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3145 values_dualv_type::t_host
3149 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3150 return typename values_dualv_type::t_host ();
3152 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a read-only device view of one row's values (the function name is
// not visible in this extract; presumably getValuesViewDevice -- confirm).
// A default-constructed (empty) view is returned when rowinfo.allocSize is 0
// or valuesUnpacked_wdv has extent 0; otherwise a device subview of
// valuesUnpacked_wdv starting at rowinfo.offset1D (remaining subview
// arguments are not visible here).
3157 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3159 values_dualv_type::t_dev::const_type
3163 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3164 return typename values_dualv_type::t_dev::const_type ();
3166 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// Returns a writable device view of one row's values (the function name is
// not visible in this extract; presumably getValuesViewDeviceNonConst --
// confirm). A default-constructed (empty) view is returned when
// rowinfo.allocSize is 0 or valuesUnpacked_wdv has extent 0; otherwise a
// device subview of valuesUnpacked_wdv starting at rowinfo.offset1D
// (remaining subview arguments are not visible here).
3171 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3173 values_dualv_type::t_dev
3177 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3178 return typename values_dualv_type::t_dev ();
3180 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// getLocalRowCopy: copies the entries of local row localRow into the
// caller-provided indices and values views and reports the entry count
// through numEntries.
//
// Throws std::runtime_error if the matrix has no column Map, or if either
// output view is shorter than the row's number of entries.
// numEntries is assigned before the copy, so it is set even when
// rowinfo.localRow is invalid and nothing is copied.
// NOTE(review): in the globally indexed branch the statement that writes
// indices[j] is not visible in this extract.
3186 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3190 nonconst_local_inds_host_view_type &indices,
3191 nonconst_values_host_view_type &values,
3192 size_t& numEntries)
const
3194 using Teuchos::ArrayView;
3195 using Teuchos::av_reinterpret_cast;
3196 const char tfecfFuncName[] =
"getLocalRowCopy: ";
3198 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3199 (! this->hasColMap (), std::runtime_error,
3200 "The matrix does not have a column Map yet. This means we don't have "
3201 "local indices for columns yet, so it doesn't make sense to call this "
3202 "method. If the matrix doesn't have a column Map yet, you should call "
3203 "fillComplete on it first.");
3205 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3206 const size_t theNumEntries = rowinfo.numEntries;
3207 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3208 (static_cast<size_t> (indices.size ()) < theNumEntries ||
3209 static_cast<size_t> (values.size ()) < theNumEntries,
3210 std::runtime_error,
"Row with local index " << localRow <<
" has " <<
3211 theNumEntries <<
" entry/ies, but indices.size() = " <<
3212 indices.size () <<
" and values.size() = " << values.size () <<
".");
3213 numEntries = theNumEntries;
3215 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3216 if (staticGraph_->isLocallyIndexed ()) {
3217 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3218 auto curVals = getValuesViewHost(rowinfo);
3220 for (
size_t j = 0; j < theNumEntries; ++j) {
3221 values[j] = curVals[j];
3222 indices[j] = curLclInds(j);
3225 else if (staticGraph_->isGloballyIndexed ()) {
3227 const map_type& colMap = * (staticGraph_->colMap_);
3228 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3229 auto curVals = getValuesViewHost(rowinfo);
3231 for (
size_t j = 0; j < theNumEntries; ++j) {
3232 values[j] = curVals[j];
// getGlobalRowCopy: copies the entries of the row with global index globalRow
// into the caller-provided indices and values views and reports the entry
// count through numEntries.
//
// Throws std::runtime_error if either output view is shorter than the row's
// number of entries. numEntries is assigned before the copy, so it is set
// even when rowinfo.localRow is invalid and nothing is copied.
// NOTE(review): in the locally indexed branch the statement that writes
// indices[j] is not visible in this extract.
3240 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3244 nonconst_global_inds_host_view_type &indices,
3245 nonconst_values_host_view_type &values,
3246 size_t& numEntries)
const
3248 using Teuchos::ArrayView;
3249 using Teuchos::av_reinterpret_cast;
3250 const char tfecfFuncName[] =
"getGlobalRowCopy: ";
3253 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3254 const size_t theNumEntries = rowinfo.numEntries;
3255 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3256 static_cast<size_t> (indices.size ()) < theNumEntries ||
3257 static_cast<size_t> (values.size ()) < theNumEntries,
3258 std::runtime_error,
"Row with global index " << globalRow <<
" has "
3259 << theNumEntries <<
" entry/ies, but indices.size() = " <<
3260 indices.size () <<
" and values.size() = " << values.size () <<
".");
3261 numEntries = theNumEntries;
3263 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3264 if (staticGraph_->isLocallyIndexed ()) {
3265 const map_type& colMap = * (staticGraph_->colMap_);
3266 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3267 auto curVals = getValuesViewHost(rowinfo);
3269 for (
size_t j = 0; j < theNumEntries; ++j) {
3270 values[j] = curVals[j];
3274 else if (staticGraph_->isGloballyIndexed ()) {
3275 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3276 auto curVals = getValuesViewHost(rowinfo);
3278 for (
size_t j = 0; j < theNumEntries; ++j) {
3279 values[j] = curVals[j];
3280 indices[j] = curGblInds(j);
// getLocalRowView: sets indices and values to host subviews of the storage
// for local row localRow (no copy is visible in this code path).
//
// Throws std::runtime_error if the matrix is globally indexed.
// When the row is valid and has at least one entry, the outputs are subviews
// of staticGraph_->lclIndsUnpacked_wdv and valuesUnpacked_wdv (the subview
// extents are not visible in this extract). The outputs are also assigned
// default-constructed (empty) views; presumably that is the else branch, but
// the else line is not visible here -- confirm.
// Under HAVE_TPETRA_DEBUG, three std::logic_error checks verify that the two
// output sizes agree with each other, with rowInfo.numEntries, and that
// rowInfo.numEntries matches getNumEntriesInLocalRow (localRow).
3287 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3291 local_inds_host_view_type &indices,
3292 values_host_view_type &values)
const
3294 const char tfecfFuncName[] =
"getLocalRowView: ";
3296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3297 isGloballyIndexed (), std::runtime_error,
"The matrix currently stores "
3298 "its indices as global indices, so you cannot get a view with local "
3299 "column indices. If the matrix has a column Map, you may call "
3300 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3301 "a view with global column indices by calling getGlobalRowCopy().");
3303 const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
3304 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3305 rowInfo.numEntries > 0) {
3306 indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
3310 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3317 indices = local_inds_host_view_type();
3318 values = values_host_view_type();
3321 #ifdef HAVE_TPETRA_DEBUG
3322 const char suffix[] =
". This should never happen. Please report this "
3323 "bug to the Tpetra developers.";
3324 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3325 (static_cast<size_t> (indices.size ()) !=
3326 static_cast<size_t> (values.size ()), std::logic_error,
3327 "At the end of this method, for local row " << localRow <<
", "
3328 "indices.size() = " << indices.size () <<
" != values.size () = "
3329 << values.size () << suffix);
3330 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3331 (static_cast<size_t> (indices.size ()) !=
3332 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3333 "At the end of this method, for local row " << localRow <<
", "
3334 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3335 << rowInfo.numEntries << suffix);
3336 const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3337 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3338 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3339 "of this method, for local row " << localRow <<
", rowInfo.numEntries = "
3340 << rowInfo.numEntries <<
" != getNumEntriesInLocalRow(localRow) = " <<
3341 expectedNumEntries << suffix);
3342 #endif // HAVE_TPETRA_DEBUG
// getGlobalRowView: sets indices and values to host subviews of the storage
// for the row with global index globalRow (no copy is visible in this code
// path).
//
// Throws std::runtime_error if the matrix is locally indexed.
// When the row is valid and has at least one entry, the outputs are subviews
// of staticGraph_->gblInds_wdv and valuesUnpacked_wdv starting at
// rowInfo.offset1D (the subview extents are not visible in this extract).
// The outputs are also assigned default-constructed (empty) views; presumably
// that is the else branch, but the else line is not visible here -- confirm.
// Under HAVE_TPETRA_DEBUG, three std::logic_error checks verify that the two
// output sizes agree with each other, with rowInfo.numEntries, and that
// rowInfo.numEntries matches getNumEntriesInGlobalRow (globalRow).
3346 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3350 global_inds_host_view_type &indices,
3351 values_host_view_type &values)
const
3353 const char tfecfFuncName[] =
"getGlobalRowView: ";
3355 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3356 isLocallyIndexed (), std::runtime_error,
3357 "The matrix is locally indexed, so we cannot return a view of the row "
3358 "with global column indices. Use getGlobalRowCopy() instead.");
3363 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3364 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3365 rowInfo.numEntries > 0) {
3366 indices = staticGraph_->gblInds_wdv.getHostSubview(rowInfo.offset1D,
3369 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3374 indices = global_inds_host_view_type();
3375 values = values_host_view_type();
3378 #ifdef HAVE_TPETRA_DEBUG
3379 const char suffix[] =
". This should never happen. Please report this "
3380 "bug to the Tpetra developers.";
3381 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3382 (static_cast<size_t> (indices.size ()) !=
3383 static_cast<size_t> (values.size ()), std::logic_error,
3384 "At the end of this method, for global row " << globalRow <<
", "
3385 "indices.size() = " << indices.size () <<
" != values.size () = "
3386 << values.size () << suffix);
3387 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3388 (static_cast<size_t> (indices.size ()) !=
3389 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3390 "At the end of this method, for global row " << globalRow <<
", "
3391 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3392 << rowInfo.numEntries << suffix);
3393 const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3394 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3395 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3396 "of this method, for global row " << globalRow <<
", rowInfo.numEntries "
3397 "= " << rowInfo.numEntries <<
" != getNumEntriesInGlobalRow(globalRow) ="
3398 " " << expectedNumEntries << suffix);
3399 #endif // HAVE_TPETRA_DEBUG
// NOTE(review): the signature of this definition is missing from this
// extract; presumably this is CrsMatrix::scale -- confirm.
//
// Visible behavior: checks whether the graph's indices are unallocated or the
// local row/entry counts are zero (the body of that check is not visible
// here), then obtains a read-write device view of the packed values and calls
// KokkosBlas::scal (vals, theAlpha, vals) on it, with the same view as both
// output and input.
3403 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3410 const size_t nlrs = staticGraph_->getLocalNumRows ();
3411 const size_t numEntries = staticGraph_->getLocalNumEntries ();
3412 if (! staticGraph_->indicesAreAllocated () ||
3413 nlrs == 0 || numEntries == 0) {
3418 auto vals = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3419 KokkosBlas::scal(vals, theAlpha, vals);
// NOTE(review): the signature and the statement that actually assigns the
// values are missing from this extract; the name setAllToScalar is taken from
// the fence label below.
//
// Visible behavior: checks whether the graph's indices are unallocated or the
// local entry count is zero (the body of that check is not visible here), and
// issues a Kokkos::fence labelled "CrsMatrix::setAllToScalar".
3424 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3435 const size_t numEntries = staticGraph_->getLocalNumEntries();
3436 if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
3444 Kokkos::fence(
"CrsMatrix::setAllToScalar");
// setAllValues (Kokkos::View overload): installs the given row pointers,
// column indices and values as the matrix's storage.
//
// Throws std::invalid_argument if columnIndices and values differ in size,
// and std::runtime_error if myGraph_ is null. Any std::exception thrown by
// myGraph_->setAllIndices is rethrown as std::runtime_error with its what()
// text. After the graph is updated, the local graph's extents are checked
// against the inputs (std::logic_error on mismatch), valuesUnpacked_wdv is
// set to valuesPacked_wdv, storageStatus_ becomes STORAGE_1D_PACKED, and
// checkInternalState () is called.
// NOTE(review): the statement that assigns valuesPacked_wdv from values is
// not visible in this extract.
3448 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3451 setAllValues (
const typename local_graph_device_type::row_map_type& rowPointers,
3452 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
3453 const typename local_matrix_device_type::values_type& values)
3456 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues");
3457 const char tfecfFuncName[] =
"setAllValues: ";
3458 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3459 (columnIndices.size () != values.size (), std::invalid_argument,
3460 "columnIndices.size() = " << columnIndices.size () <<
" != values.size()"
3461 " = " << values.size () <<
".");
3462 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3463 (myGraph_.is_null (), std::runtime_error,
"myGraph_ must not be null.");
3466 myGraph_->setAllIndices (rowPointers, columnIndices);
3468 catch (std::exception &e) {
3469 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3470 (
true, std::runtime_error,
"myGraph_->setAllIndices() threw an "
3471 "exception: " << e.what ());
// Sanity check: the graph's local structure must match the input extents.
3478 auto lclGraph = myGraph_->getLocalGraphDevice ();
3479 const size_t numEnt = lclGraph.entries.extent (0);
3480 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3481 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3482 numEnt !=
static_cast<size_t> (columnIndices.extent (0)),
3483 std::logic_error,
"myGraph_->setAllIndices() did not correctly create "
3484 "local graph. Please report this bug to the Tpetra developers.");
3487 valuesUnpacked_wdv = valuesPacked_wdv;
3491 this->storageStatus_ = Details::STORAGE_1D_PACKED;
3493 checkInternalState ();
// setAllValues (local KokkosSparse matrix overload; the signature is not
// visible in this extract, the name is taken from the profiling label).
// Extracts the row map, column entries and values from localDeviceMatrix and
// forwards them to the three-view setAllValues overload.
3496 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3502 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues from KokkosSparse::CrsMatrix");
3504 auto graph = localDeviceMatrix.graph;
3507 auto rows = graph.row_map;
3508 auto columns = graph.entries;
3509 auto values = localDeviceMatrix.values;
3511 setAllValues(rows,columns,values);
// setAllValues (Teuchos::ArrayRCP overload; the name is taken from
// tfecfFuncName, the first parameter line is not visible in this extract).
//
// Allocates a native row-pointer view (ptrNative) with ptr.size () entries
// and wraps the caller's ptr data in an unmanaged const size_t view
// (ptrSizeT); a std::logic_error guards against the two extents differing.
// ind and val are deep-copied into Kokkos views (val reinterpreted as IST)
// and everything is forwarded to the three-view setAllValues overload.
// NOTE(review): the statement that copies ptrSizeT into ptrNative is not
// visible in this extract.
3514 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3518 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3519 const Teuchos::ArrayRCP<Scalar>& val)
3521 using Kokkos::Compat::getKokkosViewDeepCopy;
3522 using Teuchos::ArrayRCP;
3523 using Teuchos::av_reinterpret_cast;
3526 typedef typename local_graph_device_type::row_map_type row_map_type;
3528 const char tfecfFuncName[] =
"setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3534 typename row_map_type::non_const_type ptrNative (
"ptr", ptr.size ());
3535 Kokkos::View<
const size_t*,
3536 typename row_map_type::array_layout,
3538 Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3541 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3542 (ptrNative.extent (0) != ptrSizeT.extent (0),
3543 std::logic_error,
"ptrNative.extent(0) = " <<
3544 ptrNative.extent (0) <<
" != ptrSizeT.extent(0) = "
3545 << ptrSizeT.extent (0) <<
". Please report this bug to the "
3546 "Tpetra developers.");
3548 auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3549 auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3550 this->setAllValues (ptrNative, indIn, valIn);
// getLocalDiagOffsets (name taken from tfecfFuncName; the signature is not
// visible in this extract): fills offsets with the graph's local diagonal
// offsets.
//
// Throws std::runtime_error if staticGraph_ is null. offsets is grown to
// lclNumRows entries if it is shorter. When memory_space is Kokkos::HostSpace
// the graph writes directly into an unmanaged view over offsets; otherwise
// the graph fills a temporary device view (offsetsTmp) and an unmanaged host
// view over offsets is created as the destination.
// NOTE(review): the statement copying offsetsTmp into offsetsOut is not
// visible in this extract.
3553 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3558 const char tfecfFuncName[] =
"getLocalDiagOffsets: ";
3559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3560 (staticGraph_.is_null (), std::runtime_error,
"The matrix has no graph.");
3567 const size_t lclNumRows = staticGraph_->getLocalNumRows ();
3568 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3569 offsets.resize (lclNumRows);
3575 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3580 Kokkos::MemoryUnmanaged> output_type;
3581 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3582 staticGraph_->getLocalDiagOffsets (offsetsOut);
3585 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
3586 staticGraph_->getLocalDiagOffsets (offsetsTmp);
3587 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
3588 Kokkos::MemoryUnmanaged> output_type;
3589 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
// getLocalDiagCopy, one-argument form (name taken from tfecfFuncName; the
// signature is not visible in this extract).
//
// Throws std::runtime_error if the matrix has no graph or no column Map.
// A null row Map, or a row Map with a null communicator, is checked before
// the column Map (the body of that check is not visible here). In debug
// builds the input Vector's Map must be compatible with the row Map.
// When the matrix is fill complete, a 1-D subview of column 0 of D_lcl over
// rows [0, myNumRows) is formed and the local row map and local device
// matrix are obtained; the call that consumes them, and the handling of the
// not-fill-complete case, are not visible in this extract.
3595 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3600 using Teuchos::ArrayRCP;
3601 using Teuchos::ArrayView;
3602 using Teuchos::av_reinterpret_cast;
3603 const char tfecfFuncName[] =
"getLocalDiagCopy (1-arg): ";
3607 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3608 staticGraph_.is_null (), std::runtime_error,
3609 "This method requires that the matrix have a graph.");
3610 auto rowMapPtr = this->getRowMap ();
3611 if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3617 auto colMapPtr = this->getColMap ();
3618 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3619 (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3620 "This method requires that the matrix have a column Map.");
3621 const map_type& rowMap = * rowMapPtr;
3622 const map_type& colMap = * colMapPtr;
3623 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3625 #ifdef HAVE_TPETRA_DEBUG
3628 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3629 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3630 "The input Vector's Map must be compatible with the CrsMatrix's row "
3631 "Map. You may check this by using Map's isCompatible method: "
3632 "diag.getMap ()->isCompatible (A.getRowMap ());");
3633 #endif // HAVE_TPETRA_DEBUG
3635 if (this->isFillComplete ()) {
3638 const auto D_lcl_1d =
3639 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3641 const auto lclRowMap = rowMap.getLocalMap ();
3646 getLocalMatrixDevice ());
// getLocalDiagCopy with precomputed offsets in an unmanaged Kokkos::View
// (name taken from the debug-only tfecfFuncName; most of the signature is not
// visible in this extract).
//
// In debug builds, throws std::runtime_error if the input Vector's Map is not
// compatible with the row Map. A 1-D subview of column 0 over rows
// [0, myNumRows) is formed and passed, together with offsets and the local
// device matrix, to KokkosSparse::getDiagCopy. The declarations of D_lcl and
// D_lcl_1d are only partly visible here.
3654 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3659 Kokkos::MemoryUnmanaged>& offsets)
const
3661 typedef LocalOrdinal LO;
3663 #ifdef HAVE_TPETRA_DEBUG
3664 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3665 const map_type& rowMap = * (this->getRowMap ());
3668 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3669 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3670 "The input Vector's Map must be compatible with (in the sense of Map::"
3671 "isCompatible) the CrsMatrix's row Map.");
3672 #endif // HAVE_TPETRA_DEBUG
3682 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3685 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3687 KokkosSparse::getDiagCopy (D_lcl_1d, offsets,
3688 getLocalMatrixDevice ());
// getLocalDiagCopy with precomputed offsets in a Teuchos::ArrayView (name
// taken from the debug-only tfecfFuncName; the start of the signature is not
// visible in this extract).
//
// In debug builds, throws std::runtime_error if the input Vector's Map is not
// compatible with the row Map. The offsets are wrapped in an unmanaged host
// view, and a Kokkos::parallel_for on the default host execution space visits
// each local row: the output entry is first zeroed, then, if the row's offset
// is not the invalid size_t value, set to the packed value at
// rowPtrsPackedHost (lclRow) + h_offsets[lclRow].
// The declaration of lclVecHost is not visible in this extract.
3691 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3695 const Teuchos::ArrayView<const size_t>& offsets)
const
3697 using LO = LocalOrdinal;
3698 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
3701 #ifdef HAVE_TPETRA_DEBUG
3702 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3703 const map_type& rowMap = * (this->getRowMap ());
3706 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3707 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3708 "The input Vector's Map must be compatible with (in the sense of Map::"
3709 "isCompatible) the CrsMatrix's row Map.");
3710 #endif // HAVE_TPETRA_DEBUG
3722 auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
3724 using host_offsets_view_type =
3725 Kokkos::View<
const size_t*, Kokkos::HostSpace,
3726 Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
3727 host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
3729 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
3730 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3731 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
3733 auto rowPtrsPackedHost = staticGraph_->getRowPtrsPackedHost();
3734 auto valuesPackedHost = valuesPacked_wdv.getHostView(Access::ReadOnly);
// The lambda captures INV and h_offsets by value and everything else by reference.
3735 Kokkos::parallel_for
3736 (
"Tpetra::CrsMatrix::getLocalDiagCopy",
3737 range_type (0, myNumRows),
3738 [&, INV, h_offsets] (
const LO lclRow) {
3739 lclVecHost1d(lclRow) = STS::zero ();
3740 if (h_offsets[lclRow] != INV) {
3741 auto curRowOffset = rowPtrsPackedHost (lclRow);
3742 lclVecHost1d(lclRow) =
3743 static_cast<IST
> (valuesPackedHost(curRowOffset+h_offsets[lclRow]));
// leftScale (name taken from tfecfFuncName; the signature is not visible in
// this extract): scales the matrix using the entries of the Vector x.
//
// x's Map must be the same as either the range Map or the row Map of the
// matrix; otherwise std::invalid_argument is thrown.
//  - Range-Map case: if the graph has an Exporter, a temporary Vector on the
//    row Map is filled with doImport (x, *exporter, REPLACE); a reference to
//    x is also taken via rcpFromRef (presumably in the no-Exporter branch,
//    whose else line is not visible here -- confirm).
//  - Row-Map case: x is used directly via rcpFromRef.
// If the matrix is fill complete, a read-only device view of column 0 of the
// chosen Vector is passed on to a scaling routine whose call line is not
// visible here; otherwise std::runtime_error is thrown.
3750 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3755 using ::Tpetra::Details::ProfilingRegion;
3756 using Teuchos::ArrayRCP;
3757 using Teuchos::ArrayView;
3758 using Teuchos::null;
3761 using Teuchos::rcpFromRef;
3763 const char tfecfFuncName[] =
"leftScale: ";
3765 ProfilingRegion region (
"Tpetra::CrsMatrix::leftScale");
3767 RCP<const vec_type> xp;
3768 if (this->getRangeMap ()->isSameAs (* (x.
getMap ()))) {
3771 auto exporter = this->getCrsGraphRef ().getExporter ();
3772 if (exporter.get () !=
nullptr) {
3773 RCP<vec_type> tempVec (
new vec_type (this->getRowMap ()));
3774 tempVec->doImport (x, *exporter,
REPLACE);
3778 xp = rcpFromRef (x);
3781 else if (this->getRowMap ()->isSameAs (* (x.
getMap ()))) {
3782 xp = rcpFromRef (x);
3785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3786 (
true, std::invalid_argument,
"x's Map must be the same as "
3787 "either the row Map or the range Map of the CrsMatrix.");
3790 if (this->isFillComplete()) {
3791 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3792 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3795 x_lcl_1d,
false,
false);
3799 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3800 (
true, std::runtime_error,
"CrsMatrix::leftScale requires matrix to be"
// rightScale: scale the matrix using the entries of the vector `x`.
//
// The vector must live on either the matrix's domain Map or its column Map:
//   * domain Map: if the graph has an Import object, `x` is first imported
//     into a temporary vector built on the column Map; otherwise `x` is used
//     directly;
//   * column Map: `x` is used directly;
//   * anything else: std::runtime_error is thrown.
// The scaling itself is only performed when the matrix is fill complete;
// otherwise std::runtime_error is thrown.
//
// NOTE(review): the signature, the assignment of the temporary to `xp`, and
// the name of the local scaling routine are on lines not visible here.
3805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3810 using ::Tpetra::Details::ProfilingRegion;
3811 using Teuchos::ArrayRCP;
3812 using Teuchos::ArrayView;
3813 using Teuchos::null;
3816 using Teuchos::rcpFromRef;
3818 const char tfecfFuncName[] =
"rightScale: ";
3820 ProfilingRegion region (
"Tpetra::CrsMatrix::rightScale");
// xp ends up pointing at a vector that is usable for the local scaling.
3822 RCP<const vec_type> xp;
3823 if (this->getDomainMap ()->isSameAs (* (x.
getMap ()))) {
// x is on the domain Map; bring it onto the column Map when an Import
// object exists.
3826 auto importer = this->getCrsGraphRef ().getImporter ();
3827 if (importer.get () !=
nullptr) {
3828 RCP<vec_type> tempVec (
new vec_type (this->getColMap ()));
3829 tempVec->doImport (x, *importer,
REPLACE);
// No Import object: wrap x without taking ownership.
3833 xp = rcpFromRef (x);
3836 else if (this->getColMap ()->isSameAs (* (x.
getMap ()))) {
3837 xp = rcpFromRef (x);
// x is on neither Map: unconditional throw.
3839 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3840 (
true, std::runtime_error,
"x's Map must be the same as "
3841 "either the domain Map or the column Map of the CrsMatrix.");
3844 if (this->isFillComplete()) {
// Read-only device view of the vector, reduced to its first column.
3845 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3846 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
// Trailing arguments of the local scaling call (callee not visible here).
3849 x_lcl_1d,
false,
false);
// Not fill complete: unconditional throw.
3853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3854 (
true, std::runtime_error,
"CrsMatrix::rightScale requires matrix to be"
// Computes sqrt( sum over all processes of |a_ij|^2 ) for the locally stored
// entries — presumably CrsMatrix::getFrobeniusNorm (the name is on a line not
// visible here; confirm).
//
// Each process accumulates the squared magnitudes of its local entries into
// `mySum`, the partial sums are combined with a REDUCE_SUM all-reduce over the
// matrix's communicator, and the square root of the total is returned.
// The all-reduce is collective, so every process in the communicator must
// make this call.
3859 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3864 using Teuchos::ArrayView;
3865 using Teuchos::outArg;
3866 using Teuchos::REDUCE_SUM;
3867 using Teuchos::reduceAll;
// Skip the local accumulation entirely when this process stores no entries.
3875 if (getLocalNumEntries() > 0) {
3876 if (isStorageOptimized ()) {
// Optimized storage: walk the packed values array as one flat range.
3879 const size_t numEntries = getLocalNumEntries ();
3880 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
3881 for (
size_t k = 0; k < numEntries; ++k) {
3882 auto val = values[k];
3886 const mag_type val_abs = STS::abs (val);
3887 mySum += val_abs * val_abs;
// Otherwise: visit each local row and only its first rowInfo.numEntries
// values, as reported by the graph.
3891 const LocalOrdinal numRows =
3892 static_cast<LocalOrdinal
> (this->getLocalNumRows ());
3893 for (LocalOrdinal r = 0; r < numRows; ++r) {
3894 const RowInfo rowInfo = myGraph_->getRowInfo (r);
3895 const size_t numEntries = rowInfo.numEntries;
3896 auto A_r = this->getValuesViewHost(rowInfo);
3897 for (
size_t k = 0; k < numEntries; ++k) {
3899 const mag_type val_abs = STS::abs (val);
3900 mySum += val_abs * val_abs;
// Combine the per-process sums and take the square root of the total.
3906 reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3907 mySum, outArg (totalSum));
3908 return STM::sqrt (totalSum);
// replaceColMap: forward a new column Map to the graph owned by this matrix.
//
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix does
// not own a modifiable graph (the message calls this a "const graph").
// Otherwise the request is delegated unchanged to myGraph_->replaceColMap.
3911 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3916 const char tfecfFuncName[] =
"replaceColMap: ";
3920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3921 myGraph_.is_null (), std::runtime_error,
3922 "This method does not work if the matrix has a const graph. The whole "
3923 "idea of a const graph is that you are not allowed to change it, but "
3924 "this method necessarily must modify the graph, since the graph owns "
3925 "the matrix's column Map.");
3926 myGraph_->replaceColMap (newColMap);
// reindexColumns: re-express the column indices against a new column Map and,
// optionally, re-sort each row's indices together with its values.
//
// Visible parameters:
//   newColMap   - the new column Map.
//   newImport   - the Import object that goes with it.
//   sortEachRow - when true, rows are sorted afterwards if the graph is
//                 locally indexed and not already sorted.
// A `graph` pointer is also used below; it may be null, in which case the
// matrix's own graph (myGraph_) is used.  If both are null,
// std::invalid_argument is thrown.
//
// NOTE(review): the first signature line and the call that performs the
// actual reindexing are on lines not visible here.
3929 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3933 const Teuchos::RCP<const map_type>& newColMap,
3934 const Teuchos::RCP<const import_type>& newImport,
3935 const bool sortEachRow)
3937 const char tfecfFuncName[] =
"reindexColumns: ";
3938 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3939 graph ==
nullptr && myGraph_.is_null (), std::invalid_argument,
3940 "The input graph is null, but the matrix does not own its graph.");
// Prefer the caller-supplied graph; fall back to the matrix's own graph.
3942 crs_graph_type& theGraph = (graph ==
nullptr) ? *myGraph_ : *graph;
// Constant flag; its consumer is on a line not visible here (presumably it
// tells the graph not to sort, because sorting is done below so that values
// move with their indices — confirm).
3943 const bool sortGraph =
false;
3947 if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3948 const LocalOrdinal lclNumRows =
3949 static_cast<LocalOrdinal
> (theGraph.getLocalNumRows ());
// Sort every local row's column indices; `vals` is fetched for the same
// row so the values can be permuted alongside (third sort2 argument is on
// a line not visible here).
3951 for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3953 const RowInfo rowInfo = theGraph.getRowInfo (row);
3954 auto lclColInds = theGraph.getLocalIndsViewHostNonConst (rowInfo);
3955 auto vals = this->getValuesViewHostNonConst (rowInfo);
3957 sort2 (lclColInds.data (),
3958 lclColInds.data () + rowInfo.numEntries,
// Record on the graph that its indices are now sorted.
3961 theGraph.indicesAreSorted_ =
true;
// replaceDomainMap: forward a new domain Map to the graph owned by this
// matrix.
//
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix does
// not own a modifiable graph.  Otherwise the request is delegated unchanged to
// myGraph_->replaceDomainMap.
3965 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3970 const char tfecfFuncName[] =
"replaceDomainMap: ";
3971 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3972 myGraph_.is_null (), std::runtime_error,
3973 "This method does not work if the matrix has a const graph. The whole "
3974 "idea of a const graph is that you are not allowed to change it, but this"
3975 " method necessarily must modify the graph, since the graph owns the "
3976 "matrix's domain Map and Import objects.");
3977 myGraph_->replaceDomainMap (newDomainMap);
// replaceDomainMapAndImporter: forward a new domain Map together with a new
// Import object to the graph owned by this matrix.
//
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix does
// not own a modifiable graph.  Otherwise both arguments are delegated
// unchanged to myGraph_->replaceDomainMapAndImporter.
3980 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3984 Teuchos::RCP<const import_type>& newImporter)
3986 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
3987 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3988 myGraph_.is_null (), std::runtime_error,
3989 "This method does not work if the matrix has a const graph. The whole "
3990 "idea of a const graph is that you are not allowed to change it, but this"
3991 " method necessarily must modify the graph, since the graph owns the "
3992 "matrix's domain Map and Import objects.");
3993 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
// replaceRangeMap: forward a new range Map to the graph owned by this matrix.
//
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix does
// not own a modifiable graph.  Otherwise the request is delegated unchanged to
// myGraph_->replaceRangeMap.
//
// The error text names the range Map and Export objects, which are what this
// method changes (it previously repeated the domain-Map wording).
3996 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4001 const char tfecfFuncName[] =
"replaceRangeMap: ";
4002 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4003 myGraph_.is_null (), std::runtime_error,
4004 "This method does not work if the matrix has a const graph. The whole "
4005 "idea of a const graph is that you are not allowed to change it, but this"
4006 " method necessarily must modify the graph, since the graph owns the "
4007 "matrix's range Map and Export objects.");
4008 myGraph_->replaceRangeMap (newRangeMap);
// replaceRangeMapAndExporter: forward a new range Map together with a new
// Export object to the graph owned by this matrix.
//
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix does
// not own a modifiable graph.  Otherwise both arguments are delegated
// unchanged to myGraph_->replaceRangeMapAndExporter.
//
// The error text names the range Map and Export objects, which are what this
// method changes (it previously repeated the domain-Map wording).
4011 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4015 Teuchos::RCP<const export_type>& newExporter)
4017 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
4018 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4019 myGraph_.is_null (), std::runtime_error,
4020 "This method does not work if the matrix has a const graph. The whole "
4021 "idea of a const graph is that you are not allowed to change it, but this"
4022 " method necessarily must modify the graph, since the graph owns the "
4023 "matrix's range Map and Export objects.");
4024 myGraph_->replaceRangeMapAndExporter (newRangeMap, newExporter);
// Buffers (column index, value) pairs for the global row `globalRow` in the
// nonlocals_ container — presumably CrsMatrix::insertNonownedGlobalValues
// (the name and first parameter are on lines not visible here; confirm).
//
// Visible parameters:
//   indices - global column indices to append.
//   values  - matching values; values[k] is read for every k < indices.size().
//
// Entries are appended as-is; no sorting or duplicate merging happens here.
4027 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4031 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4032 const Teuchos::ArrayView<const Scalar>& values)
4034 using Teuchos::Array;
4035 typedef GlobalOrdinal GO;
4036 typedef typename Array<GO>::size_type size_type;
4038 const size_type numToInsert = indices.size ();
// Look up the (indices, values) pair of arrays for this row.
// NOTE(review): presumably nonlocals_ is a std::map, so operator[] creates an
// empty entry when the row is not present yet — confirm.
4041 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4042 Array<GO>& curRowInds = curRow.first;
4043 Array<Scalar>& curRowVals = curRow.second;
// Reserve once up front so the appends below do not reallocate repeatedly.
4044 const size_type newCapacity = curRowInds.size () + numToInsert;
4045 curRowInds.reserve (newCapacity);
4046 curRowVals.reserve (newCapacity);
4047 for (size_type k = 0; k < numToInsert; ++k) {
4048 curRowInds.push_back (indices[k]);
4049 curRowVals.push_back (values[k]);
// globalAssemble: move the entries buffered in nonlocals_ into the rows of
// this matrix, combining with Tpetra::ADD, then empty nonlocals_.
//
// Outline of the visible steps:
//   1. Require that fill is active (std::runtime_error otherwise).
//   2. All-reduce to learn whether any process has buffered rows.
//   3. Sort and merge each buffered row, and build a row Map and a temporary
//      matrix (`nonlocalMatrix`) holding exactly the buffered rows.
//   4. If the matrix's row Map is one-to-one, Export the temporary matrix
//      straight into this matrix.  Otherwise Export into an intermediate
//      matrix on a one-to-one row Map and Import from that into this matrix.
//   5. Replace nonlocals_ with an empty container.
//   6. All-reduce a completeness flag and throw std::runtime_error if any
//      process reported an incomplete transfer.
//
// The all-reduces make this a collective operation over the communicator.
// When `verbose` is set, progress messages are written to std::cerr.
//
// NOTE(review): several lines (conditions guarding the verbose output and the
// `isLocallyComplete = 0` assignments, the early-exit body, the creation of
// `oneToOneRowMap`) are not visible in this chunk.
4053 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4060 using Teuchos::Comm;
4061 using Teuchos::outArg;
4064 using Teuchos::REDUCE_MAX;
4065 using Teuchos::REDUCE_MIN;
4066 using Teuchos::reduceAll;
4070 typedef GlobalOrdinal GO;
4071 typedef typename Teuchos::Array<GO>::size_type size_type;
4072 const char tfecfFuncName[] =
"globalAssemble: ";
4073 ProfilingRegion regionGlobalAssemble (
"Tpetra::CrsMatrix::globalAssemble");
// Verbose-output setup: the prefix string is only built for verbose runs.
4075 const bool verbose = Behavior::verbose(
"CrsMatrix");
4076 std::unique_ptr<std::string> prefix;
4078 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4079 std::ostringstream os;
4080 os << *prefix <<
"nonlocals_.size()=" << nonlocals_.size()
4082 std::cerr << os.str();
4084 RCP<const Comm<int> > comm = getComm ();
4086 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4087 (! isFillActive (), std::runtime_error,
"Fill must be active before "
4088 "you may call this method.");
4090 const size_t myNumNonlocalRows = nonlocals_.size ();
// Does ANY process have buffered rows?  MAX over a 0/1 flag answers that.
4097 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4098 int someoneHasNonlocalRows = 0;
4099 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4100 outArg (someoneHasNonlocalRows));
// Nobody has buffered rows (branch body not visible here).
4101 if (someoneHasNonlocalRows == 0) {
// Build the row Map of the buffered rows and the per-row entry counts.
4115 RCP<const map_type> nonlocalRowMap;
4116 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4118 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4119 size_type curPos = 0;
4120 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4121 ++mapIter, ++curPos) {
4122 myNonlocalGblRows[curPos] = mapIter->first;
4125 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4126 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
// Sort the row by column index (values move with their indices), then
// merge2 yields new logical ends; everything past them is erased.
// NOTE(review): presumably merge2 combines values of equal column
// indices — confirm against its definition.
4133 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4134 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4135 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4136 merge2 (gblCols_newEnd, vals_newEnd,
4137 gblCols.begin (), gblCols.end (),
4138 vals.begin (), vals.end ());
4139 gblCols.erase (gblCols_newEnd, gblCols.end ());
4140 vals.erase (vals_newEnd, vals.end ());
4141 numEntPerNonlocalRow[curPos] = gblCols.size ();
// The index base of the new Map is the global minimum buffered row index.
// A process with no buffered rows contributes the maximum GO value, so it
// cannot affect the MIN reduction.
4152 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4154 auto iter = std::min_element (myNonlocalGblRows.begin (),
4155 myNonlocalGblRows.end ());
4156 if (iter != myNonlocalGblRows.end ()) {
4157 myMinNonlocalGblRow = *iter;
4160 GO gblMinNonlocalGblRow = 0;
4161 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4162 outArg (gblMinNonlocalGblRow));
4163 const GO indexBase = gblMinNonlocalGblRow;
// INV is passed as the global element count (the "invalid" sentinel).
4164 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4165 nonlocalRowMap = rcp (
new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4174 std::ostringstream os;
4175 os << *prefix <<
"Create nonlocal matrix" << endl;
4176 std::cerr << os.str();
// Temporary matrix holding exactly the buffered rows, sized per row.
4178 RCP<crs_matrix_type> nonlocalMatrix =
4179 rcp (
new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow ()));
4181 size_type curPos = 0;
4182 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4183 ++mapIter, ++curPos) {
4184 const GO gblRow = mapIter->first;
4186 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4187 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4189 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
// Choose the communication path based on whether the original row Map is
// one-to-one.
4201 auto origRowMap = this->getRowMap ();
4202 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
// 1 until some transfer is found incomplete on this process.
4204 int isLocallyComplete = 1;
4206 if (origRowMapIsOneToOne) {
4208 std::ostringstream os;
4209 os << *prefix <<
"Original row Map is 1-to-1" << endl;
4210 std::cerr << os.str();
// Direct path: Export buffered rows to their owners with ADD.
4212 export_type exportToOrig (nonlocalRowMap, origRowMap);
4214 isLocallyComplete = 0;
4217 std::ostringstream os;
4218 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
4219 std::cerr << os.str();
4221 this->doExport (*nonlocalMatrix, exportToOrig,
Tpetra::ADD);
4226 std::ostringstream os;
4227 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
4228 std::cerr << os.str();
// Two-step path: Export to a one-to-one row Map first ...
4235 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4237 isLocallyComplete = 0;
4245 std::ostringstream os;
4246 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4248 std::cerr << os.str();
4250 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4252 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4258 std::ostringstream os;
4259 os << *prefix <<
"Free nonlocalMatrix" << endl;
4260 std::cerr << os.str();
// Drop this reference to the temporary matrix before the Import below.
4262 nonlocalMatrix = Teuchos::null;
4266 std::ostringstream os;
4267 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4268 std::cerr << os.str();
// ... then Import from the one-to-one matrix into this matrix with ADD.
4270 import_type importToOrig (oneToOneRowMap, origRowMap);
4271 this->doImport (oneToOneMatrix, importToOrig,
Tpetra::ADD);
4279 std::ostringstream os;
4280 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4281 std::cerr << os.str();
// Swap with an empty container so nonlocals_ ends up empty; the old
// contents are destroyed when newNonlocals goes out of scope.
4283 decltype (nonlocals_) newNonlocals;
4284 std::swap (nonlocals_, newNonlocals);
// MIN over the per-process flags: the result is 1 only if every process
// reported a complete transfer.
4293 int isGloballyComplete = 0;
4294 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4295 outArg (isGloballyComplete));
4296 TEUCHOS_TEST_FOR_EXCEPTION
4297 (isGloballyComplete != 1, std::runtime_error,
"On at least one process, "
4298 "you called insertGlobalValues with a global row index which is not in "
4299 "the matrix's row Map on any process in its communicator.");
// Puts the matrix back into a state where fillComplete_ is false — presumably
// CrsMatrix::resumeFill (the signature is on lines not visible here; confirm).
//
// When the matrix owns its graph (not a static graph), the request and its
// `params` are forwarded to the graph as well.
4302 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4307 if (! isStaticGraph ()) {
4308 myGraph_->resumeFill (params);
// With a sufficiently new KokkosKernels, drop the cached applyHelper.
// NOTE(review): presumably so it is rebuilt on a later localApply once
// the matrix may have changed — confirm.
4310 #if KOKKOSKERNELS_VERSION >= 40299
4312 applyHelper.reset();
4314 fillComplete_ =
false;
// Returns whatever the matrix's graph reports from haveGlobalConstants() —
// presumably CrsMatrix::haveGlobalConstants (the signature is on lines not
// visible here; confirm).
4317 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4321 return getCrsGraphRef ().haveGlobalConstants ();
// fillComplete(params): convenience overload that uses the graph's row Map as
// both the range Map and the domain Map, then delegates to the three-argument
// fillComplete.
//
// Throws std::logic_error if getCrsGraph() returns null.
//
// NOTE(review): the declaration of `graph` is on a line not visible here.
4324 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4329 const char tfecfFuncName[] =
"fillComplete(params): ";
4331 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4332 (this->getCrsGraph ().is_null (), std::logic_error,
4333 "getCrsGraph() returns null. This should not happen at this point. "
4334 "Please report this bug to the Tpetra developers.");
// Range Map := graph's row Map; domain Map := the same Map.
4344 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap ();
4345 Teuchos::RCP<const map_type> domainMap = rangeMap;
4346 this->fillComplete (domainMap, rangeMap, params);
// fillComplete(domainMap, rangeMap, params): finish assembly of the matrix.
//
// Visible parameters:
//   rangeMap - the range Map to use.
//   params   - optional parameter list; may be null.  Keys read here:
//                "No Nonlocal Changes"         (bool, default false)
//                "sort column map ghost gids"  (bool, default true)
//                "Sort column Map ghost GIDs"  (alternate spelling, consulted
//                                               only if the first is absent)
//                "compute global constants"    (bool, default true)
//
// Outline of the visible steps:
//   1. Require that fill is active and the matrix is not already fill
//      complete (std::runtime_error otherwise).
//   2. Allocate values if the graph's indices are not yet allocated.
//   3. Run globalAssemble() unless the caller asserted there are no nonlocal
//      changes or there is only one process; on one process, buffered
//      nonlocal entries are an error.
//   4. Static graph: (debug builds) verify the supplied Maps match the
//      graph's, then fill the local matrix.
//      Owned graph: set domain/range Maps, build the column Map if needed,
//      convert indices to local, sort and merge, build Import/Export, fill
//      the local graph and matrix, compute constants, mark the graph fill
//      complete.
//   5. Mark the matrix fill complete and check internal state.
//
// The profiling label is spelled "fillComplete" (it previously read
// "fillCompete").
//
// NOTE(review): a number of lines of this definition (first signature line,
// several `else` branches and closing braces, the calls that consume the
// profiling labels) are not visible in this chunk.
4350 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4354 const Teuchos::RCP<const map_type>& rangeMap,
4355 const Teuchos::RCP<Teuchos::ParameterList>& params)
4359 using Teuchos::ArrayRCP;
4363 const char tfecfFuncName[] =
"fillComplete: ";
4364 ProfilingRegion regionFillComplete
4365 (
"Tpetra::CrsMatrix::fillComplete");
// Verbose-output setup: the prefix string is only built for verbose runs.
4366 const bool verbose = Behavior::verbose(
"CrsMatrix");
4367 std::unique_ptr<std::string> prefix;
4369 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4370 std::ostringstream os;
4371 os << *prefix << endl;
4372 std::cerr << os.str ();
4375 "Tpetra::CrsMatrix::fillComplete",
4378 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4379 (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4380 "Matrix fill state must be active (isFillActive() "
4381 "must be true) before you may call fillComplete().");
4382 const int numProcs = this->getComm ()->getSize ();
// Defaults used when `params` is null or does not set the key.
4392 bool assertNoNonlocalInserts =
false;
4395 bool sortGhosts =
true;
4397 if (! params.is_null ()) {
4398 assertNoNonlocalInserts = params->get (
"No Nonlocal Changes",
4399 assertNoNonlocalInserts);
// Two spellings of the ghost-sorting key are accepted; the lower-case one
// takes precedence when both are present.
4400 if (params->isParameter (
"sort column map ghost gids")) {
4401 sortGhosts = params->get (
"sort column map ghost gids", sortGhosts);
4403 else if (params->isParameter (
"Sort column Map ghost GIDs")) {
4404 sortGhosts = params->get (
"Sort column Map ghost GIDs", sortGhosts);
// Global assembly is only needed with more than one process and when the
// caller has not promised there were no nonlocal inserts.
4409 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
// Pass the ghost-sorting choice to the owned graph, if there is one.
4411 if (! this->myGraph_.is_null ()) {
4412 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
// Allocate storage now if nothing was ever inserted: local indices when a
// column Map exists, global indices otherwise.
4415 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4416 if (this->hasColMap ()) {
4417 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4420 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4425 if (needGlobalAssemble) {
4426 this->globalAssemble ();
// Single process with buffered nonlocal entries: there is no other process
// that could own those rows, so this is an error.
4429 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4430 (numProcs == 1 && nonlocals_.size() > 0,
4431 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4432 "An invalid entry (i.e., with row index not in the row Map) must have "
4433 "been submitted to the CrsMatrix.");
// Static-graph path: the graph cannot be changed, so only validate (debug
// builds) and fill the local matrix.
4436 if (this->isStaticGraph ()) {
4444 #ifdef HAVE_TPETRA_DEBUG
4462 const bool domainMapsMatch =
4463 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4464 const bool rangeMapsMatch =
4465 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4467 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4468 (! domainMapsMatch, std::runtime_error,
4469 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4470 "The graph cannot be changed because it was given to the CrsMatrix "
4471 "constructor as const. You can fix this by passing in the graph's "
4472 "domain Map and range Map to the matrix's fillComplete call.");
4474 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4475 (! rangeMapsMatch, std::runtime_error,
4476 "The CrsMatrix's range Map does not match the graph's range Map. "
4477 "The graph cannot be changed because it was given to the CrsMatrix "
4478 "constructor as const. You can fix this by passing in the graph's "
4479 "domain Map and range Map to the matrix's fillComplete call.");
4480 #endif // HAVE_TPETRA_DEBUG
4484 this->fillLocalMatrix (params);
// Owned-graph path: the matrix drives the graph through its own completion
// steps.
4492 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
// Build the column Map only if the graph does not have one yet; remotePIDs
// is filled by makeColMap and reused by makeImportExport below.
4495 Teuchos::Array<int> remotePIDs (0);
4496 const bool mustBuildColMap = ! this->hasColMap ();
4497 if (mustBuildColMap) {
4498 this->myGraph_->makeColMap (remotePIDs);
// Convert global to local indices; a nonzero first member of the result is
// treated as failure and its second member is the error text.
4503 const std::pair<size_t, std::string> makeIndicesLocalResult =
4504 this->myGraph_->makeIndicesLocal(verbose);
4509 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4510 (makeIndicesLocalResult.first != 0, std::runtime_error,
4511 makeIndicesLocalResult.second);
// Sort/merge indices and values together, skipping work already done.
4513 const bool sorted = this->myGraph_->isSorted ();
4514 const bool merged = this->myGraph_->isMerged ();
4515 this->sortAndMergeIndicesAndValues (sorted, merged);
4520 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4524 this->fillLocalGraphAndMatrix (params);
// Global constants are computed unless the caller opted out via params.
4526 const bool callGraphComputeGlobalConstants = params.get () ==
nullptr ||
4527 params->get (
"compute global constants",
true);
4528 if (callGraphComputeGlobalConstants) {
4529 this->myGraph_->computeGlobalConstants ();
4532 this->myGraph_->computeLocalConstants ();
4534 this->myGraph_->fillComplete_ =
true;
4535 this->myGraph_->checkInternalState ();
// Both paths end here: the matrix itself is now fill complete.
4540 this->fillComplete_ =
true;
4543 "Tpetra::CrsMatrix::fillComplete",
"checkInternalState"
4545 this->checkInternalState ();
// expertStaticFillComplete: complete fill using caller-supplied Maps and
// communication objects, delegating graph completion to the owned graph.
//
// Visible parameters:
//   rangeMap - range Map handed to the graph.
//   importer - Import object handed to the graph.
//   exporter - Export object handed to the graph.
//   params   - optional parameter list; may be null.  With
//              HAVE_TPETRA_MMM_TIMINGS, the key "Timer Label" is read from it
//              to build timer names.
//
// Steps:
//   1. Require that fill is active and the matrix is not already fill
//      complete (std::runtime_error otherwise).
//   2. Require an owned graph (std::logic_error if myGraph_ is null).
//   3. Forward all arguments to myGraph_->expertStaticFillComplete.
//   4. Fill the local graph and matrix, then set fillComplete_.
//   5. Debug builds: verify the resulting fill state.
//   6. Check internal state.
//
// The second debug check's message reports the condition it actually tests
// (isFillComplete() being false).
//
// NOTE(review): the first signature line (with `domainMap`) and the
// declaration of `label` are on lines not visible here.
4549 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4553 const Teuchos::RCP<const map_type> & rangeMap,
4554 const Teuchos::RCP<const import_type>& importer,
4555 const Teuchos::RCP<const export_type>& exporter,
4556 const Teuchos::RCP<Teuchos::ParameterList> &params)
// Optional timers: names are "Tpetra " + label + ": " + a per-phase suffix.
4558 #ifdef HAVE_TPETRA_MMM_TIMINGS
4560 if(!params.is_null())
4561 label = params->get(
"Timer Label",label);
4562 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
4563 using Teuchos::TimeMonitor;
4565 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4568 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4569 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4570 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
4571 "must be true) before calling fillComplete().");
4572 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4573 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
4576 #ifdef HAVE_TPETRA_MMM_TIMINGS
4577 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
// The graph performs its own expert static fill complete with the same
// arguments.
4580 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4584 #ifdef HAVE_TPETRA_MMM_TIMINGS
4585 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
4588 fillLocalGraphAndMatrix (params);
4593 fillComplete_ =
true;
// Debug builds: the matrix must now be fill complete and not fill active.
4596 #ifdef HAVE_TPETRA_DEBUG
4597 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4598 ": We're at the end of fillComplete(), but isFillActive() is true. "
4599 "Please report this bug to the Tpetra developers.");
4600 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4601 ": We're at the end of fillComplete(), but isFillComplete() is false. "
4602 "Please report this bug to the Tpetra developers.");
4603 #endif // HAVE_TPETRA_DEBUG
4605 #ifdef HAVE_TPETRA_MMM_TIMINGS
4606 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
4609 checkInternalState();
// In-place scan over one row's column indices that returns the number of
// entries kept — presumably CrsMatrix::mergeRowIndicesAndValues, which
// sortAndMergeIndicesAndValues calls with (rowLen, cols, vals); the signature
// is on lines not visible here, so confirm.
//
// `newend` trails `cur`: `cur` walks the row from the second entry, and an
// index that differs from `*newend` starts a new kept entry.
// NOTE(review): what happens to the values of equal indices is on lines not
// visible here.
4613 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4619 LocalOrdinal* beg = cols;
4620 LocalOrdinal* end = cols + rowLen;
4621 LocalOrdinal* newend = beg;
4623 LocalOrdinal* cur = beg + 1;
4627 while (cur != end) {
4628 if (*cur != *newend) {
// Number of entries between the start of the row and the final `newend`.
4645 return newend - beg;
// sortAndMergeIndicesAndValues: for every local row, sort the column indices
// (moving values with them) and merge entries, updating the row length and
// counting how many entries were removed.
//
// Nothing is done when both `sorted` and `merged` are already true.
// Otherwise the following must hold, or an exception is thrown:
//   * the graph is not static            (std::runtime_error),
//   * myGraph_ is non-null               (std::logic_error),
//   * storage has not been optimized     (std::logic_error).
//
// NOTE(review): the signature, the definitions of rowBegins_, rowLengths_,
// cols_ and `vals`, and the code that consumes `totalNumDups` are on lines
// not visible here.
4648 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4653 using ::Tpetra::Details::ProfilingRegion;
4654 typedef LocalOrdinal LO;
// The loop runs in the execution space of a host mirror of a device View.
4655 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4656 host_execution_space;
4657 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4658 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4659 ProfilingRegion regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
4661 if (! sorted || ! merged) {
4662 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4663 (this->isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
4664 "\"static\" (const) graph, since the matrix does not own the graph.");
4665 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4666 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
4667 "this matrix claims ! isStaticGraph(). "
4668 "Please report this bug to the Tpetra developers.");
4669 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4670 (this->isStorageOptimized (), std::logic_error,
"It is invalid to call "
4671 "this method if the graph's storage has already been optimized. "
4672 "Please report this bug to the Tpetra developers.");
4675 const LO lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
4676 size_t totalNumDups = 0;
// Read-write host access to the unpacked values.
4681 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
// Parallel reduction over rows; the reduction variable counts the entries
// removed across all rows.  The lambda captures by value.
4683 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
4684 [=] (
const LO lclRow,
size_t& numDups) {
4685 size_t rowBegin = rowBegins_(lclRow);
4686 size_t rowLen = rowLengths_(lclRow);
4687 LO* cols = cols_.data() + rowBegin;
// Sort this row's indices, permuting its values the same way.
4690 sort2 (cols, cols + rowLen, vals);
// Merge, store the shortened length, and count what was removed.
4693 size_t newRowLength = mergeRowIndicesAndValues (rowLen, cols, vals);
4694 rowLengths_(lclRow) = newRowLength;
4695 numDups += rowLen - newRowLength;
// Non-transposed sparse matrix-multivector product with scaling factors
// `alpha` and `beta` — presumably CrsMatrix::applyNonTranspose, which apply()
// calls with (X, Y, alpha, beta); the signature is on lines not visible here,
// so confirm.
//
// Visible structure:
//   * Special-case branches on alpha == 0 and beta != 1 (bodies not visible).
//   * If the graph has an Import object, X_in is imported (INSERT) into a
//     column-Map multivector; otherwise X_in is used directly, or copied into
//     a column-Map multivector in a case whose condition is not visible.
//   * If the graph has an Export object, the local product is computed into a
//     row-Map multivector with beta = 0 and then exported (Export step not
//     visible); otherwise the local product is applied with the caller's beta
//     either into a row-Map temporary or directly into Y_in.
//   * Y is treated as "replicated" when it is not distributed and the
//     communicator has more than one process; a reduction step follows in
//     that case (body not visible).
4708 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4719 using Teuchos::rcp_const_cast;
4720 using Teuchos::rcpFromRef;
4721 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4722 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4728 if (alpha == ZERO) {
4731 }
else if (beta != ONE) {
4745 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4746 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
// beta == 0 means the previous contents of Y do not contribute.
4752 const bool Y_is_overwritten = (beta ==
ZERO);
4755 const bool Y_is_replicated =
4756 (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
// Replicated Y on ranks other than 0 gets special handling (body not
// visible here).
4764 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
// X_colMap: the input expressed on the column Map.
4771 RCP<const MV> X_colMap;
4772 if (importer.is_null ()) {
// No Import needed.  One branch obtains a column-Map multivector with a
// second argument of true; the other just wraps X_in.
4780 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in,
true);
4782 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4787 X_colMap = rcpFromRef (X_in);
4791 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply: Import");
// Import X_in from the domain Map distribution to the column Map.
4797 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
4800 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
4801 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4808 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
4815 if (! exporter.is_null ()) {
// With an Export object: compute alpha*A*X into the row-Map temporary
// (beta = 0 there); the Export into Y_in follows.
4816 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
4818 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply: Export");
4824 if (Y_is_overwritten) {
// Without an Export object: apply with the caller's beta, either through a
// row-Map temporary or directly into Y_in.
4850 Y_rowMap = getRowMapMultiVector (Y_in,
true);
4857 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4861 this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4869 if (Y_is_replicated) {
4870 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
// Transposed (or conjugate-transposed) sparse matrix-multivector product —
// presumably CrsMatrix::applyTranspose, which apply() calls with
// (X, Y, mode, alpha, beta); most of the signature is on lines not visible
// here, so confirm.
//
// Visible parameter:
//   mode - the Teuchos::ETransp value forwarded to localApply.
//
// Visible structure:
//   * Special-case branches on alpha == 0 and beta == 0 (bodies not visible).
//   * importMV_ / exportMV_ are cached multivectors on the column Map / row
//     Map; they are (re)created when absent and, judging by the
//     getNumVectors() comparison, reset when the vector count changed (reset
//     statement not visible).
//   * With an Export object, X_in is first brought into exportMV_ via
//     doImport with the exporter (INSERT).
//   * With an Import object, the local product goes into a zeroed importMV_
//     with beta = 0, followed by an Export step (not visible); otherwise it
//     is applied into Y_in directly, or into a copy of Y_in in a case whose
//     condition is not visible.
//   * A reduction step follows when Y is replicated (body not visible).
4875 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4880 const Teuchos::ETransp mode,
4885 using Teuchos::null;
4888 using Teuchos::rcp_const_cast;
4889 using Teuchos::rcpFromRef;
4890 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4893 if (alpha == ZERO) {
4906 else if (beta == ZERO) {
4924 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4925 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
// "Replicated" = not distributed while the communicator has > 1 process.
4930 const bool Y_is_replicated = (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4931 const bool Y_is_overwritten = (beta ==
ZERO);
4932 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
// X is either a deep copy of X_in or a non-owning reference to it (the
// condition choosing between them is not visible here).
4938 X = rcp (
new MV (X_in, Teuchos::Copy));
4940 X = rcpFromRef (X_in);
// Maintain the cached column-Map multivector.
4944 if (importer != Teuchos::null) {
4945 if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4948 if (importMV_ == null) {
4949 importMV_ = rcp (
new MV (this->getColMap (), numVectors));
// Maintain the cached row-Map multivector.
4952 if (exporter != Teuchos::null) {
4953 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4956 if (exportMV_ == null) {
4957 exportMV_ = rcp (
new MV (this->getRowMap (), numVectors));
// Transpose mode reverses the roles: the Export object is used with
// doImport to bring X_in onto the row Map.
4963 if (! exporter.is_null ()) {
4964 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
4965 exportMV_->doImport (X_in, *exporter,
INSERT);
4972 if (importer != Teuchos::null) {
4973 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
// Zero the target first, then compute alpha * op(A) * X into it.
4980 importMV_->putScalar (ZERO);
4982 this->localApply (*X, *importMV_, mode, alpha, ZERO);
4984 if (Y_is_overwritten) {
// No Import object: apply into a copy of Y_in or into Y_in itself.
5001 MV Y (Y_in, Teuchos::Copy);
5002 this->localApply (*X, Y, mode, alpha, beta);
5005 this->localApply (*X, Y_in, mode, alpha, beta);
5012 if (Y_is_replicated) {
5013 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
// localApply: process-local sparse matrix-multivector product
//   Y := beta * Y + alpha * op(A) * X
// where op is selected by `mode` (const member function; no communication is
// visible in this definition).
//
// Visible parameters:
//   mode  - Teuchos::NO_TRANS, TRANS or CONJ_TRANS.
//   alpha - scaling factor forwarded to the local kernel.
//   beta  - scaling factor forwarded to the local kernel.
//
// Checks (all throw std::runtime_error; some may be debug-only — the
// surrounding preprocessor lines are not visible here):
//   * local row counts of X and Y must match the column/row Map sizes
//     appropriate for `mode`;
//   * the matrix must be fill complete;
//   * X and Y must be constant stride;
//   * X and Y must not alias (same non-null data pointer with nonzero extent).
//
// Two implementations are selected by KOKKOSKERNELS_VERSION:
//   * >= 40299: a lazily created `applyHelper` holds SpMV handles; the kernel
//     is called with either the normal or an int-rowptr copy of the matrix.
//   * otherwise: the local multiply operator is used, with a separate entry
//     point for imbalanced rows.
//
// NOTE(review): many condition lines are not visible in this chunk.
5018 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5023 const Teuchos::ETransp mode,
5024 const Scalar& alpha,
5025 const Scalar& beta)
const
5028 using Teuchos::NO_TRANS;
5029 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localApply");
5036 const char tfecfFuncName[] =
"localApply: ";
5037 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
// Anything other than NO_TRANS is handled as a transpose for size checks.
5041 const bool transpose = (mode != Teuchos::NO_TRANS);
5042 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5044 getColMap ()->getLocalNumElements (), std::runtime_error,
5045 "NO_TRANS case: X has the wrong number of local rows. "
5047 "getColMap()->getLocalNumElements() = " <<
5048 getColMap ()->getLocalNumElements () <<
".");
5049 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5051 getRowMap ()->getLocalNumElements (), std::runtime_error,
5052 "NO_TRANS case: Y has the wrong number of local rows. "
5054 "getRowMap()->getLocalNumElements() = " <<
5055 getRowMap ()->getLocalNumElements () <<
".");
5056 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5058 getRowMap ()->getLocalNumElements (), std::runtime_error,
5059 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5061 <<
" != getRowMap()->getLocalNumElements() = "
5062 << getRowMap ()->getLocalNumElements () <<
".");
// NOTE(review): this message also says "X" but compares against the column
// Map; by symmetry with the NO_TRANS checks it looks like the Y check.  The
// tested operand is on a line not visible here — confirm and correct the
// message if so.
5063 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5065 getColMap ()->getLocalNumElements (), std::runtime_error,
5066 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5068 <<
" != getColMap()->getLocalNumElements() = "
5069 << getColMap ()->getLocalNumElements () <<
".");
5070 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5071 (! isFillComplete (), std::runtime_error,
"The matrix is not "
5072 "fill complete. You must call fillComplete() (possibly with "
5073 "domain and range Map arguments) without an intervening "
5074 "resumeFill() call before you may call this method.");
5075 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5077 std::runtime_error,
"X and Y must be constant stride.");
// Aliasing check: identical non-null data pointers with a nonzero extent.
5082 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5083 (X_lcl.data () == Y_lcl.data () && X_lcl.data () !=
nullptr
5084 && X_lcl.extent(0) != 0,
5085 std::runtime_error,
"X and Y may not alias one another.");
5088 #if KOKKOSKERNELS_VERSION >= 40299
5089 auto A_lcl = getLocalMatrixDevice();
// Create the helper on first use; later calls reuse it.
5091 if(!applyHelper.get()) {
5094 bool useMergePath =
false;
5095 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
// Only when the execution space is Kokkos::Cuda: measure how far the
// longest row is above the average row length.
5101 if constexpr(std::is_same_v<execution_space, Kokkos::Cuda>) {
5102 LocalOrdinal nrows = getLocalNumRows();
5103 LocalOrdinal maxRowImbalance = 0;
5105 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5108 useMergePath =
true;
5111 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map,
5112 useMergePath ? KokkosSparse::SPMV_MERGE_PATH : KokkosSparse::SPMV_DEFAULT);
// Translate the Teuchos transpose enum into the KokkosSparse mode string;
// an unrecognized value throws std::invalid_argument.
5116 const char* modeKK =
nullptr;
5119 case Teuchos::NO_TRANS:
5120 modeKK = KokkosSparse::NoTranspose;
break;
5121 case Teuchos::TRANS:
5122 modeKK = KokkosSparse::Transpose;
break;
5123 case Teuchos::CONJ_TRANS:
5124 modeKK = KokkosSparse::ConjugateTranspose;
break;
5126 throw std::invalid_argument(
"Tpetra::CrsMatrix::localApply: invalid mode");
// The helper decides whether to use a matrix copy with int row pointers
// (and its matching handle) or the original matrix and handle.
5129 if(applyHelper->shouldUseIntRowptrs())
5131 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
5133 &applyHelper->handle_int, modeKK,
5139 &applyHelper->handle, modeKK,
// Older KokkosKernels path: same imbalance measure, then pick the local
// operator's imbalanced-rows or regular apply.
5143 LocalOrdinal nrows = getLocalNumRows();
5144 LocalOrdinal maxRowImbalance = 0;
5146 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5148 auto matrix_lcl = getLocalMultiplyOperator();
5150 matrix_lcl->applyImbalancedRows (X_lcl, Y_lcl, mode, alpha, beta);
5152 matrix_lcl->apply (X_lcl, Y_lcl, mode, alpha, beta);
// apply: public entry point for the sparse matrix-multivector product.
//
// Visible parameter:
//   mode - Teuchos::NO_TRANS selects applyNonTranspose; any other value
//          selects applyTranspose, which receives `mode` unchanged.
//
// Throws std::runtime_error if the matrix is not fill complete.
// Each path runs inside its own profiling region.
5156 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5161 Teuchos::ETransp mode,
5166 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5168 TEUCHOS_TEST_FOR_EXCEPTION
5169 (! isFillComplete (), std::runtime_error,
5170 fnName <<
": Cannot call apply() until fillComplete() "
5171 "has been called.");
5173 if (mode == Teuchos::NO_TRANS) {
// The non-transpose region reuses the function name as its label.
5174 ProfilingRegion regionNonTranspose (fnName);
5175 this->applyNonTranspose (X, Y, alpha, beta);
5178 ProfilingRegion regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5179 this->applyTranspose (X, Y, mode, alpha, beta);
// convert: build a new CrsMatrix with scalar type T that shares this matrix's
// graph and holds converted copies of its values.
//
// Returns an RCP to the new matrix (per the visible return type).
// Throws std::runtime_error if this (source) matrix is not fill complete.
//
// NOTE(review): the `template <class T>` line, the function name line and the
// return statement are on lines not visible here.
5184 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5186 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5192 const char tfecfFuncName[] =
"convert: ";
5194 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5195 (! this->isFillComplete (), std::runtime_error,
"This matrix (the source "
5196 "of the conversion) is not fill complete. You must first call "
5197 "fillComplete() (possibly with the domain and range Map) without an "
5198 "intervening call to resumeFill(), before you may call this method.");
// The new matrix is constructed from this matrix's graph.
5200 RCP<output_matrix_type> newMatrix
5201 (
new output_matrix_type (this->getCrsGraph ()));
// Element-wise conversion of the device values into the new matrix.
5205 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5206 this->getLocalMatrixDevice ().values);
// Complete the new matrix with the same domain and range Maps.
5210 newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
// checkInternalState: consistency checks on the matrix's graph pointers and
// value storage.  Every failed check throws std::logic_error with the generic
// `err` message.
//
// Visible checks:
//   1. staticGraph_ must be non-null.
//   2. If myGraph_ is non-null it must be the same object as staticGraph_.
//   3. A fill-complete matrix must have a fill-complete graph.
//   4. If the graph has allocated indices, a nonzero local allocation size and
//      at least one local row, the unpacked values must not be empty.
//
// NOTE(review): whether these checks are compiled only in debug builds
// depends on preprocessor lines not visible here.
5216 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5223 const char tfecfFuncName[] =
"checkInternalState: ";
5224 const char err[] =
"Internal state is not consistent. "
5225 "Please report this bug to the Tpetra developers.";
5229 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5230 (staticGraph_.is_null (), std::logic_error, err);
5234 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5235 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5236 std::logic_error, err);
5238 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5239 (isFillComplete () && ! staticGraph_->isFillComplete (),
5240 std::logic_error, err <<
" Specifically, the matrix is fill complete, "
5241 "but its graph is NOT fill complete.");
5244 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5245 (staticGraph_->indicesAreAllocated () &&
5246 staticGraph_->getLocalAllocationSize() > 0 &&
5247 staticGraph_->getLocalNumRows() > 0 &&
5248 valuesUnpacked_wdv.extent (0) == 0,
5249 std::logic_error, err);
// CrsMatrix::description: assembles a one-line summary in an ostringstream.
// It contains the class tag, the object label when nonempty, the
// fill-complete flag and the global dimensions; the global entry count is
// added only in the fill-complete branch.
5253 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5258 std::ostringstream os;
5260 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
// The label is printed only when it is not the empty string.
5261 if (this->getObjectLabel () !=
"") {
5262 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
// Fill-complete branch: dimensions plus global number of entries.
5264 if (isFillComplete ()) {
5265 os <<
"isFillComplete: true"
5266 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5267 << getGlobalNumCols () <<
"]"
5268 <<
", global number of entries: " << getGlobalNumEntries ()
// Not-fill-complete branch: dimensions only; this branch also closes the brace.
5272 os <<
"isFillComplete: false"
5273 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5274 << getGlobalNumCols () <<
"]}";
// CrsMatrix::describe: prints a description of the matrix to `out`, with the
// amount of detail controlled by `verbLevel`. The visible stages are:
//   1. header, label, template parameters and global summary;
//   2. row / column / domain / range Map descriptions (gated by VERB_MEDIUM)
//      followed by per-process entry counts;
//   3. a per-row table (gated by VERB_HIGH), with (index, value) pairs added
//      at VERB_EXTREME.
5279 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5283 const Teuchos::EVerbosityLevel verbLevel)
const
5287 using Teuchos::ArrayView;
5288 using Teuchos::Comm;
5290 using Teuchos::TypeNameTraits;
5291 using Teuchos::VERB_DEFAULT;
5292 using Teuchos::VERB_NONE;
5293 using Teuchos::VERB_LOW;
5294 using Teuchos::VERB_MEDIUM;
5295 using Teuchos::VERB_HIGH;
5296 using Teuchos::VERB_EXTREME;
// VERB_DEFAULT is mapped to VERB_LOW.
5298 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
// NOTE(review): the body of this VERB_NONE test is not visible here;
// presumably it returns without printing -- confirm.
5300 if (vl == VERB_NONE) {
5305 Teuchos::OSTab tab0 (out);
5307 RCP<const Comm<int> > comm = this->getComm();
5308 const int myRank = comm->getRank();
5309 const int numProcs = comm->getSize();
// Column width for the row table: the loop steps through powers of ten below
// the global row count (loop body not visible), then the width is clamped to
// at least 11 and padded by 2.
5311 for (
size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5314 width = std::max<size_t> (width,
static_cast<size_t> (11)) + 2;
// ---- Stage 1: header and global summary ----
5324 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5326 Teuchos::OSTab tab1 (out);
5329 if (this->getObjectLabel () !=
"") {
5330 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5333 out <<
"Template parameters:" << endl;
5334 Teuchos::OSTab tab2 (out);
5335 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
5336 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5337 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5338 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
// Entry counts are printed only in the fill-complete branch.
5340 if (isFillComplete()) {
5341 out <<
"isFillComplete: true" << endl
5342 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5343 << getGlobalNumCols () <<
"]" << endl
5344 <<
"Global number of entries: " << getGlobalNumEntries () << endl
5345 << endl <<
"Global max number of entries in a row: "
5346 << getGlobalMaxNumRowEntries () << endl;
5349 out <<
"isFillComplete: false" << endl
5350 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5351 << getGlobalNumCols () <<
"]" << endl;
// NOTE(review): body not visible; presumably stops here below VERB_MEDIUM.
5355 if (vl < VERB_MEDIUM) {
// ---- Stage 2: Map descriptions ----
// Each Map prints "null" when absent, a "same as ..." shortcut when it is
// the same RCP as a previously described Map, and a full describe() otherwise.
5361 out << endl <<
"Row Map:" << endl;
5363 if (getRowMap ().is_null ()) {
5365 out <<
"null" << endl;
5372 getRowMap ()->describe (out, vl);
5377 out <<
"Column Map: ";
5379 if (getColMap ().is_null ()) {
5381 out <<
"null" << endl;
5383 }
else if (getColMap () == getRowMap ()) {
5385 out <<
"same as row Map" << endl;
5391 getColMap ()->describe (out, vl);
5396 out <<
"Domain Map: ";
5398 if (getDomainMap ().is_null ()) {
5400 out <<
"null" << endl;
5402 }
else if (getDomainMap () == getRowMap ()) {
5404 out <<
"same as row Map" << endl;
5406 }
else if (getDomainMap () == getColMap ()) {
5408 out <<
"same as column Map" << endl;
5414 getDomainMap ()->describe (out, vl);
5419 out <<
"Range Map: ";
5421 if (getRangeMap ().is_null ()) {
5423 out <<
"null" << endl;
5425 }
else if (getRangeMap () == getDomainMap ()) {
5427 out <<
"same as domain Map" << endl;
5429 }
else if (getRangeMap () == getRowMap ()) {
5431 out <<
"same as row Map" << endl;
5437 getRangeMap ()->describe (out, vl);
// Per-process summary: each rank prints only in its own loop iteration.
// NOTE(review): any inter-rank synchronization between iterations is not
// visible in these lines.
5441 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5442 if (myRank == curRank) {
5443 out <<
"Process rank: " << curRank << endl;
5444 Teuchos::OSTab tab2 (out);
5445 if (! staticGraph_->indicesAreAllocated ()) {
5446 out <<
"Graph indices not allocated" << endl;
5449 out <<
"Number of allocated entries: "
5450 << staticGraph_->getLocalAllocationSize () << endl;
5452 out <<
"Number of entries: " << getLocalNumEntries () << endl
5453 <<
"Max number of entries per row: " << getLocalMaxNumRowEntries ()
// NOTE(review): body not visible; presumably stops here below VERB_HIGH.
5462 if (vl < VERB_HIGH) {
// ---- Stage 3: per-row table, one rank at a time ----
5467 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5468 if (myRank == curRank) {
5469 out << std::setw(width) <<
"Proc Rank"
5470 << std::setw(width) <<
"Global Row"
5471 << std::setw(width) <<
"Num Entries";
5472 if (vl == VERB_EXTREME) {
5473 out << std::setw(width) <<
"(Index,Value)";
// One table line per local row: rank, global row index, entry count.
5476 for (
size_t r = 0; r < getLocalNumRows (); ++r) {
5477 const size_t nE = getNumEntriesInLocalRow(r);
5478 GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5479 out << std::setw(width) << myRank
5480 << std::setw(width) << gid
5481 << std::setw(width) << nE;
5482 if (vl == VERB_EXTREME) {
// Globally indexed: the row view already holds global column indices.
5483 if (isGloballyIndexed()) {
5484 global_inds_host_view_type rowinds;
5485 values_host_view_type rowvals;
5486 getGlobalRowView (gid, rowinds, rowvals);
5487 for (
size_t j = 0; j < nE; ++j) {
5488 out <<
" (" << rowinds[j]
5489 <<
", " << rowvals[j]
// Locally indexed: translate each local column index through the column Map.
5493 else if (isLocallyIndexed()) {
5494 local_inds_host_view_type rowinds;
5495 values_host_view_type rowvals;
5496 getLocalRowView (r, rowinds, rowvals);
5497 for (
size_t j=0; j < nE; ++j) {
5498 out <<
" (" << getColMap()->getGlobalElement(rowinds[j])
5499 <<
", " << rowvals[j]
// Returns true exactly when srcRowMat is non-null.
// NOTE(review): the signature and the statement that produces srcRowMat are
// not visible in these lines; presumably it is a dynamic_cast of the source
// object to the row-matrix type -- confirm against the full file.
5515 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5529 return (srcRowMat !=
nullptr);
// CrsMatrix::applyCrsPadding: re-pads the unpacked column-index and value
// arrays according to `padding`.
// Visible steps:
//   1. allocate values if the graph's indices are not yet allocated;
//   2. copy the graph's unpacked row offsets into row_ptr_beg and derive
//      row_ptr_end (end of the used part of each row);
//   3. pad the index array (global or local) together with the values array
//      and verify both have the same length afterwards;
//   4. recompute the per-row entry counts if the graph stores them;
//   5. hand the row offsets (row_ptr_beg) back to the graph.
5532 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5536 const typename crs_graph_type::padding_type& padding,
5542 using LO = local_ordinal_type;
5543 using row_ptrs_type =
5544 typename local_graph_device_type::row_map_type::non_const_type;
5545 using range_policy =
5546 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5547 const char tfecfFuncName[] =
"applyCrsPadding";
5548 const char suffix[] =
5549 ". Please report this bug to the Tpetra developers.";
5550 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
// Verbose-mode prefix; NOTE(review): the `if (verbose)` guard and the line
// that streams `padding` are not visible in these lines.
5552 std::unique_ptr<std::string> prefix;
5554 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5555 std::ostringstream os;
5556 os << *prefix <<
"padding: ";
5559 std::cerr << os.str();
// myRank is -1 when not verbose; otherwise an immediately invoked lambda
// looks it up through the Map's communicator (null-handling bodies not visible).
5561 const int myRank = ! verbose ? -1 : [&] () {
5562 auto map = this->getMap();
5563 if (map.is_null()) {
5566 auto comm = map->getComm();
5567 if (comm.is_null()) {
5570 return comm->getRank();
// Step 1: make sure storage exists before padding it.
5574 if (! myGraph_->indicesAreAllocated()) {
5576 std::ostringstream os;
5577 os << *prefix <<
"Call allocateIndices" << endl;
5578 std::cerr << os.str();
5580 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5592 std::ostringstream os;
5593 os << *prefix <<
"Allocate row_ptrs_beg: "
5594 << myGraph_->getRowPtrsUnpackedHost().extent(0) << endl;
5595 std::cerr << os.str();
// Step 2: row_ptr_beg is allocated without initialization and then filled
// by deep_copy from the graph's device row offsets.
5597 using Kokkos::view_alloc;
5598 using Kokkos::WithoutInitializing;
5599 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5600 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5602 Kokkos::deep_copy(execution_space(),row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
// N = number of rows = (number of offsets - 1), or 0 for an empty offsets array.
5604 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
5605 size_t(row_ptr_beg.extent(0) - 1);
5607 std::ostringstream os;
5608 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5609 std::cerr << os.str();
5611 row_ptrs_type row_ptr_end(
5612 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5614 row_ptrs_type num_row_entries_d;
// Per-row counts are used (and later refreshed) only when the graph has them.
5616 const bool refill_num_row_entries =
5617 myGraph_->k_numRowEntries_.extent(0) != 0;
5619 if (refill_num_row_entries) {
// With per-row counts: end(i) = beg(i) + count(i).
5622 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5623 myGraph_->k_numRowEntries_);
5624 Kokkos::parallel_for
5625 (
"Fill end row pointers", range_policy(0, N),
5626 KOKKOS_LAMBDA (
const size_t i) {
5627 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
// Without per-row counts: each row ends where the next one begins.
5634 Kokkos::parallel_for
5635 (
"Fill end row pointers", range_policy(0, N),
5636 KOKKOS_LAMBDA (
const size_t i) {
5637 row_ptr_end(i) = row_ptr_beg(i+1);
// Step 3: pad indices and values together.
// NOTE(review): the head of the padding call is not visible in these lines;
// only its trailing arguments are.
5641 if (myGraph_->isGloballyIndexed()) {
5643 myGraph_->gblInds_wdv,
5644 valuesUnpacked_wdv, padding, myRank, verbose);
// Sanity check: values and global indices must have equal length afterwards.
5645 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5646 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5647 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5648 (newValuesLen != newColIndsLen, std::logic_error,
5649 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5650 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5655 myGraph_->lclIndsUnpacked_wdv,
5656 valuesUnpacked_wdv, padding, myRank, verbose);
// Same sanity check for the locally indexed case.
5657 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5658 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5659 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5660 (newValuesLen != newColIndsLen, std::logic_error,
5661 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5662 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
// Step 4: recompute count(i) = end(i) - beg(i) on device.
// NOTE(review): the copy back into the graph's k_numRowEntries_ is not
// visible in these lines.
5666 if (refill_num_row_entries) {
5667 Kokkos::parallel_for
5668 (
"Fill num entries", range_policy(0, N),
5669 KOKKOS_LAMBDA (
const size_t i) {
5670 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
// Step 5: the offsets array must keep its length; then give it to the graph.
5676 std::ostringstream os;
5677 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5678 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5679 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5680 std::cerr << os.str();
5681 TEUCHOS_ASSERT( myGraph_->getRowPtrsUnpackedHost().extent(0) ==
5682 row_ptr_beg.extent(0) );
5684 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
// CrsMatrix::copyAndPermuteStaticGraph: copies rows of srcMat into this
// matrix when this matrix's graph is static.
//
// Parameters (as visible in the signature):
//   srcMat          - source row matrix.
//   numSameIDs      - number of leading rows whose source and target global
//                     row index coincide.
//   permuteToLIDs   - target local row indices for the permuted rows.
//   permuteFromLIDs - source local row indices for the permuted rows.
//   numPermutes     - number of entries read from the two permute arrays.
//
// For every row the global column indices and values of the source row are
// obtained (by copy when the source is locally indexed, by view otherwise)
// and passed to combineGlobalValues for the target global row.
//
// Throws std::logic_error if getNumEntriesInGlobalRow and getGlobalRowCopy
// disagree about a row's length.
5687 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5689 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5690 copyAndPermuteStaticGraph(
5691 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5692 const size_t numSameIDs,
5693 const LocalOrdinal permuteToLIDs[],
5694 const LocalOrdinal permuteFromLIDs[],
5695 const size_t numPermutes)
5697 using Details::ProfilingRegion;
5698 using Teuchos::Array;
5699 using Teuchos::ArrayView;
5701 using LO = LocalOrdinal;
5702 using GO = GlobalOrdinal;
// tfecfFuncName carries no trailing separator because it is also used as the
// method name for createPrefix; exception messages therefore start with ": ".
5703 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5704 const char suffix[] =
5705 " Please report this bug to the Tpetra developers.";
5706 ProfilingRegion regionCAP
5707 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
// Verbose prefix. The class name is "CrsMatrix", matching the profiling
// region above and the other createPrefix calls in this class.
5711 std::unique_ptr<std::string> prefix;
5713 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
5714 std::ostringstream os;
5715 os << *prefix <<
"Start" << endl;
5717 const char*
const prefix_raw =
5718 verbose ? prefix.get()->c_str() :
nullptr;
5720 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
// ---- Rows with identical source and target global index ----
// rowInds / rowVals are scratch buffers reused (and grown on demand) across rows.
5725 const map_type& srcRowMap = * (srcMat.getRowMap ());
5726 nonconst_global_inds_host_view_type rowInds;
5727 nonconst_values_host_view_type rowVals;
5728 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5729 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5733 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5734 const GO targetGID = sourceGID;
5736 ArrayView<const GO>rowIndsConstView;
5737 ArrayView<const Scalar> rowValsConstView;
// Locally indexed source: no global-index view exists, so copy the row out.
5739 if (sourceIsLocallyIndexed) {
5740 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5741 if (rowLength > static_cast<size_t> (rowInds.size())) {
5742 Kokkos::resize(rowInds,rowLength);
5743 Kokkos::resize(rowVals,rowLength);
// Restrict the scratch buffers to exactly rowLength entries.
5747 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5748 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5753 size_t checkRowLength = 0;
5754 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
5755 rowValsView, checkRowLength);
// The two row-length queries must agree.
5757 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5758 (rowLength != checkRowLength, std::logic_error,
": For "
5759 "global row index " << sourceGID <<
", the source "
5760 "matrix's getNumEntriesInGlobalRow returns a row length "
5761 "of " << rowLength <<
", but getGlobalRowCopy reports "
5762 "a row length of " << checkRowLength <<
"." << suffix);
// Wrap the copied data as Teuchos ArrayViews for combineGlobalValues.
5769 rowIndsConstView = Teuchos::ArrayView<const GO> (
5770 rowIndsView.data(), rowIndsView.extent(0),
5771 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5772 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5773 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5774 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Globally indexed source: take a view of the row, no copy needed.
5779 global_inds_host_view_type rowIndsView;
5780 values_host_view_type rowValsView;
5781 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5786 rowIndsConstView = Teuchos::ArrayView<const GO> (
5787 rowIndsView.data(), rowIndsView.extent(0),
5788 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5789 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5790 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5791 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Combine the source row into the target row (middle arguments not visible here).
5799 combineGlobalValues(targetGID, rowIndsConstView,
5801 prefix_raw, debug, verbose);
5805 std::ostringstream os;
5806 os << *prefix <<
"Do permutes" << endl;
// ---- Permuted rows: source and target rows are given by local index ----
5809 const map_type& tgtRowMap = * (this->getRowMap ());
5810 for (
size_t p = 0; p < numPermutes; ++p) {
5811 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5812 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5814 ArrayView<const GO> rowIndsConstView;
5815 ArrayView<const Scalar> rowValsConstView;
5817 if (sourceIsLocallyIndexed) {
5818 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5819 if (rowLength > static_cast<size_t> (rowInds.size ())) {
5820 Kokkos::resize(rowInds,rowLength);
5821 Kokkos::resize(rowVals,rowLength);
5825 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5826 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5831 size_t checkRowLength = 0;
5832 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5833 rowValsView, checkRowLength);
// Same row-length consistency check as in the same-ID loop.
5835 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5836 (rowLength != checkRowLength, std::logic_error,
": For "
5837 "source matrix global row index " << sourceGID <<
", "
5838 "getNumEntriesInGlobalRow returns a row length of " <<
5839 rowLength <<
", but getGlobalRowCopy a row length of "
5840 << checkRowLength <<
"." << suffix);
5847 rowIndsConstView = Teuchos::ArrayView<const GO> (
5848 rowIndsView.data(), rowIndsView.extent(0),
5849 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5850 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5851 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5852 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5857 global_inds_host_view_type rowIndsView;
5858 values_host_view_type rowValsView;
5859 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5864 rowIndsConstView = Teuchos::ArrayView<const GO> (
5865 rowIndsView.data(), rowIndsView.extent(0),
5866 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5867 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5868 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5869 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5874 combineGlobalValues(targetGID, rowIndsConstView,
5876 prefix_raw, debug, verbose);
5880 std::ostringstream os;
5881 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermuteNonStaticGraph: copies rows of srcMat into this
// matrix when this matrix owns a modifiable graph.
//
// Parameters (as visible in the signature):
//   srcMat             - source row matrix.
//   numSameIDs         - number of leading rows whose source and target
//                        global row index coincide.
//   permuteToLIDs_dv   - DualView of target local row indices.
//   permuteFromLIDs_dv - DualView of source local row indices.
//   numPermutes        - number of permuted rows to process.
//
// The graph first computes the required padding from the source graph, which
// is applied via applyCrsPadding; each source row is then inserted into the
// target row with insertGlobalValuesFilteredChecked.
//
// Throws std::logic_error if getNumEntriesInGlobalRow and getGlobalRowCopy
// disagree about a row's length.
5885 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5887 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5888 copyAndPermuteNonStaticGraph(
5889 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5890 const size_t numSameIDs,
5891 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5892 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5893 const size_t numPermutes)
5895 using Details::ProfilingRegion;
5896 using Teuchos::Array;
5897 using Teuchos::ArrayView;
5899 using LO = LocalOrdinal;
5900 using GO = GlobalOrdinal;
// tfecfFuncName carries no trailing separator because it is also used as the
// method name for createPrefix; exception messages therefore start with ": ".
5901 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5902 const char suffix[] =
5903 " Please report this bug to the Tpetra developers.";
5904 ProfilingRegion regionCAP
5905 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
// Verbose prefix. The class name is "CrsMatrix", matching the profiling
// region above and the other createPrefix calls in this class.
5909 std::unique_ptr<std::string> prefix;
5911 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
5912 std::ostringstream os;
5913 os << *prefix <<
"Start" << endl;
5915 const char*
const prefix_raw =
5916 verbose ? prefix.get()->c_str() :
nullptr;
// Padding is computed by the graph from the source graph and applied here
// before any row is inserted.
// NOTE(review): the declaration that receives computeCrsPadding's result
// (`padding`) is not visible in these lines.
5919 using row_graph_type = RowGraph<LO, GO, Node>;
5920 const row_graph_type& srcGraph = *(srcMat.getGraph());
5922 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5923 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5924 applyCrsPadding(*padding, verbose);
5926 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5931 const map_type& srcRowMap = * (srcMat.getRowMap ());
5932 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5933 using gids_type = nonconst_global_inds_host_view_type;
5934 using vals_type = nonconst_values_host_view_type;
// ---- Rows with identical source and target global index ----
// NOTE(review): the rowInds / rowVals scratch buffers are declared on lines
// not visible here.
5937 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5941 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5942 const GO targetGID = sourceGID;
5944 ArrayView<const GO> rowIndsConstView;
5945 ArrayView<const Scalar> rowValsConstView;
// Locally indexed source: copy the row out with global column indices.
5947 if (sourceIsLocallyIndexed) {
5949 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5950 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5951 Kokkos::resize(rowInds,rowLength);
5952 Kokkos::resize(rowVals,rowLength);
// Restrict the scratch buffers to exactly rowLength entries.
5956 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5957 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5962 size_t checkRowLength = 0;
5963 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
// The two row-length queries must agree.
5966 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5967 (rowLength != checkRowLength, std::logic_error,
": For "
5968 "global row index " << sourceGID <<
", the source "
5969 "matrix's getNumEntriesInGlobalRow returns a row length "
5970 "of " << rowLength <<
", but getGlobalRowCopy reports "
5971 "a row length of " << checkRowLength <<
"." << suffix);
5973 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5974 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
// Globally indexed source: take a view of the row, no copy needed.
5977 global_inds_host_view_type rowIndsView;
5978 values_host_view_type rowValsView;
5979 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5985 rowIndsConstView = Teuchos::ArrayView<const GO> (
5986 rowIndsView.data(), rowIndsView.extent(0),
5987 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5988 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5989 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5990 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Insert the source row's entries into the target row.
5996 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5997 rowValsConstView, prefix_raw, debug, verbose);
6001 std::ostringstream os;
6002 os << *prefix <<
"Do permutes" << endl;
// ---- Permuted rows: read the LID lists through their host views ----
6004 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
6005 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
6007 const map_type& tgtRowMap = * (this->getRowMap ());
6008 for (
size_t p = 0; p < numPermutes; ++p) {
6009 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
6010 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
6012 ArrayView<const GO> rowIndsConstView;
6013 ArrayView<const Scalar> rowValsConstView;
6015 if (sourceIsLocallyIndexed) {
6016 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6017 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
6018 Kokkos::resize(rowInds,rowLength);
6019 Kokkos::resize(rowVals,rowLength);
6023 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
6024 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
6029 size_t checkRowLength = 0;
6030 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6031 rowValsView, checkRowLength);
// Same row-length consistency check as in the same-ID loop; the message uses
// the same ": " separator after the function name.
6033 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6034 (rowLength != checkRowLength, std::logic_error,
": For "
6035 "source matrix global row index " << sourceGID <<
", "
6036 "getNumEntriesInGlobalRow returns a row length of " <<
6037 rowLength <<
", but getGlobalRowCopy a row length of "
6038 << checkRowLength <<
"." << suffix);
6040 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
6041 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
6044 global_inds_host_view_type rowIndsView;
6045 values_host_view_type rowValsView;
6046 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6052 rowIndsConstView = Teuchos::ArrayView<const GO> (
6053 rowIndsView.data(), rowIndsView.extent(0),
6054 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6055 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6056 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6057 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6063 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6064 rowValsConstView, prefix_raw, debug, verbose);
6068 std::ostringstream os;
6069 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermute: entry point that validates the permute lists and
// dispatches to copyAndPermuteStaticGraph or copyAndPermuteNonStaticGraph
// depending on isStaticGraph().
//
// Parameters visible in these lines:
//   numSameIDs      - number of leading rows shared by source and target.
//   permuteToLIDs   - DualView of target local row indices.
//   permuteFromLIDs - DualView of source local row indices.
//
// Throws std::invalid_argument when the two permute lists differ in length.
// The source object is dynamic_cast to a reference of type RMT, so a source of
// the wrong dynamic type raises std::bad_cast.
6073 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6078 const size_t numSameIDs,
6079 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6080 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6089 const char tfecfFuncName[] =
"copyAndPermute: ";
6090 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
// Verbose diagnostics go to std::cerr, built in a local stream first.
6092 const bool verbose = Behavior::verbose(
"CrsMatrix");
6093 std::unique_ptr<std::string> prefix;
6095 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
6096 std::ostringstream os;
6097 os << *prefix << endl
6098 << *prefix <<
" numSameIDs: " << numSameIDs << endl
6099 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
6108 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
6110 std::cerr << os.str ();
// Both permute lists must have the same length. The message keeps a space on
// both sides of "!=" so the count and the operator do not run together.
6113 const auto numPermute = permuteToLIDs.extent (0);
6114 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6115 (numPermute != permuteFromLIDs.extent (0),
6116 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6117 << numPermute <<
" != permuteFromLIDs.extent(0) = "
6118 << permuteFromLIDs.extent (0) <<
".");
6123 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
// Static graph: the helper takes raw host pointers, so the host copies of the
// DualViews must already be up to date.
6124 if (isStaticGraph ()) {
6125 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6126 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6127 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6128 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6130 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6131 permuteToLIDs_h.data(),
6132 permuteFromLIDs_h.data(),
// Non-static graph: the helper receives the DualViews themselves.
6136 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6137 permuteFromLIDs, numPermute);
6141 std::ostringstream os;
6142 os << *prefix <<
"Done" << endl;
6143 std::cerr << os.str();
// CrsMatrix::packAndPrepare: packs the rows listed in exportLIDs from the
// source object into `exports`.
//
// Parameters visible in these lines:
//   exportLIDs         - local row indices to pack.
//   exports            - output byte buffer (reallocated when too small on
//                        the RowMatrix path).
//   numPacketsPerLID   - output, per-row packet counts.
//   constantNumPackets - output, passed through to packNew / pack.
//
// Two paths:
//   * source is a CrsMatrix with the same template parameters: packNew;
//   * otherwise the source must be a RowMatrix: pack() into a Teuchos::Array,
//     whose contents are then placed in `exports`.
// Exceptions thrown while packing are caught and recorded in `msg`, tagged
// with this process's rank, and reported after a reduceAll over the
// communicator (the code setting lclBad / gblBad is not visible here).
//
// Throws std::invalid_argument if the source is neither kind of matrix, and
// std::logic_error if packing failed on any process.
6147 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6152 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6153 Kokkos::DualView<char*, buffer_device_type>& exports,
6154 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6155 size_t& constantNumPackets)
6160 using Teuchos::outArg;
6161 using Teuchos::REDUCE_MAX;
6162 using Teuchos::reduceAll;
6164 typedef LocalOrdinal LO;
6165 typedef GlobalOrdinal GO;
6166 const char tfecfFuncName[] =
"packAndPrepare: ";
6167 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6169 const bool debug = Behavior::debug(
"CrsMatrix");
6170 const bool verbose = Behavior::verbose(
"CrsMatrix");
// NOTE(review): the body of the null-communicator test is not visible here;
// presumably it returns early -- confirm.
6173 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6174 if (pComm.is_null ()) {
6177 const Teuchos::Comm<int>& comm = *pComm;
// This process's rank (not the communicator size): it labels the "Proc N:"
// error lines collected in `msg` below.
6178 const int myRank = comm.getRank ();
6180 std::unique_ptr<std::string> prefix;
6182 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
6183 std::ostringstream os;
6184 os << *prefix <<
"Start" << endl
6194 std::cerr << os.str ();
// Collects this process's error text from either packing path.
6217 std::ostringstream msg;
// ---- Path 1: source is a CrsMatrix of the same type ----
6220 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6221 const crs_matrix_type* srcCrsMat =
6222 dynamic_cast<const crs_matrix_type*
> (&source);
6223 if (srcCrsMat !=
nullptr) {
6225 std::ostringstream os;
6226 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6227 "calling packNew" << endl;
6228 std::cerr << os.str ();
6231 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6232 constantNumPackets);
6234 catch (std::exception& e) {
6236 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// ---- Path 2: source is some other RowMatrix ----
6240 using Kokkos::HostSpace;
6241 using Kokkos::subview;
6242 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6243 using range_type = Kokkos::pair<size_t, size_t>;
6246 std::ostringstream os;
6247 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6249 std::cerr << os.str ();
6252 const row_matrix_type* srcRowMat =
6253 dynamic_cast<const row_matrix_type*
> (&source);
6254 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6255 (srcRowMat ==
nullptr, std::invalid_argument,
6256 "The source object of the Import or Export operation is neither a "
6257 "CrsMatrix (with the same template parameters as the target object), "
6258 "nor a RowMatrix (with the same first four template parameters as the "
// RowMatrix::pack takes Teuchos array types, so wrap the host views.
6269 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6270 auto exportLIDs_h = exportLIDs.view_host ();
6271 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6272 exportLIDs_h.size ());
6276 Teuchos::Array<char> exports_a;
// numPacketsPerLID is written on host: reset its sync state and mark it
// host-modified before taking the host view.
6282 numPacketsPerLID.clear_sync_state ();
6283 numPacketsPerLID.modify_host ();
6284 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6285 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6286 numPacketsPerLID_h.size ());
6291 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6292 constantNumPackets);
6294 catch (std::exception& e) {
6296 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Grow `exports` only when it is too small, keeping its label (or using
// "exports" when the old label is empty).
6300 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6301 if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6302 const std::string oldLabel = exports.d_view.label ();
6303 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6304 exports = exports_type (newLabel, newAllocSize);
6309 exports.modify_host();
6311 auto exports_h = exports.view_host ();
6312 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
// Unmanaged host view over the packed Teuchos::Array data.
// NOTE(review): the copy into exports_h_sub is not visible in these lines.
6316 typedef typename exports_type::t_host::execution_space HES;
6317 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6318 Kokkos::View<const char*, host_device_type>
6319 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
// ---- Error reporting: agree across processes on whether anything failed ----
6326 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6329 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6330 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6331 "one or more participating processes.");
6335 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6336 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6337 "or more participating processes. Here is this process' error "
6338 "message: " << msg.str ());
6342 std::ostringstream os;
6343 os << *prefix <<
"packAndPrepare: Done!" << endl
6353 std::cerr << os.str ();
// CrsMatrix::packRow: serializes one row into `exports`, starting at byte
// `offset`.
//
// Parameters:
//   exports          - destination byte buffer.
//   offset           - byte position in `exports` where this row starts.
//   numEnt           - number of entries in the row.
//   gidsIn           - the row's global column indices (numEnt of them).
//   valsIn           - the row's values (numEnt of them).
//   numBytesPerValue - packed size of one value, in bytes.
//
// Layout written, in order: the entry count, the global indices, the values.
// NOTE(review): the definitions of numEntLen and errorCode, the packing of the
// entry count and the return statement are not visible in these lines.
//
// Throws std::logic_error if the number of bytes written differs from the
// expected size, and std::runtime_error if packArray reports an error.
6357 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6359 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6360 packRow (
char exports[],
6361 const size_t offset,
6362 const size_t numEnt,
6363 const GlobalOrdinal gidsIn[],
6364 const impl_scalar_type valsIn[],
6365 const size_t numBytesPerValue)
const
6368 using Kokkos::subview;
6370 typedef LocalOrdinal LO;
6371 typedef GlobalOrdinal GO;
6372 typedef impl_scalar_type ST;
// The entry count as a LocalOrdinal; the cast names the destination type so
// the size_t -> LO narrowing is explicit.
6380 const LO numEntLO =
static_cast<LO> (numEnt);
// Byte offsets of the three sections inside `exports`.
6382 const size_t numEntBeg = offset;
6384 const size_t gidsBeg = numEntBeg + numEntLen;
6385 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6386 const size_t valsBeg = gidsBeg + gidsLen;
6387 const size_t valsLen = numEnt * numBytesPerValue;
6389 char*
const numEntOut = exports + numEntBeg;
6390 char*
const gidsOut = exports + gidsBeg;
6391 char*
const valsOut = exports + valsBeg;
// Running total of bytes written, checked against the expected size below.
6393 size_t numBytesOut = 0;
// packArray returns (error code, bytes written); accumulate both.
6398 Kokkos::pair<int, size_t> p;
6399 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6400 errorCode += p.first;
6401 numBytesOut += p.second;
6403 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6404 errorCode += p.first;
6405 numBytesOut += p.second;
// The bytes written must match the section sizes computed above.
6408 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6409 TEUCHOS_TEST_FOR_EXCEPTION
6410 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6411 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6412 << expectedNumBytes <<
".");
6413 TEUCHOS_TEST_FOR_EXCEPTION
6414 (errorCode != 0, std::runtime_error,
"packRow: "
6415 "PackTraits::packArray returned a nonzero error code");
// CrsMatrix::unpackRow: reads one packed row from `imports`, starting at byte
// `offset`, into gidsOut (global column indices) and valsOut (values).
//
// Parameters:
//   gidsOut          - output array for the row's global column indices.
//   valsOut          - output array for the row's values.
//   imports          - source byte buffer.
//   offset           - byte position in `imports` where this row starts.
//   numBytes         - number of bytes this row occupies.
//   numEnt           - number of entries expected in the row.
//   numBytesPerValue - packed size of one value, in bytes.
//
// The byte layout mirrors the three sections computed below: entry count,
// global indices, values.
// NOTE(review): the definitions of numEntLen, numEntOut and errorCode, the
// unpacking of the entry count and the return statements are not visible in
// these lines.
//
// Throws std::logic_error on any inconsistency between numBytes, numEnt and
// the unpacked data, and std::runtime_error if unpackArray reports an error.
6420 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6422 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6423 unpackRow (GlobalOrdinal gidsOut[],
6424 impl_scalar_type valsOut[],
6425 const char imports[],
6426 const size_t offset,
6427 const size_t numBytes,
6428 const size_t numEnt,
6429 const size_t numBytesPerValue)
6432 using Kokkos::subview;
6434 typedef LocalOrdinal LO;
6435 typedef GlobalOrdinal GO;
6436 typedef impl_scalar_type ST;
6438 Details::ProfilingRegion region_upack_row(
6439 "Tpetra::CrsMatrix::unpackRow",
// Zero bytes: per the error text below, this is only an error when numEnt is
// nonzero (the inner numEnt test itself is not visible here).
6443 if (numBytes == 0) {
6446 const int myRank = this->getMap ()->getComm ()->getRank ();
6447 TEUCHOS_TEST_FOR_EXCEPTION
6448 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6449 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6450 "number of entries to unpack (as reported by numPacketsPerLID) "
6451 "for this row numEnt=" << numEnt <<
" != 0.");
// Converse inconsistency: zero entries but a nonzero byte count.
6456 if (numEnt == 0 && numBytes != 0) {
6457 const int myRank = this->getMap ()->getComm ()->getRank ();
6458 TEUCHOS_TEST_FOR_EXCEPTION
6459 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6460 "unpackRow: The number of entries to unpack (as reported by "
6461 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6462 "numBytes=" << numBytes <<
" != 0.");
// Byte offsets of the three sections inside `imports`.
6468 const size_t numEntBeg = offset;
6470 const size_t gidsBeg = numEntBeg + numEntLen;
6471 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6472 const size_t valsBeg = gidsBeg + gidsLen;
6473 const size_t valsLen = numEnt * numBytesPerValue;
6475 const char*
const numEntIn = imports + numEntBeg;
6476 const char*
const gidsIn = imports + gidsBeg;
6477 const char*
const valsIn = imports + valsBeg;
// Running total of bytes consumed, checked against numBytes below.
6479 size_t numBytesOut = 0;
// The entry count read from the buffer must equal numEnt and be nonzero;
// the message lists whichever of the two conditions failed.
6483 if (static_cast<size_t> (numEntOut) != numEnt ||
6484 numEntOut == static_cast<LO> (0)) {
6485 const int myRank = this->getMap ()->getComm ()->getRank ();
6486 std::ostringstream os;
6487 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6488 bool firstErrorCondition =
false;
6489 if (static_cast<size_t> (numEntOut) != numEnt) {
6490 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6491 <<
" does not equal number of entries unpacked from imports "
6492 "buffer numEntOut=" << numEntOut <<
".";
6493 firstErrorCondition =
true;
6495 if (numEntOut == static_cast<LO> (0)) {
6496 if (firstErrorCondition) {
6499 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6500 "but number of bytes to unpack for this row numBytes=" << numBytes
6501 <<
" != 0. This should never happen, since packRow should only "
6502 "ever pack rows with a nonzero number of entries. In this case, "
6503 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6506 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
// unpackArray returns (error code, bytes consumed); accumulate both.
6510 Kokkos::pair<int, size_t> p;
6511 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6512 errorCode += p.first;
6513 numBytesOut += p.second;
6515 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6516 errorCode += p.first;
6517 numBytesOut += p.second;
// Bytes consumed must match both the caller-supplied numBytes and the
// size implied by the section lengths.
6520 TEUCHOS_TEST_FOR_EXCEPTION
6521 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6522 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6524 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6525 TEUCHOS_TEST_FOR_EXCEPTION
6526 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6527 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6528 << expectedNumBytes <<
".");
6530 TEUCHOS_TEST_FOR_EXCEPTION
6531 (errorCode != 0, std::runtime_error,
"unpackRow: "
6532 "PackTraits::unpackArray returned a nonzero error code");
// Make sure the `exports` buffer is large enough to pack the rows listed
// in `exportLIDs`, and report the total number of entries in those rows.
//
// exports         [in/out] pack buffer; replaced by a new DualView only
//                          when its current extent is below allocSize.
// totalNumEntries [out]    reset to 0, then accumulates
//                          getNumEntriesInLocalRow over all export rows.
// exportLIDs      [in]     local row indices to pack; the host view is
//                          read, and an assertion requires that it does
//                          not need a host sync.
//
// When verbose output is enabled for "CrsMatrix", "Before:" / "After:"
// diagnostics are written to std::cerr.
6537 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6539 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6540 allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6541 size_t& totalNumEntries,
6542 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6544 using Details::Behavior;
6547 typedef impl_scalar_type IST;
6548 typedef LocalOrdinal LO;
6549 typedef GlobalOrdinal GO;
6555 const bool verbose = Behavior::verbose(
"CrsMatrix");
6556 std::unique_ptr<std::string> prefix;
6558 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6559 std::ostringstream os;
6560 os << *prefix <<
"Before:"
6568 std::cerr << os.str ();
6573 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
6575 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6576 auto exportLIDs_h = exportLIDs.view_host ();
// Count the entries in all rows to be exported.
// NOTE(review): the handling of an invalid() row count is not visible in
// this excerpt -- confirm what is added to the total in that case.
6579 totalNumEntries = 0;
6580 for (LO i = 0; i < numExportLIDs; ++i) {
6581 const LO lclRow = exportLIDs_h[i];
6582 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
6585 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
6588 totalNumEntries += curNumEntries;
// Size estimate: one LO per exported row plus one value (IST) and one
// global index (GO) per entry.
6599 const size_t allocSize =
6600 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
6601 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
// Only reallocate when the buffer is too small.  The existing label is
// kept unless it is empty, in which case "exports" is used.
6602 if (static_cast<size_t> (exports.extent (0)) < allocSize) {
6603 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6605 const std::string oldLabel = exports.d_view.label ();
6606 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6607 exports = exports_type (newLabel, allocSize);
6611 std::ostringstream os;
6612 os << *prefix <<
"After:"
6620 std::cerr << os.str ();
// Pack the rows listed in `exportLIDs` into `exports`, choosing the
// implementation based on whether the matrix has a static graph.
//
// exportLIDs         [in]     local indices of the rows to pack.
// exports            [in/out] destination pack buffer.
// numPacketsPerLID   [out]    per-row packet counts, filled by the
//                             selected implementation.
// constantNumPackets [out]    forwarded to the selected implementation.
//
// The non-static-graph path delegates to packNonStaticNew.
// NOTE(review): the callee on the static-graph path is not visible in this
// excerpt; only its trailing `constantNumPackets` argument is.
6624 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6627 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6628 Kokkos::DualView<char*, buffer_device_type>& exports,
6629 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6630 size_t& constantNumPackets)
const
6634 if (this->isStaticGraph ()) {
6637 constantNumPackets);
6640 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
6641 constantNumPackets);
// Pack the rows listed in `exportLIDs` into `exports` on the host, one row
// at a time, using packRow.
//
// exportLIDs         [in]     local indices of the rows to pack.
// exports            [in/out] pack buffer; sized via allocatePackSpaceNew
//                             and then marked as modified on host.
// numPacketsPerLID   [out]    on output, entry i holds the number of bytes
//                             packed for row exportLIDs[i].  It is passed
//                             by const reference, so its sync state is
//                             changed through a const_cast.
// constantNumPackets [out]    set to 0.
//
// Throws std::invalid_argument if exportLIDs and numPacketsPerLID differ
// in length, and std::logic_error if a row's packed data would fall
// outside the `exports` buffer.
6645 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6648 packNonStaticNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6649 Kokkos::DualView<char*, buffer_device_type>& exports,
6650 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6651 size_t& constantNumPackets)
const
6659 using LO = LocalOrdinal;
6660 using GO = GlobalOrdinal;
6661 using ST = impl_scalar_type;
6662 const char tfecfFuncName[] =
"packNonStaticNew: ";
6664 const bool verbose = Behavior::verbose(
"CrsMatrix");
6665 std::unique_ptr<std::string> prefix;
6667 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6668 std::ostringstream os;
6669 os << *prefix <<
"Start" << endl;
6670 std::cerr << os.str ();
6673 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
6674 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6675 (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
6676 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
6677 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
6683 constantNumPackets = 0;
// Size the export buffer, then declare that the host copy is about to be
// overwritten.
6688 size_t totalNumEntries = 0;
6689 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
6690 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
6693 exports.clear_sync_state();
6694 exports.modify_host();
6695 auto exports_h = exports.view_host ();
6697 std::ostringstream os;
6698 os << *prefix <<
"After marking exports as modified on host, "
6700 std::cerr << os.str ();
6704 auto exportLIDs_h = exportLIDs.view_host ();
// numPacketsPerLID is an output despite being a const reference, so
// constness is cast away to update its sync state.
6707 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6708 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6709 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
// gidsIn_k is scratch storage for global column indices; it is used below
// when the matrix is locally indexed.
// NOTE(review): presumably sized by maxRowNumEnt -- the allocation
// arguments are not fully visible here.
6714 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
6718 typename global_inds_host_view_type::non_const_type gidsIn_k;
6719 if (this->isLocallyIndexed()) {
6721 typename global_inds_host_view_type::non_const_type(
"packGids",
6726 for (
size_t i = 0; i < numExportLIDs; ++i) {
6727 const LO lclRow = exportLIDs_h[i];
6729 size_t numBytes = 0;
6730 size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
6737 numPacketsPerLID_h[i] = 0;
// Locally indexed: translate the row's local column indices to global
// indices through the column Map before packing.
6741 if (this->isLocallyIndexed ()) {
6742 typename global_inds_host_view_type::non_const_type gidsIn;
6743 values_host_view_type valsIn;
6747 local_inds_host_view_type lidsIn;
6748 this->getLocalRowView (lclRow, lidsIn, valsIn);
6749 const map_type& colMap = * (this->getColMap ());
6750 for (
size_t k = 0; k < numEnt; ++k) {
6751 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
6753 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
6755 const size_t numBytesPerValue =
6756 PackTraits<ST>::packValueCount (valsIn[0]);
6757 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6758 gidsIn.data (), valsIn.data (),
// Globally indexed: the row view already holds global column indices.
6761 else if (this->isGloballyIndexed ()) {
6762 global_inds_host_view_type gidsIn;
6763 values_host_view_type valsIn;
6769 const map_type& rowMap = * (this->getRowMap ());
6770 const GO gblRow = rowMap.getGlobalElement (lclRow);
6771 this->getGlobalRowView (gblRow, gidsIn, valsIn);
6773 const size_t numBytesPerValue =
6774 PackTraits<ST>::packValueCount (valsIn[0]);
6775 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6776 gidsIn.data (), valsIn.data (),
// Guard against writing past the end of the export buffer.
6783 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6784 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6785 "First invalid offset into 'exports' pack buffer at index i = " << i
6786 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
6787 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
// Record the packed size of this row; the unit is bytes, not entries.
6792 numPacketsPerLID_h[i] = numBytes;
6797 std::ostringstream os;
6798 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6805 std::cerr << os.str ();
// Raw-pointer front end for combineGlobalValues.
//
// Converts the local row index to a global row index through the graph's
// row Map, wraps `cols` and `vals` in Teuchos::ArrayView objects of length
// numEnt, and forwards to combineGlobalValues.
//
// lclRow [in] local index of the target row.
// numEnt [in] number of entries in `vals` and `cols`.
// vals   [in] values, stored as impl_scalar_type; reinterpreted as Scalar.
// cols   [in] global column indices.
// prefix [in] forwarded unchanged to combineGlobalValues.
//
// combMode, debug and verbose are also forwarded unchanged; their
// declarations are not visible in this excerpt.
6809 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6811 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6812 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
6813 const LocalOrdinal numEnt,
6814 const impl_scalar_type vals[],
6815 const GlobalOrdinal cols[],
6817 const char*
const prefix,
6821 using GO = GlobalOrdinal;
6825 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
// An empty row is passed as a (nullptr, 0) view rather than a possibly
// non-null pointer with length zero.
6826 Teuchos::ArrayView<const GO> cols_av
6827 (numEnt == 0 ?
nullptr : cols, numEnt);
6828 Teuchos::ArrayView<const Scalar> vals_av
6829 (numEnt == 0 ?
nullptr : reinterpret_cast<const Scalar*> (vals), numEnt);
6834 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6835 prefix, debug, verbose);
// Combine the given (column index, value) pairs into one global row of the
// matrix, according to the combine mode.
//
// globalRowIndex [in] global index of the target row.
// columnIndices  [in] global column indices of the incoming entries.
// values         [in] incoming values, parallel to columnIndices.
// prefix         [in] forwarded to insertGlobalValuesFilteredChecked.
//
// combineMode, debug and verbose are used below; their declarations are
// not visible in this excerpt.
//
// Static graph:
//   ADD     -> sumIntoGlobalValues
//   REPLACE -> replaceGlobalValues
//   ABSMAX  -> transformGlobalValues with Details::AbsMax
//   INSERT  -> throws std::invalid_argument
//   other   -> throws std::logic_error
// Otherwise (the error messages below call this a dynamic graph):
//   ADD / INSERT     -> insertGlobalValuesFilteredChecked
//   ABSMAX / REPLACE -> throws std::logic_error (not yet implemented,
//                       per the message text)
//   other            -> throws std::logic_error
6839 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6841 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6842 combineGlobalValues(
6843 const GlobalOrdinal globalRowIndex,
6844 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6845 const Teuchos::ArrayView<const Scalar>& values,
6847 const char*
const prefix,
6851 const char tfecfFuncName[] =
"combineGlobalValues: ";
// Static graph: only existing entries may be modified.
6853 if (isStaticGraph ()) {
6857 if (combineMode ==
ADD) {
6858 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6860 else if (combineMode ==
REPLACE) {
6861 replaceGlobalValues (globalRowIndex, columnIndices, values);
6863 else if (combineMode ==
ABSMAX) {
6864 using ::Tpetra::Details::AbsMax;
6866 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
6870 else if (combineMode ==
INSERT) {
// The test condition repeats the enclosing branch conditions, so this
// throws whenever it is reached.
6871 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6872 (isStaticGraph() && combineMode ==
INSERT,
6873 std::invalid_argument,
"INSERT combine mode is forbidden "
6874 "if the matrix has a static (const) graph (i.e., was "
6875 "constructed with the CrsMatrix constructor that takes a "
6876 "const CrsGraph pointer).");
6879 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6880 (
true, std::logic_error,
"Invalid combine mode; should "
6882 "Please report this bug to the Tpetra developers.");
// Non-static graph: ADD and INSERT both go through the same insertion
// routine.
6886 if (combineMode ==
ADD || combineMode ==
INSERT) {
6893 insertGlobalValuesFilteredChecked(globalRowIndex,
6894 columnIndices, values, prefix, debug, verbose);
6905 else if (combineMode ==
ABSMAX) {
6906 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6907 ! isStaticGraph () && combineMode ==
ABSMAX, std::logic_error,
6908 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6911 else if (combineMode ==
REPLACE) {
6912 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6913 ! isStaticGraph () && combineMode ==
REPLACE, std::logic_error,
6914 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6918 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6919 true, std::logic_error,
"Should never get here! Please report this "
6920 "bug to the Tpetra developers.");
// Validate the arguments, then unpack the received data in `imports` and
// combine it into this matrix by calling unpackAndCombineImpl.
//
// importLIDs         [in] local indices of the rows receiving data.
// imports            [in] packed data received from other processes.
// numPacketsPerLID   [in] per-row packet counts; must have the same
//                         length as importLIDs.
// constantNumPackets [in] forwarded to unpackAndCombineImpl.
//
// combineMode is also used and forwarded; its declaration is not visible
// in this excerpt.
//
// Throws std::invalid_argument if combineMode is not among validModes
// (the message lists ADD, REPLACE, ABSMAX, INSERT and ZERO), or if
// importLIDs and numPacketsPerLID differ in length.
//
// Two calls to unpackAndCombineImpl are visible.  One is wrapped in a
// try/catch, followed by a reduceAll (REDUCE_MAX) of a local error flag
// and a gathervPrint of the per-process messages, after which a
// std::logic_error is thrown.  The other is a plain call.
// NOTE(review): the condition selecting between them is not visible --
// presumably the `debug` flag; confirm.  Because the first path uses a
// collective reduction, presumably every process must take the same path.
6925 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6929 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6930 Kokkos::DualView<char*, buffer_device_type> imports,
6931 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6932 const size_t constantNumPackets,
6939 const char tfecfFuncName[] =
"unpackAndCombine: ";
6940 ProfilingRegion regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
6942 const bool debug = Behavior::debug(
"CrsMatrix");
6943 const bool verbose = Behavior::verbose(
"CrsMatrix");
// Names used only to build the "invalid combine mode" error message.
6944 constexpr
int numValidModes = 5;
6947 const char* validModeNames[numValidModes] =
6948 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6950 std::unique_ptr<std::string> prefix;
6952 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6953 std::ostringstream os;
6954 os << *prefix <<
"Start:" << endl
6964 << *prefix <<
" constantNumPackets: " << constantNumPackets
6968 std::cerr << os.str ();
// Reject combine modes that are not in the validModes list.
6972 if (std::find (validModes, validModes+numValidModes, combineMode) ==
6973 validModes+numValidModes) {
6974 std::ostringstream os;
6975 os <<
"Invalid combine mode. Valid modes are {";
6976 for (
int k = 0; k < numValidModes; ++k) {
6977 os << validModeNames[k];
6978 if (k < numValidModes - 1) {
6983 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6984 (
true, std::invalid_argument, os.str ());
6986 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6987 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
6988 std::invalid_argument,
"importLIDs.extent(0)="
6989 << importLIDs.extent(0)
6990 <<
" != numPacketsPerLID.extent(0)="
6991 << numPacketsPerLID.extent(0) <<
".");
// NOTE(review): the body of this ZERO branch is not visible in this
// excerpt.
6994 if (combineMode ==
ZERO) {
6999 using Teuchos::reduceAll;
7000 std::unique_ptr<std::ostringstream> msg (
new std::ostringstream ());
7003 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
7004 constantNumPackets, combineMode,
7006 }
catch (std::exception& e) {
// Agree across all processes on whether any of them failed.
7011 const Teuchos::Comm<int>& comm = * (this->getComm ());
7012 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
7013 lclBad, Teuchos::outArg (gblBad));
// Collect every process' message into one stream for the exception text.
7019 std::ostringstream os;
7020 os <<
"Proc " << comm.getRank () <<
": " << msg->str () << endl;
7021 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
7022 ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
7023 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7024 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
7025 "threw an exception on one or more participating processes: "
7026 << endl << msg->str ());
7030 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
7031 constantNumPackets, combineMode,
7036 std::ostringstream os;
7037 os << *prefix <<
"Done!" << endl
7047 std::cerr << os.str ();
// Implementation of unpackAndCombine; dispatches on whether the matrix has
// a static graph.
//
// importLIDs         [in] local indices of the rows receiving data.
// imports            [in] packed data to unpack.
// numPacketsPerLID   [in] per-row packet counts.
// constantNumPackets [in] forwarded to the selected implementation.
//
// combineMode and verbose are also used; their declarations are not
// visible in this excerpt.
//
// Static graph: delegates to Details::unpackCrsMatrixAndCombineNew.
// Otherwise: asks the graph for the padding required by the incoming data
// (computePaddingForCrsMatrixUnpack), applies it with applyCrsPadding, and
// then calls unpackAndCombineImplNonStatic.
//
// An exception thrown by computePaddingForCrsMatrixUnpack is rethrown as
// std::runtime_error with the process rank prepended; the rank is reported
// as -1 when no communicator is available.
7051 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7055 const Kokkos::DualView<
const local_ordinal_type*,
7056 buffer_device_type>& importLIDs,
7057 Kokkos::DualView<char*, buffer_device_type> imports,
7058 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7059 const size_t constantNumPackets,
7064 "Tpetra::CrsMatrix::unpackAndCombineImpl",
7068 const char tfecfFuncName[] =
"unpackAndCombineImpl";
7069 std::unique_ptr<std::string> prefix;
7071 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
7072 std::ostringstream os;
7073 os << *prefix <<
"isStaticGraph(): "
7074 << (isStaticGraph() ?
"true" :
"false")
7075 <<
", importLIDs.extent(0): "
7076 << importLIDs.extent(0)
7077 <<
", imports.extent(0): "
7078 << imports.extent(0)
7079 <<
", numPacketsPerLID.extent(0): "
7080 << numPacketsPerLID.extent(0)
7082 std::cerr << os.str();
7085 if (isStaticGraph ()) {
7086 using Details::unpackCrsMatrixAndCombineNew;
7087 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7088 importLIDs, constantNumPackets,
// Non-static graph: compute the padding for the incoming entries before
// unpacking.
7093 using padding_type =
typename crs_graph_type::padding_type;
7094 std::unique_ptr<padding_type> padding;
7096 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7097 importLIDs, imports, numPacketsPerLID, verbose);
7099 catch (std::exception& e) {
7100 const auto rowMap = getRowMap();
7101 const auto comm = rowMap.is_null() ? Teuchos::null :
7103 const int myRank = comm.is_null() ? -1 : comm->getRank();
7104 TEUCHOS_TEST_FOR_EXCEPTION
7105 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7106 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7107 "threw an exception: " << e.what());
7110 std::ostringstream os;
7111 os << *prefix <<
"Call applyCrsPadding" << endl;
7112 std::cerr << os.str();
7114 applyCrsPadding(*padding, verbose);
7117 std::ostringstream os;
7118 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7119 std::cerr << os.str();
7121 unpackAndCombineImplNonStatic(importLIDs, imports,
7128 std::ostringstream os;
7129 os << *prefix <<
"Done" << endl;
7130 std::cerr << os.str();
// Unpack and combine received rows on the host, one row at a time.
//
// importLIDs         [in] local indices of the rows receiving data; an
//                         assertion requires that the host view does not
//                         need a sync.
// imports            [in] packed data; synced to host here if needed.
// numPacketsPerLID   [in] per-row byte counts; synced to host if needed.
// constantNumPackets [in] not referenced in the visible part of the body.
//
// combineMode is also used; its declaration is not visible here.
//
// Outline of the visible code:
//   1. A first loop over the import rows checks each row's byte range
//      against the size of `imports` and records the largest per-row
//      entry count (maxRowNumEnt).
//   2. Scratch arrays for global indices, local indices and values are
//      allocated once with maxRowNumEnt entries each.
//   3. A second loop unpacks each row with unpackRow into subviews of the
//      scratch arrays and passes them to combineGlobalValuesRaw.
//
// Throws std::logic_error when the packed data are inconsistent (byte
// range out of bounds, zero entry count for a row with nonzero bytes, or
// unpackRow consuming a different number of bytes than expected).
7134 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7136 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7137 unpackAndCombineImplNonStatic(
7138 const Kokkos::DualView<
const local_ordinal_type*,
7139 buffer_device_type>& importLIDs,
7140 Kokkos::DualView<char*, buffer_device_type> imports,
7141 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7142 const size_t constantNumPackets,
7146 using Kokkos::subview;
7147 using Kokkos::MemoryUnmanaged;
7148 using Details::Behavior;
7151 using Details::PackTraits;
7152 using Details::ScalarViewTraits;
7154 using LO = LocalOrdinal;
7155 using GO = GlobalOrdinal;
7156 using ST = impl_scalar_type;
7157 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7159 typename View<int*, device_type>::HostMirror::execution_space;
7160 using pair_type = std::pair<typename View<int*, HES>::size_type,
7161 typename View<int*, HES>::size_type>;
7162 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7163 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7164 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7166 const bool debug = Behavior::debug(
"CrsMatrix");
7167 const bool verbose = Behavior::verbose(
"CrsMatrix");
7168 std::unique_ptr<std::string> prefix;
7170 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7171 std::ostringstream os;
7172 os << *prefix << endl;
7173 std::cerr << os.str ();
// The raw prefix pointer is passed down only when verbose output is on.
7175 const char*
const prefix_raw =
7176 verbose ? prefix.get()->c_str() :
nullptr;
// NOTE(review): the body of this guard is not visible -- presumably an
// early return when there is nothing to combine; confirm.
7178 const size_type numImportLIDs = importLIDs.extent (0);
7179 if (combineMode ==
ZERO || numImportLIDs == 0) {
7183 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
7184 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
// All unpacking below reads the host views, so bring them up to date.
7189 if (imports.need_sync_host()) {
7190 imports.sync_host ();
7192 auto imports_h = imports.view_host();
7195 if (numPacketsPerLID.need_sync_host()) {
7196 numPacketsPerLID.sync_host ();
7198 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
7200 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7201 auto importLIDs_h = importLIDs.view_host();
7203 size_t numBytesPerValue;
7214 numBytesPerValue = PackTraits<ST>::packValueCount (val);
// Pass 1: validate each row's byte range and find the largest row, so the
// scratch arrays can be allocated once.
7219 size_t maxRowNumEnt = 0;
7220 for (size_type i = 0; i < numImportLIDs; ++i) {
7221 const size_t numBytes = numPacketsPerLID_h[i];
7222 if (numBytes == 0) {
7227 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7228 (offset + numBytes >
size_t(imports_h.extent (0)),
7229 std::logic_error,
": At local row index importLIDs_h[i="
7230 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7231 <<
") + numBytes (=" << numBytes <<
") > "
7232 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
7237 const size_t theNumBytes =
7239 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7240 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7241 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
7243 const char*
const inBuf = imports_h.data () + offset;
7244 const size_t actualNumBytes =
7248 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7249 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7250 <<
", actualNumBytes=" << actualNumBytes
7251 <<
" > numBytes=" << numBytes <<
".");
7252 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7253 (numEntLO == 0, std::logic_error,
": At local row index "
7254 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7255 "the number of entries read from the packed data is "
7256 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7260 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
// Scratch storage, reused for every row in the second pass.
7268 View<GO*, HES> gblColInds;
7269 View<LO*, HES> lclColInds;
7270 View<ST*, HES> vals;
7283 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7284 gid, maxRowNumEnt,
"gids");
7285 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7286 lid, maxRowNumEnt,
"lids");
7287 vals = ScalarViewTraits<ST, HES>::allocateArray(
7288 val, maxRowNumEnt,
"vals");
// Pass 2: unpack each row into the scratch arrays and combine it into the
// matrix.
7292 for (size_type i = 0; i < numImportLIDs; ++i) {
7293 const size_t numBytes = numPacketsPerLID_h[i];
7294 if (numBytes == 0) {
7298 const char*
const inBuf = imports_h.data () + offset;
7301 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7302 const LO lclRow = importLIDs_h[i];
// Unmanaged subviews over the first numEnt entries of the scratch arrays.
7304 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7305 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7307 const size_t numBytesOut =
7308 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7309 offset, numBytes, numEnt, numBytesPerValue);
7310 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7311 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7312 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7313 << numBytesOut <<
".");
7315 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7316 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7317 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7318 combineMode, prefix_raw, debug, verbose);
7324 std::ostringstream os;
7325 os << *prefix <<
"Done" << endl;
7326 std::cerr << os.str();
// Provide a MultiVector distributed by this matrix's column Map, reusing
// the cached importMV_ when possible.  (The function name is taken from
// the error messages below.)
//
// force [in] when true, the column-Map MultiVector is set up even if the
//            graph has no Import object.
//
// The cached importMV_ is reused when it exists and has numVecs columns;
// otherwise a new MultiVector over the column Map is created and cached.
//
// Throws std::runtime_error if the matrix has no column Map or if its
// graph is not fill complete.
//
// NOTE(review): the remaining parameters, the origin of numVecs, and the
// return statement are not visible in this excerpt.
7330 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7331 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7334 const bool force)
const
7336 using Teuchos::null;
7340 TEUCHOS_TEST_FOR_EXCEPTION(
7341 ! this->hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7342 "MapMultiVector: You may only call this method if the matrix has a "
7343 "column Map. If the matrix does not yet have a column Map, you should "
7344 "first call fillComplete (with domain and range Map if necessary).");
7348 TEUCHOS_TEST_FOR_EXCEPTION(
7349 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7350 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7351 "this matrix's graph is fill complete.");
7354 RCP<const import_type> importer = this->getGraph ()->getImporter ();
7355 RCP<const map_type> colMap = this->getColMap ();
// (Re)allocate the cache only when it is missing or has the wrong number
// of columns.
7368 if (! importer.is_null () || force) {
7369 if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
7370 X_colMap = rcp (
new MV (colMap, numVecs));
7373 importMV_ = X_colMap;
7376 X_colMap = importMV_;
// Provide a MultiVector distributed by this matrix's row Map, reusing the
// cached exportMV_ when possible.  (The function name is taken from the
// error message below.)
//
// force [in] when true, the row-Map MultiVector is set up even if the
//            graph has no Export object.
//
// The cached exportMV_ is reused when it exists and has numVecs columns;
// otherwise a new MultiVector over the row Map is created and cached.
//
// Throws std::runtime_error if the graph is not fill complete.
//
// NOTE(review): the remaining parameters, the origin of numVecs, and the
// return statement are not visible in this excerpt.
7387 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7388 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7391 const bool force)
const
7393 using Teuchos::null;
7399 TEUCHOS_TEST_FOR_EXCEPTION(
7400 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7401 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7402 "matrix's graph is fill complete.");
7405 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
7409 RCP<const map_type> rowMap = this->getRowMap ();
// (Re)allocate the cache only when it is missing or has the wrong number
// of columns.
7421 if (! exporter.is_null () || force) {
7422 if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
7423 Y_rowMap = rcp (
new MV (rowMap, numVecs));
7424 exportMV_ = Y_rowMap;
7427 Y_rowMap = exportMV_;
// Forward removeEmptyProcessesInPlace to the matrix's graph and refresh
// the members that depend on it.  (The function name is taken from the
// error message below.)
//
// newMap is passed to the graph unchanged; its declaration is not visible
// in this excerpt.
//
// Throws std::logic_error if myGraph_ is null, which the message explains
// is the case for a matrix created with a constant graph.
7433 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7438 TEUCHOS_TEST_FOR_EXCEPTION(
7439 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7440 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7441 "was created with a constant graph (that is, when it was created using "
7442 "the version of its constructor that takes an RCP<const CrsGraph>). "
7443 "This is because the matrix is not allowed to modify the graph in that "
7444 "case, but removing empty processes requires modifying the graph.");
7445 myGraph_->removeEmptyProcessesInPlace (newMap);
// Re-read the row Map after the graph call and store it as this object's
// map_.
7449 this->map_ = this->getRowMap ();
// Re-point the const graph handle at myGraph_.
7453 staticGraph_ = Teuchos::rcp_const_cast<
const Graph> (myGraph_);
// Build a new matrix C = alpha*A + beta*B, where B is *this.  (The
// function name is taken from errPfx; the formula is taken from the
// verbose message below.)
//
// domainMap [in] domain Map for C; if null, B's domain Map is used, or
//                A's if B has none.
// rangeMap  [in] range Map for C; same fallback rule as domainMap.
// params    [in] optional parameter list.  Recognized entries:
//                  "Call fillComplete"       (bool, defaults to true)
//                  "Constructor parameters"  (sublist passed to C's
//                                             constructor)
//                  "fillComplete parameters" (sublist passed to
//                                             C->fillComplete)
//
// alpha and A are used below; their declarations, and that of beta, are
// not visible in this excerpt.
//
// Throws std::invalid_argument if no usable domain or range Map can be
// determined, if the supplied Maps or A's Maps are not the same as B's
// (isSameAs), or if A and B do not have the same row Map.  Throws
// std::logic_error if C was not created.
//
// NOTE(review): the return statement is not visible; the declared return
// type is RCP<RowMatrix<...>>, so C is presumably returned through an
// implicit cast -- confirm.
7456 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7457 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7462 const Teuchos::RCP<const map_type>& domainMap,
7463 const Teuchos::RCP<const map_type>& rangeMap,
7464 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7466 using Teuchos::Array;
7467 using Teuchos::ArrayView;
7468 using Teuchos::ParameterList;
7471 using Teuchos::rcp_implicit_cast;
7472 using Teuchos::sublist;
7476 using crs_matrix_type =
7478 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7482 std::unique_ptr<std::string> prefix;
7484 prefix = this->createPrefix(
"CrsMatrix",
"add");
7485 std::ostringstream os;
7486 os << *prefix <<
"Start" << endl;
7487 std::cerr << os.str ();
7490 const crs_matrix_type& B = *
this;
7491 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7492 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7499 RCP<const map_type> A_rangeMap = A.
getRangeMap ();
7500 RCP<const map_type> B_domainMap = B.getDomainMap ();
7501 RCP<const map_type> B_rangeMap = B.getRangeMap ();
7503 RCP<const map_type> theDomainMap = domainMap;
7504 RCP<const map_type> theRangeMap = rangeMap;
// Choose the domain and range Maps for C when the caller passed null:
// B's Map is used if it has one, otherwise A's.
7506 if (domainMap.is_null ()) {
7507 if (B_domainMap.is_null ()) {
7508 TEUCHOS_TEST_FOR_EXCEPTION(
7509 A_domainMap.is_null (), std::invalid_argument,
7510 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7511 "then you must supply a nonnull domain Map to this method.");
7512 theDomainMap = A_domainMap;
7514 theDomainMap = B_domainMap;
7517 if (rangeMap.is_null ()) {
7518 if (B_rangeMap.is_null ()) {
7519 TEUCHOS_TEST_FOR_EXCEPTION(
7520 A_rangeMap.is_null (), std::invalid_argument,
7521 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7522 "then you must supply a nonnull range Map to this method.");
7523 theRangeMap = A_rangeMap;
7525 theRangeMap = B_rangeMap;
// Compatibility checks between A's Maps, B's Maps and any Maps supplied by
// the caller.
7533 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7534 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7535 TEUCHOS_TEST_FOR_EXCEPTION
7536 (! B_domainMap->isSameAs(*A_domainMap),
7537 std::invalid_argument,
7538 errPfx <<
"The input RowMatrix A must have a domain Map "
7539 "which is the same as (isSameAs) this RowMatrix's "
7541 TEUCHOS_TEST_FOR_EXCEPTION
7542 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7543 errPfx <<
"The input RowMatrix A must have a range Map "
7544 "which is the same as (isSameAs) this RowMatrix's range "
7546 TEUCHOS_TEST_FOR_EXCEPTION
7547 (! domainMap.is_null() &&
7548 ! domainMap->isSameAs(*B_domainMap),
7549 std::invalid_argument,
7550 errPfx <<
"The input domain Map must be the same as "
7551 "(isSameAs) this RowMatrix's domain Map.");
7552 TEUCHOS_TEST_FOR_EXCEPTION
7553 (! rangeMap.is_null() &&
7554 ! rangeMap->isSameAs(*B_rangeMap),
7555 std::invalid_argument,
7556 errPfx <<
"The input range Map must be the same as "
7557 "(isSameAs) this RowMatrix's range Map.");
7560 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7561 TEUCHOS_TEST_FOR_EXCEPTION
7562 (! domainMap.is_null() &&
7563 ! domainMap->isSameAs(*B_domainMap),
7564 std::invalid_argument,
7565 errPfx <<
"The input domain Map must be the same as "
7566 "(isSameAs) this RowMatrix's domain Map.");
7567 TEUCHOS_TEST_FOR_EXCEPTION
7568 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7569 std::invalid_argument,
7570 errPfx <<
"The input range Map must be the same as "
7571 "(isSameAs) this RowMatrix's range Map.");
7574 TEUCHOS_TEST_FOR_EXCEPTION
7575 (domainMap.is_null() || rangeMap.is_null(),
7576 std::invalid_argument, errPfx <<
"If neither A nor B "
7577 "have a domain and range Map, then you must supply a "
7578 "nonnull domain and range Map to this method.");
// Read the optional parameters.  fillComplete is called on C unless
// "Call fillComplete" says otherwise.
7585 bool callFillComplete =
true;
7586 RCP<ParameterList> constructorSublist;
7587 RCP<ParameterList> fillCompleteSublist;
7588 if (! params.is_null()) {
7590 params->get(
"Call fillComplete", callFillComplete);
7591 constructorSublist = sublist(params,
"Constructor parameters");
7592 fillCompleteSublist = sublist(params,
"fillComplete parameters");
7595 RCP<const map_type> A_rowMap = A.
getRowMap ();
7596 RCP<const map_type> B_rowMap = B.getRowMap ();
7597 RCP<const map_type> C_rowMap = B_rowMap;
7598 RCP<crs_matrix_type> C;
// Allocate C with a per-row bound equal to the sum of the row lengths of
// A (only when alpha is nonzero) and B.  This needs A and B to share a
// row Map.
7604 if (A_rowMap->isSameAs (*B_rowMap)) {
7605 const LO localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7606 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
7609 if (alpha != ZERO) {
7610 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7612 C_maxNumEntriesPerRow[localRow] += A_numEntries;
7617 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7618 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7619 C_maxNumEntriesPerRow[localRow] += B_numEntries;
7623 if (constructorSublist.is_null ()) {
7624 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow ()));
7626 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
7627 constructorSublist));
7638 TEUCHOS_TEST_FOR_EXCEPTION
7639 (
true, std::invalid_argument, errPfx <<
"The row maps must "
7640 "be the same for statically allocated matrices, to ensure "
7641 "that there is sufficient space to do the addition.");
7644 TEUCHOS_TEST_FOR_EXCEPTION
7645 (C.is_null (), std::logic_error,
7646 errPfx <<
"C should not be null at this point. "
7647 "Please report this bug to the Tpetra developers.");
7650 std::ostringstream os;
7651 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7652 std::cerr << os.str ();
7654 using gids_type = nonconst_global_inds_host_view_type;
7655 using vals_type = nonconst_values_host_view_type;
// Insert alpha*A row by row.  `ind` and `val` are scratch buffers that
// grow on demand and are reused across rows.
7659 if (alpha != ZERO) {
7660 const LO A_localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7661 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7663 const GO globalRow = A_rowMap->getGlobalElement (localRow);
7664 if (A_numEntries > static_cast<size_t> (ind.size ())) {
7665 Kokkos::resize(ind,A_numEntries);
7666 Kokkos::resize(val,A_numEntries);
7668 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, A_numEntries));
7669 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, A_numEntries));
7673 for (
size_t k = 0; k < A_numEntries; ++k) {
7674 valView[k] *= alpha;
7677 C->insertGlobalValues (globalRow, A_numEntries,
7678 reinterpret_cast<Scalar *>(valView.data()),
// Insert B's rows in the same way.
// NOTE(review): the guard and the scaling by beta are not visible in this
// excerpt.
7684 const LO B_localNumRows =
static_cast<LO
> (B_rowMap->getLocalNumElements ());
7685 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7686 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7687 const GO globalRow = B_rowMap->getGlobalElement (localRow);
7688 if (B_numEntries > static_cast<size_t> (ind.size ())) {
7689 Kokkos::resize(ind,B_numEntries);
7690 Kokkos::resize(val,B_numEntries);
7692 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, B_numEntries));
7693 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, B_numEntries));
7694 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
7697 for (
size_t k = 0; k < B_numEntries; ++k) {
7701 C->insertGlobalValues (globalRow, B_numEntries,
7702 reinterpret_cast<Scalar *>(valView.data()),
// Optionally finish C with the chosen domain and range Maps.
7707 if (callFillComplete) {
7709 std::ostringstream os;
7710 os << *prefix <<
"Call fillComplete on C" << endl;
7711 std::cerr << os.str ();
7713 if (fillCompleteSublist.is_null ()) {
7714 C->fillComplete (theDomainMap, theRangeMap);
7716 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
7720 std::ostringstream os;
7721 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7722 std::cerr << os.str ();
7726 std::ostringstream os;
7727 os << *prefix <<
"Done" << endl;
7728 std::cerr << os.str ();
7735 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7739 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7740 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7741 const Teuchos::RCP<const map_type>& domainMap,
7742 const Teuchos::RCP<const map_type>& rangeMap,
7743 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7750 using Teuchos::ArrayRCP;
7751 using Teuchos::ArrayView;
7752 using Teuchos::Comm;
7753 using Teuchos::ParameterList;
7756 typedef LocalOrdinal LO;
7757 typedef GlobalOrdinal GO;
7758 typedef node_type NT;
7763 const bool debug = Behavior::debug(
"CrsMatrix");
7764 const bool verbose = Behavior::verbose(
"CrsMatrix");
7765 int MyPID = getComm ()->getRank ();
7767 std::unique_ptr<std::string> verbosePrefix;
7770 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7771 std::ostringstream os;
7772 os <<
"Start" << endl;
7773 std::cerr << os.str();
7780 bool reverseMode =
false;
7781 bool restrictComm =
false;
7783 int mm_optimization_core_count =
7784 Behavior::TAFC_OptimizationCoreCount();
7785 RCP<ParameterList> matrixparams;
7786 bool overrideAllreduce =
false;
7787 bool useKokkosPath =
false;
7788 if (! params.is_null ()) {
7789 matrixparams = sublist (params,
"CrsMatrix");
7790 reverseMode = params->get (
"Reverse Mode", reverseMode);
7791 useKokkosPath = params->get (
"TAFC: use kokkos path", useKokkosPath);
7792 restrictComm = params->get (
"Restrict Communicator", restrictComm);
7793 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
7794 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7795 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
7797 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7798 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7799 if(reverseMode) isMM =
false;
7803 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
7805 int reduced_mismatch = 0;
7806 if (isMM && !overrideAllreduce) {
7809 const bool source_vals = ! getGraph ()->getImporter ().is_null();
7810 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
7811 rowTransfer.getRemoteLIDs ().size() == 0);
7812 mismatch = (source_vals != target_vals) ? 1 : 0;
7815 Teuchos::REDUCE_MAX, * (getComm ()));
7818 #ifdef HAVE_TPETRA_MMM_TIMINGS
7819 using Teuchos::TimeMonitor;
7821 if(!params.is_null())
7822 label = params->get(
"Timer Label",label);
7823 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
7826 std::ostringstream os;
7827 if(isMM) os<<
":MMOpt";
7828 else os<<
":MMLegacy";
7832 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
7840 const import_type* xferAsImport =
dynamic_cast<const import_type*
> (&rowTransfer);
7841 const export_type* xferAsExport =
dynamic_cast<const export_type*
> (&rowTransfer);
7842 TEUCHOS_TEST_FOR_EXCEPTION(
7843 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7844 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7845 "argument must be either an Import or an Export, and its template "
7846 "parameters must match the corresponding template parameters of the "
7854 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<
const import_type> (domainTransfer);
7855 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<
const export_type> (domainTransfer);
7857 if(! domainTransfer.is_null()) {
7858 TEUCHOS_TEST_FOR_EXCEPTION(
7859 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7860 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7861 "argument must be either an Import or an Export, and its template "
7862 "parameters must match the corresponding template parameters of the "
7865 TEUCHOS_TEST_FOR_EXCEPTION(
7866 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
7867 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
7868 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7869 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7870 "arguments must be of the same type (either Import or Export).");
7872 TEUCHOS_TEST_FOR_EXCEPTION(
7873 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
7874 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
7875 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7876 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7877 "arguments must be of the same type (either Import or Export).");
7883 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
7887 RCP<const map_type> MyRowMap = reverseMode ?
7888 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
7889 RCP<const map_type> MyColMap;
7890 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
7891 domainMap : getDomainMap ();
7892 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
7893 rangeMap : getRangeMap ();
7894 RCP<const map_type> BaseRowMap = MyRowMap;
7895 RCP<const map_type> BaseDomainMap = MyDomainMap;
7903 if (! destMat.is_null ()) {
7914 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
7915 ! destMat->getGraph ()->isGloballyIndexed ();
7916 TEUCHOS_TEST_FOR_EXCEPTION(
7917 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
7918 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7919 "to be nonnull, if its graph is empty (neither locally nor globally "
7928 TEUCHOS_TEST_FOR_EXCEPTION(
7929 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
7930 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7931 "input argument 'destMat' is not the same as the (row) Map specified "
7932 "by the input argument 'rowTransfer'.");
7933 TEUCHOS_TEST_FOR_EXCEPTION(
7934 ! destMat->checkSizes (*
this), std::invalid_argument,
7935 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7936 "destination matrix, but checkSizes() indicates that it is not a legal "
7937 "legal target for redistribution from the source matrix (*this). This "
7938 "may mean that they do not have the same dimensions.");
7952 TEUCHOS_TEST_FOR_EXCEPTION(
7953 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7954 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7955 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7956 TEUCHOS_TEST_FOR_EXCEPTION(
7957 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7958 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7959 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7962 TEUCHOS_TEST_FOR_EXCEPTION(
7963 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7964 std::invalid_argument,
7965 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7966 "argument must be the same as the rebalanced domain map 'domainMap'");
7968 TEUCHOS_TEST_FOR_EXCEPTION(
7969 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7970 std::invalid_argument,
7971 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7972 "argument must be the same as the rebalanced domain map 'domainMap'");
7985 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7986 ArrayView<const LO> ExportLIDs = reverseMode ?
7987 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7988 auto RemoteLIDs = reverseMode ?
7989 rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
7990 auto PermuteToLIDs = reverseMode ?
7991 rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
7992 auto PermuteFromLIDs = reverseMode ?
7993 rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
7994 Distributor& Distor = rowTransfer.getDistributor ();
7997 Teuchos::Array<int> SourcePids;
8000 RCP<const map_type> ReducedRowMap, ReducedColMap,
8001 ReducedDomainMap, ReducedRangeMap;
8002 RCP<const Comm<int> > ReducedComm;
8006 if (destMat.is_null ()) {
8007 destMat = rcp (
new this_CRS_type (MyRowMap, 0, matrixparams));
8014 #ifdef HAVE_TPETRA_MMM_TIMINGS
8015 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
8017 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
8018 ReducedComm = ReducedRowMap.is_null () ?
8020 ReducedRowMap->getComm ();
8021 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
8023 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
8025 MyDomainMap->replaceCommWithSubset (ReducedComm);
8026 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
8028 MyRangeMap->replaceCommWithSubset (ReducedComm);
8031 MyRowMap = ReducedRowMap;
8032 MyDomainMap = ReducedDomainMap;
8033 MyRangeMap = ReducedRangeMap;
8036 if (! ReducedComm.is_null ()) {
8037 MyPID = ReducedComm->getRank ();
8044 ReducedComm = MyRowMap->getComm ();
8053 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8056 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8058 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8059 #ifdef HAVE_TPETRA_MMM_TIMINGS
8060 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
8068 Import_Util::getPids (*MyImporter, SourcePids,
false);
8070 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8073 #ifdef HAVE_TPETRA_MMM_TIMINGS
8074 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
8076 IntVectorType SourceDomain_pids(getDomainMap (),
true);
8077 IntVectorType SourceCol_pids(getColMap());
8079 SourceDomain_pids.putScalar(MyPID);
8081 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8082 SourcePids.resize (getColMap ()->getLocalNumElements ());
8083 SourceCol_pids.get1dCopy (SourcePids ());
8085 else if (MyImporter.is_null ()) {
8087 #ifdef HAVE_TPETRA_MMM_TIMINGS
8088 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
8090 SourcePids.resize (getColMap ()->getLocalNumElements ());
8091 SourcePids.assign (getColMap ()->getLocalNumElements (), MyPID);
8093 else if ( ! MyImporter.is_null () &&
8094 ! domainTransfer.is_null () ) {
8099 #ifdef HAVE_TPETRA_MMM_TIMINGS
8100 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
8104 IntVectorType TargetDomain_pids (domainMap);
8105 TargetDomain_pids.putScalar (MyPID);
8108 IntVectorType SourceDomain_pids (getDomainMap ());
8111 IntVectorType SourceCol_pids (getColMap ());
8113 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8114 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8116 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8117 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8119 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8120 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8122 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8123 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8126 TEUCHOS_TEST_FOR_EXCEPTION(
8127 true, std::logic_error,
"Tpetra::CrsMatrix::"
8128 "transferAndFillComplete: Should never get here! "
8129 "Please report this bug to a Tpetra developer.");
8131 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8132 SourcePids.resize (getColMap ()->getLocalNumElements ());
8133 SourceCol_pids.get1dCopy (SourcePids ());
8135 else if ( ! MyImporter.is_null () &&
8136 BaseDomainMap->isSameAs (*BaseRowMap) &&
8137 getDomainMap ()->isSameAs (*getRowMap ())) {
8139 #ifdef HAVE_TPETRA_MMM_TIMINGS
8140 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
8143 IntVectorType TargetRow_pids (domainMap);
8144 IntVectorType SourceRow_pids (getRowMap ());
8145 IntVectorType SourceCol_pids (getColMap ());
8147 TargetRow_pids.putScalar (MyPID);
8148 if (! reverseMode && xferAsImport !=
nullptr) {
8149 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport,
INSERT);
8151 else if (reverseMode && xferAsExport !=
nullptr) {
8152 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport,
INSERT);
8154 else if (! reverseMode && xferAsExport !=
nullptr) {
8155 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport,
INSERT);
8157 else if (reverseMode && xferAsImport !=
nullptr) {
8158 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport,
INSERT);
8161 TEUCHOS_TEST_FOR_EXCEPTION(
8162 true, std::logic_error,
"Tpetra::CrsMatrix::"
8163 "transferAndFillComplete: Should never get here! "
8164 "Please report this bug to a Tpetra developer.");
8167 SourceCol_pids.doImport (SourceRow_pids, *MyImporter,
INSERT);
8168 SourcePids.resize (getColMap ()->getLocalNumElements ());
8169 SourceCol_pids.get1dCopy (SourcePids ());
8172 TEUCHOS_TEST_FOR_EXCEPTION(
8173 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8174 "transferAndFillComplete: This method only allows either domainMap == "
8175 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8176 "getDomainMap () == getRowMap ()).");
8180 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8182 #ifdef HAVE_TPETRA_MMM_TIMINGS
8183 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
8185 if (constantNumPackets == 0) {
8186 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8187 RemoteLIDs.view_host().size ());
8194 const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
8195 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8201 #ifdef HAVE_TPETRA_MMM_TIMINGS
8202 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
8205 using Teuchos::outArg;
8206 using Teuchos::REDUCE_MAX;
8207 using Teuchos::reduceAll;
8210 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8211 const int myRank = comm->getRank ();
8213 std::ostringstream errStrm;
8217 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8220 destMat->numExportPacketsPerLID_.modify_host ();
8221 numExportPacketsPerLID =
8224 catch (std::exception& e) {
8225 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8226 << e.what () << std::endl;
8230 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8231 "an exception not a subclass of std::exception" << std::endl;
8235 if (! comm.is_null ()) {
8236 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8239 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8240 TEUCHOS_TEST_FOR_EXCEPTION(
8241 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8242 "exception on at least one process.");
8246 std::ostringstream os;
8247 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8249 std::cerr << os.str ();
8254 numExportPacketsPerLID,
8257 constantNumPackets);
8259 catch (std::exception& e) {
8260 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8261 << e.what () << std::endl;
8265 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8266 "an exception not a subclass of std::exception" << std::endl;
8271 std::ostringstream os;
8272 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8274 std::cerr << os.str ();
8277 if (! comm.is_null ()) {
8278 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8281 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8282 TEUCHOS_TEST_FOR_EXCEPTION(
8283 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8284 "exception on at least one process.");
8289 destMat->numExportPacketsPerLID_.modify_host ();
8290 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8293 std::ostringstream os;
8294 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8296 std::cerr << os.str ();
8300 numExportPacketsPerLID,
8303 constantNumPackets);
8305 std::ostringstream os;
8306 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8308 std::cerr << os.str ();
8315 #ifdef HAVE_TPETRA_MMM_TIMINGS
8316 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
8318 if (! communication_needed) {
8320 std::ostringstream os;
8321 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8322 std::cerr << os.str ();
8327 if (constantNumPackets == 0) {
8329 std::ostringstream os;
8330 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8332 std::cerr << os.str ();
8337 destMat->numExportPacketsPerLID_.sync_host ();
8338 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8340 destMat->numImportPacketsPerLID_.sync_host ();
8341 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8345 std::ostringstream os;
8346 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8348 std::cerr << os.str ();
8350 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8351 destMat->numImportPacketsPerLID_.view_host());
8353 std::ostringstream os;
8354 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8356 std::cerr << os.str ();
8359 size_t totalImportPackets = 0;
8360 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8361 totalImportPackets += numImportPacketsPerLID[i];
8366 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8367 verbosePrefix.get ());
8368 destMat->imports_.modify_host ();
8369 auto hostImports = destMat->imports_.view_host();
8372 destMat->exports_.sync_host ();
8373 auto hostExports = destMat->exports_.view_host();
8375 std::ostringstream os;
8376 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
8378 std::cerr << os.str ();
8380 Distor.doReversePostsAndWaits (hostExports,
8381 numExportPacketsPerLID,
8383 numImportPacketsPerLID);
8385 std::ostringstream os;
8386 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
8388 std::cerr << os.str ();
8393 std::ostringstream os;
8394 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8396 std::cerr << os.str ();
8398 destMat->imports_.modify_host ();
8399 auto hostImports = destMat->imports_.view_host();
8402 destMat->exports_.sync_host ();
8403 auto hostExports = destMat->exports_.view_host();
8405 std::ostringstream os;
8406 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8408 std::cerr << os.str ();
8410 Distor.doReversePostsAndWaits (hostExports,
8414 std::ostringstream os;
8415 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8417 std::cerr << os.str ();
8422 if (constantNumPackets == 0) {
8424 std::ostringstream os;
8425 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8427 std::cerr << os.str ();
8432 destMat->numExportPacketsPerLID_.sync_host ();
8433 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8435 destMat->numImportPacketsPerLID_.sync_host ();
8436 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8439 std::ostringstream os;
8440 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8442 std::cerr << os.str ();
8444 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8445 destMat->numImportPacketsPerLID_.view_host());
8447 std::ostringstream os;
8448 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8450 std::cerr << os.str ();
8453 size_t totalImportPackets = 0;
8454 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8455 totalImportPackets += numImportPacketsPerLID[i];
8460 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8461 verbosePrefix.get ());
8462 destMat->imports_.modify_host ();
8463 auto hostImports = destMat->imports_.view_host();
8466 destMat->exports_.sync_host ();
8467 auto hostExports = destMat->exports_.view_host();
8469 std::ostringstream os;
8470 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8472 std::cerr << os.str ();
8474 Distor.doPostsAndWaits (hostExports,
8475 numExportPacketsPerLID,
8477 numImportPacketsPerLID);
8479 std::ostringstream os;
8480 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8482 std::cerr << os.str ();
8487 std::ostringstream os;
8488 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8490 std::cerr << os.str ();
8492 destMat->imports_.modify_host ();
8493 auto hostImports = destMat->imports_.view_host();
8496 destMat->exports_.sync_host ();
8497 auto hostExports = destMat->exports_.view_host();
8499 std::ostringstream os;
8500 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8502 std::cerr << os.str ();
8504 Distor.doPostsAndWaits (hostExports,
8508 std::ostringstream os;
8509 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8511 std::cerr << os.str ();
8522 bool runOnHost = std::is_same_v<typename device_type::memory_space, Kokkos::HostSpace> && !useKokkosPath;
8524 Teuchos::Array<int> RemotePids;
8526 Teuchos::Array<int> TargetPids;
8532 destMat->numImportPacketsPerLID_.modify_host();
8534 # ifdef HAVE_TPETRA_MMM_TIMINGS
8535 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8537 ArrayRCP<size_t> CSR_rowptr;
8538 ArrayRCP<GO> CSR_colind_GID;
8539 ArrayRCP<LO> CSR_colind_LID;
8540 ArrayRCP<Scalar> CSR_vals;
8542 destMat->imports_.sync_device ();
8543 destMat->numImportPacketsPerLID_.sync_device ();
8545 size_t N = BaseRowMap->getLocalNumElements ();
8547 auto RemoteLIDs_d = RemoteLIDs.view_device();
8548 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8549 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8554 destMat->imports_.view_device(),
8555 destMat->numImportPacketsPerLID_.view_device(),
8569 if (
typeid (LO) ==
typeid (GO)) {
8570 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8573 CSR_colind_LID.resize (CSR_colind_GID.size());
8575 CSR_colind_LID.resize (CSR_colind_GID.size());
8580 for(
size_t i=0; i<static_cast<size_t>(TargetPids.size()); i++)
8582 if(TargetPids[i] == -1) TargetPids[i] = MyPID;
8584 #ifdef HAVE_TPETRA_MMM_TIMINGS
8585 tmCopySPRdata = Teuchos::null;
8594 std::ostringstream os;
8595 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8597 std::cerr << os.str ();
8600 #ifdef HAVE_TPETRA_MMM_TIMINGS
8601 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8603 Import_Util::lowCommunicationMakeColMapAndReindexSerial(CSR_rowptr (),
8613 std::ostringstream os;
8614 os << *verbosePrefix <<
"restrictComm="
8615 << (restrictComm ?
"true" :
"false") << std::endl;
8616 std::cerr << os.str ();
8623 #ifdef HAVE_TPETRA_MMM_TIMINGS
8624 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8627 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8629 MyColMap->replaceCommWithSubset (ReducedComm);
8630 MyColMap = ReducedColMap;
8635 std::ostringstream os;
8636 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8637 std::cerr << os.str ();
8639 destMat->replaceColMap (MyColMap);
8646 if (ReducedComm.is_null ()) {
8648 std::ostringstream os;
8649 os << *verbosePrefix <<
"I am no longer in the communicator; "
8650 "returning" << std::endl;
8651 std::cerr << os.str ();
8660 if ((! reverseMode && xferAsImport !=
nullptr) ||
8661 (reverseMode && xferAsExport !=
nullptr)) {
8663 std::ostringstream os;
8664 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8665 std::cerr << os.str ();
8667 #ifdef HAVE_TPETRA_MMM_TIMINGS
8668 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8670 Import_Util::sortCrsEntries (CSR_rowptr(),
8674 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8675 (reverseMode && xferAsImport !=
nullptr)) {
8677 std::ostringstream os;
8678 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8680 std::cerr << os.str();
8682 #ifdef HAVE_TPETRA_MMM_TIMINGS
8683 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8685 Import_Util::sortAndMergeCrsEntries (CSR_rowptr(),
8688 if (CSR_rowptr[N] != static_cast<size_t>(CSR_vals.size())) {
8689 CSR_colind_LID.resize (CSR_rowptr[N]);
8690 CSR_vals.resize (CSR_rowptr[N]);
8694 TEUCHOS_TEST_FOR_EXCEPTION(
8695 true, std::logic_error,
"Tpetra::CrsMatrix::"
8696 "transferAndFillComplete: Should never get here! "
8697 "Please report this bug to a Tpetra developer.");
8704 std::ostringstream os;
8705 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8706 std::cerr << os.str ();
8715 #ifdef HAVE_TPETRA_MMM_TIMINGS
8716 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8718 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8730 destMat->numImportPacketsPerLID_.modify_host();
8732 # ifdef HAVE_TPETRA_MMM_TIMINGS
8733 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8735 ArrayRCP<size_t> CSR_rowptr;
8736 ArrayRCP<GO> CSR_colind_GID;
8737 ArrayRCP<LO> CSR_colind_LID;
8738 ArrayRCP<Scalar> CSR_vals;
8740 destMat->imports_.sync_device ();
8741 destMat->numImportPacketsPerLID_.sync_device ();
8743 size_t N = BaseRowMap->getLocalNumElements ();
8745 auto RemoteLIDs_d = RemoteLIDs.view_device();
8746 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8747 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8749 Kokkos::View<size_t*,device_type> CSR_rowptr_d;
8750 Kokkos::View<GO*,device_type> CSR_colind_GID_d;
8751 Kokkos::View<LO*,device_type> CSR_colind_LID_d;
8752 Kokkos::View<impl_scalar_type*,device_type> CSR_vals_d;
8753 Kokkos::View<int*,device_type> TargetPids_d;
8758 destMat->imports_.view_device(),
8759 destMat->numImportPacketsPerLID_.view_device(),
8771 Kokkos::resize (CSR_colind_LID_d, CSR_colind_GID_d.size());
8773 #ifdef HAVE_TPETRA_MMM_TIMINGS
8774 tmCopySPRdata = Teuchos::null;
8783 std::ostringstream os;
8784 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8786 std::cerr << os.str ();
8789 #ifdef HAVE_TPETRA_MMM_TIMINGS
8790 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8792 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr_d,
8802 std::ostringstream os;
8803 os << *verbosePrefix <<
"restrictComm="
8804 << (restrictComm ?
"true" :
"false") << std::endl;
8805 std::cerr << os.str ();
8812 #ifdef HAVE_TPETRA_MMM_TIMINGS
8813 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8816 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8818 MyColMap->replaceCommWithSubset (ReducedComm);
8819 MyColMap = ReducedColMap;
8824 std::ostringstream os;
8825 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8826 std::cerr << os.str ();
8828 destMat->replaceColMap (MyColMap);
8835 if (ReducedComm.is_null ()) {
8837 std::ostringstream os;
8838 os << *verbosePrefix <<
"I am no longer in the communicator; "
8839 "returning" << std::endl;
8840 std::cerr << os.str ();
8850 if ((! reverseMode && xferAsImport !=
nullptr) ||
8851 (reverseMode && xferAsExport !=
nullptr)) {
8853 std::ostringstream os;
8854 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8855 std::cerr << os.str ();
8857 #ifdef HAVE_TPETRA_MMM_TIMINGS
8858 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8860 Import_Util::sortCrsEntries (CSR_rowptr_d,
8864 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8865 (reverseMode && xferAsImport !=
nullptr)) {
8867 std::ostringstream os;
8868 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8870 std::cerr << os.str();
8872 #ifdef HAVE_TPETRA_MMM_TIMINGS
8873 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8875 Import_Util::sortAndMergeCrsEntries (CSR_rowptr_d,
8880 TEUCHOS_TEST_FOR_EXCEPTION(
8881 true, std::logic_error,
"Tpetra::CrsMatrix::"
8882 "transferAndFillComplete: Should never get here! "
8883 "Please report this bug to a Tpetra developer.");
8891 std::ostringstream os;
8892 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8893 std::cerr << os.str ();
8897 #ifdef HAVE_TPETRA_MMM_TIMINGS
8898 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8900 destMat->setAllValues (CSR_rowptr_d, CSR_colind_LID_d, CSR_vals_d);
8908 #ifdef HAVE_TPETRA_MMM_TIMINGS
8909 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8912 Teuchos::ParameterList esfc_params;
8914 RCP<import_type> MyImport;
8917 if (iallreduceRequest.get () !=
nullptr) {
8919 std::ostringstream os;
8920 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8922 std::cerr << os.str ();
8924 iallreduceRequest->wait ();
8925 if (reduced_mismatch != 0) {
8931 #ifdef HAVE_TPETRA_MMM_TIMINGS
8932 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8937 std::ostringstream os;
8938 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8939 std::cerr << os.str ();
8942 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8943 Teuchos::ArrayRCP<int> type3PIDs;
8944 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8945 auto colind = getCrsGraph()->getLocalIndicesHost();
8948 std::ostringstream os;
8949 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8950 std::cerr << os.str ();
8954 #ifdef HAVE_TPETRA_MMM_TIMINGS
8955 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8957 Import_Util::reverseNeighborDiscovery(*
this,
8969 std::ostringstream os;
8970 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8971 std::cerr << os.str ();
8974 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8975 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8977 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8978 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8980 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8982 std::vector<bool> IsOwned(numCols,
true);
8983 std::vector<int> SentTo(numCols,-1);
8984 if (! MyImporter.is_null ()) {
8985 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
8986 IsOwned[rlid]=
false;
8990 std::vector<std::pair<int,GO> > usrtg;
8991 usrtg.reserve(TEPID2.size());
8994 const auto& colMap = * (this->getColMap ());
8996 const LO row = TELID2[i];
8997 const int pid = TEPID2[i];
8998 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
8999 const int col = colind[j];
9000 if (IsOwned[col] && SentTo[col] != pid) {
9002 GO gid = colMap.getGlobalElement (col);
9003 usrtg.push_back (std::pair<int,GO> (pid, gid));
9011 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
9013 usrtg.erase(eopg,usrtg.end());
9016 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
9017 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
9020 for(
auto && p : usrtg) {
9021 EPID2[pos]= p.first;
9022 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
9026 Teuchos::ArrayView<int> EPID3 = type3PIDs();
9027 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
9028 GO InfGID = std::numeric_limits<GO>::max();
9029 int InfPID = INT_MAX;
9032 #endif // TPETRA_MIN3
9033 #define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
9034 int i1=0, i2=0, i3=0;
9035 int Len1 = EPID1.size();
9036 int Len2 = EPID2.size();
9037 int Len3 = EPID3.size();
9039 int MyLen=Len1+Len2+Len3;
9040 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
9041 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
9044 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
9045 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
9046 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
9047 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
9049 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
9050 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
9051 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
9053 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9054 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9057 #endif // TPETRA_MIN3
9058 bool added_entry=
false;
9060 if(PID1 == MIN_PID && GID1 == MIN_GID){
9061 userExportLIDs[iloc]=ELID1[i1];
9062 userExportPIDs[iloc]=EPID1[i1];
9067 if(PID2 == MIN_PID && GID2 == MIN_GID){
9069 userExportLIDs[iloc]=ELID2[i2];
9070 userExportPIDs[iloc]=EPID2[i2];
9076 if(PID3 == MIN_PID && GID3 == MIN_GID){
9078 userExportLIDs[iloc]=ELID3[i3];
9079 userExportPIDs[iloc]=EPID3[i3];
9087 std::ostringstream os;
9088 os << *verbosePrefix <<
"Create Import" << std::endl;
9089 std::cerr << os.str ();
9092 #ifdef HAVE_TPETRA_MMM_TIMINGS
9093 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
9095 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
9097 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9098 MyImport = rcp (
new import_type (MyDomainMap,
9101 userExportLIDs.view(0,iloc).getConst(),
9102 userExportPIDs.view(0,iloc).getConst(),
9107 std::ostringstream os;
9108 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
9109 std::cerr << os.str ();
9113 #ifdef HAVE_TPETRA_MMM_TIMINGS
9114 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
9115 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
9117 if(!params.is_null())
9118 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
9119 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
9125 #ifdef HAVE_TPETRA_MMM_TIMINGS
9126 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
9129 std::ostringstream os;
9130 os << *verbosePrefix <<
"Create Import" << std::endl;
9131 std::cerr << os.str ();
9134 #ifdef HAVE_TPETRA_MMM_TIMINGS
9135 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
9137 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
9138 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
9139 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9140 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9143 std::ostringstream os;
9144 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
9145 std::cerr << os.str ();
9148 #ifdef HAVE_TPETRA_MMM_TIMINGS
9149 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
9150 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
9152 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
9155 if (!params.is_null ()) {
9156 esfc_params.set (
"compute global constants",
9157 params->get (
"compute global constants",
true));
9159 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9160 MyImport, Teuchos::null,
9161 rcp (
new Teuchos::ParameterList (esfc_params)));
9164 #ifdef HAVE_TPETRA_MMM_TIMINGS
9165 tmIESFC = Teuchos::null;
9169 std::ostringstream os;
9170 os << *verbosePrefix <<
"Done" << endl;
9171 std::cerr << os.str ();
// Single-importer wrapper (presumably CrsMatrix::importAndFillComplete; the
// line that names the function is not visible in this listing -- confirm).
// Forwards destMatrix, importer, domainMap, rangeMap and params unchanged to
// transferAndFillComplete, passing Teuchos::null in the third argument slot,
// i.e. the slot that the two-importer overload fills with its domainImporter.
9176 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9181 const Teuchos::RCP<const map_type>& domainMap,
9182 const Teuchos::RCP<const map_type>& rangeMap,
9183 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9185 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
// Two-importer wrapper (presumably the importAndFillComplete overload taking
// rowImporter and domainImporter; the line that names the function is not
// visible in this listing -- confirm).
// Forwards to transferAndFillComplete with rowImporter as the row transfer;
// domainImporter is wrapped in an RCP via Teuchos::rcpFromRef and passed in
// the third argument slot. domainMap, rangeMap and params pass through as-is.
9188 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9194 const Teuchos::RCP<const map_type>& domainMap,
9195 const Teuchos::RCP<const map_type>& rangeMap,
9196 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9198 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
// Single-exporter wrapper (presumably CrsMatrix::exportAndFillComplete; the
// line that names the function is not visible in this listing -- confirm).
// Forwards destMatrix, exporter, domainMap, rangeMap and params unchanged to
// transferAndFillComplete, passing Teuchos::null in the third argument slot,
// i.e. the slot that the two-exporter overload fills with its domainExporter.
9201 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9206 const Teuchos::RCP<const map_type>& domainMap,
9207 const Teuchos::RCP<const map_type>& rangeMap,
9208 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9210 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
// Two-exporter wrapper (presumably the exportAndFillComplete overload taking
// rowExporter and domainExporter; the line that names the function is not
// visible in this listing -- confirm).
// Forwards to transferAndFillComplete with rowExporter as the row transfer;
// domainExporter is wrapped in an RCP via Teuchos::rcpFromRef and passed in
// the third argument slot. domainMap, rangeMap and params pass through as-is.
9213 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9219 const Teuchos::RCP<const map_type>& domainMap,
9220 const Teuchos::RCP<const map_type>& rangeMap,
9221 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9223 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
// Expands to an explicit instantiation of the CrsMatrix class template for
// the given SCALAR, LO, GO and NODE types.
9234 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9236 template class CrsMatrix< SCALAR , LO , GO , NODE >;
// Expands to an explicit instantiation of the member template convert<SO>()
// of CrsMatrix<SI, LO, GO, NODE>, whose return type is
// Teuchos::RCP<CrsMatrix<SO, LO, GO, NODE>>. SI is the scalar type of the
// matrix the member belongs to; SO is the scalar type of the returned matrix.
9238 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9240 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9241 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
// Expands to a declaration of the nonmember function
// importAndFillCompleteCrsMatrix (single-Import form: sourceMatrix, importer,
// domainMap, rangeMap, params) for CrsMatrix<SCALAR, LO, GO, NODE>.
// The Import and Map template arguments are taken from the matrix's own
// local_ordinal_type, global_ordinal_type and node_type typedefs.
// NOTE(review): the macro line between the #define and the return type is not
// visible in this listing (presumably a template keyword) -- confirm whether
// this is an explicit instantiation or a specialization.
9243 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9245 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9246 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9247 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9248 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9249 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9250 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9251 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9252 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9253 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9254 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9255 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9256 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of the nonmember function
// importAndFillCompleteCrsMatrix in its two-Import form (sourceMatrix,
// rowImporter, domainImporter, domainMap, rangeMap, params) for
// CrsMatrix<SCALAR, LO, GO, NODE>.
// NOTE(review): the macro line between the #define and the return type is not
// visible in this listing (presumably a template keyword) -- confirm whether
// this is an explicit instantiation or a specialization.
9258 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9260 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9261 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9262 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9263 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9264 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9265 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9266 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9267 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9268 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9269 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9270 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9271 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9272 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9273 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9274 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of the nonmember function
// exportAndFillCompleteCrsMatrix (single-Export form: sourceMatrix, exporter,
// domainMap, rangeMap, params) for CrsMatrix<SCALAR, LO, GO, NODE>.
// The Export and Map template arguments are taken from the matrix's own
// local_ordinal_type, global_ordinal_type and node_type typedefs.
// NOTE(review): the macro line between the #define and the return type is not
// visible in this listing (presumably a template keyword) -- confirm whether
// this is an explicit instantiation or a specialization.
9277 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9279 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9280 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9281 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9282 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9283 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9284 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9285 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9286 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9287 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9288 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9289 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9290 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of the nonmember function
// exportAndFillCompleteCrsMatrix in its two-Export form (sourceMatrix,
// rowExporter, domainExporter, domainMap, rangeMap, params) for
// CrsMatrix<SCALAR, LO, GO, NODE>.
// NOTE(review): the macro line between the #define and the return type is not
// visible in this listing (presumably a template keyword) -- confirm whether
// this is an explicit instantiation or a specialization.
9292 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9294 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9295 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9296 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9297 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9298 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9299 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9300 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9301 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9302 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9303 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9304 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9305 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9306 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9307 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9308 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Umbrella macro: for one (SCALAR, LO, GO, NODE) combination, expands to the
// class instantiation macro followed by the four import/export
// "and fill complete" macros (single- and two-transfer forms).
// It does not invoke TPETRA_CRSMATRIX_CONVERT_INSTANT; that macro takes a
// second scalar type and has to be invoked separately.
9311 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9312 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9313 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9314 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9315 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9316 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
9318 #endif // TPETRA_CRSMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Functor for the ABSMAX CombineMode of Import and Export operations.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual LocalOrdinal replaceGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceGlobalValues.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
static size_t mergeRowIndicesAndValues(size_t rowLen, local_ordinal_type *cols, impl_scalar_type *vals)
Merge duplicate row indices in the given row, along with their corresponding values.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
typename device_type::execution_space execution_space
The Kokkos execution space.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets) const
Pack this object's data for an Import or Export.
virtual void insertGlobalValuesImpl(crs_graph_type &graph, RowInfo &rowInfo, const GlobalOrdinal gblColInds[], const impl_scalar_type vals[], const size_t numInputEnt)
Common implementation detail of insertGlobalValues and insertGlobalValuesFiltered.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Traits class for packing / unpacking data of type T.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given objects.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getLocalNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
bool isDistributed() const
Whether this is a globally distributed object.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
static bool debug()
Whether Tpetra is in debug mode.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
Scalar scalar_type
The type of each entry in the matrix.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data from *this with the data and maps from crsMatrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void leftScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Left-scale a KokkosSparse::CrsMatrix.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
virtual LocalOrdinal sumIntoLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoLocalValues.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
bool isFillActive() const
Whether the matrix is not fill complete.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply().
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
values_dualv_type::t_dev::const_type getValuesViewDevice(const RowInfo &rowinfo) const
Get a const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow)...
bool isFillComplete() const override
Whether the matrix is fill complete.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Kokkos::View< size_t *, Kokkos::LayoutLeft, device_type >::HostMirror num_row_entries_type
Row offsets for "1-D" storage.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< char *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode CM) override
Unpack the imported column indices and values, and combine into matrix.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
bool fillComplete_
Whether the matrix is fill complete.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using global indices.
Replace old value with maximum of magnitudes of old and new values.
virtual LocalOrdinal sumIntoGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoGlobalValues.
Abstract base class for objects that can be the source of an Import or Export operation.
local_ordinal_type sumIntoLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using local row and column indices.
typename Node::device_type device_type
The Kokkos device type.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process...
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
std::string description() const override
A one-line description of this object.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
size_t getLocalNumEntries() const override
The local number of entries in this matrix.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
Replace existing values with new values.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Replace old values with zero.
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
bool isLocallyComplete() const
Is this Export or Import locally complete?
virtual LocalOrdinal replaceLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceLocalValues.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
values_dualv_type::t_dev getValuesViewDeviceNonConst(const RowInfo &rowinfo)
Get a non-const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myR...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
A parallel distribution of indices over processes.
void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const CombineMode CM=ADD)
Insert one or more entries into the matrix, using local column indices.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
A read-only, row-oriented interface to a sparse matrix.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given objects.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
values_dualv_type::t_host::const_type getValuesViewHost(const RowInfo &rowinfo) const
Get a const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow).
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Declaration of Tpetra::Details::iallreduce.
void reduce()
Sum values of a locally replicated multivector across all processes.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas, const bool verbose)
Allocate values (and optionally indices) using the Node.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process...
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
void setAllValues(const typename local_graph_device_type::row_map_type &ptr, const typename local_graph_device_type::entries_type::non_const_type &ind, const typename local_matrix_device_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using local row and column indices.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given objects.
std::shared_ptr< local_multiply_op_type > getLocalMultiplyOperator() const
The local sparse matrix operator (a wrapper of getLocalMatrixDevice() that supports local matrix-vect...
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Accumulate new values into existing values (may not be supported in all classes)
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
void rightScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Right-scale a KokkosSparse::CrsMatrix.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Description of Tpetra's behavior.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
values_dualv_type::t_host getValuesViewHostNonConst(const RowInfo &rowinfo)
Get a non-const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.