10 #ifndef TPETRA_CRSMATRIX_DEF_HPP
11 #define TPETRA_CRSMATRIX_DEF_HPP
23 #include "Tpetra_RowMatrix.hpp"
24 #include "Tpetra_LocalCrsMatrixOperator.hpp"
32 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
40 #include "Tpetra_Details_packCrsMatrix.hpp"
41 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
43 #include "Teuchos_FancyOStream.hpp"
44 #include "Teuchos_RCP.hpp"
45 #include "Teuchos_DataAccess.hpp"
46 #include "Teuchos_SerialDenseMatrix.hpp"
47 #include "KokkosBlas1_scal.hpp"
48 #include "KokkosSparse_getDiagCopy.hpp"
49 #include "KokkosSparse_spmv.hpp"
// atomic_binary_function_update: only part of the signature and the tail of
// the retry loop are visible in this excerpt.
61 template<
class T,
class BinaryFunction>
62 T atomic_binary_function_update (
volatile T*
const dest,
// Combine the assumed current value with the input via f, then attempt to
// store the result into *dest with a compare-exchange against `assume`.
76 T newVal = f (assume, inputVal);
77 oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
78 }
// Keep retrying while the value returned by the compare-exchange differs
// from the value that was assumed.
while (assume != oldVal);
// Helper templated on Scalar (its name and signature are not visible here).
// The visible body returns the larger of the Teuchos::ScalarTraits
// magnitudes of x and y.
98 template<
class Scalar>
102 typedef Teuchos::ScalarTraits<Scalar> STS;
103 return std::max (STS::magnitude (x), STS::magnitude (y));
// Constructor taking a row Map, a size_t maxNumEntriesPerRow, and an
// optional ParameterList.  Several body lines are not visible in this excerpt.
112 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
113 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
114 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
115 size_t maxNumEntriesPerRow,
116 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix naming this overload, used by the exception macro below.
119 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, size_t "
120 "[, RCP<ParameterList>]): ";
121 Teuchos::RCP<crs_graph_type> graph;
// Build a new CrsGraph from the row Map and the per-row entry bound.
123 graph = Teuchos::rcp (
new crs_graph_type (rowMap, maxNumEntriesPerRow,
// A std::exception from the graph constructor is reported through the
// Teuchos macro (condition `true`) as std::runtime_error.
126 catch (std::exception& e) {
127 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
128 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
129 "size_t [, RCP<ParameterList>]) threw an exception: "
// staticGraph_ is made to refer to the same graph as myGraph_.
136 staticGraph_ = myGraph_;
// Constructor overload taking a per-row allocation array
// (numEntPerRowToAlloc) and an optional ParameterList; the leading part of
// the signature and the graph construction are not visible in this excerpt.
141 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
144 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
145 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix naming this overload, used by the exception macro below.
148 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
149 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
150 Teuchos::RCP<crs_graph_type> graph;
// A std::exception caught here is reported through the Teuchos macro
// (condition `true`) as std::runtime_error.
156 catch (std::exception& e) {
157 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
158 (
true, std::runtime_error,
"CrsGraph constructor "
159 "(RCP<const Map>, ArrayView<const size_t>"
160 "[, RCP<ParameterList>]) threw an exception: "
// The newly created graph becomes staticGraph_.
167 staticGraph_ = graph;
// Constructor overload taking a column Map, a size_t maxNumEntPerRow, and an
// optional ParameterList; the leading part of the signature and the graph
// construction are not visible in this excerpt.
172 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
175 const Teuchos::RCP<const map_type>& colMap,
176 const size_t maxNumEntPerRow,
177 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix naming this overload, and a suffix for internal-error messages.
180 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
181 "RCP<const Map>, size_t[, RCP<ParameterList>]): ";
182 const char suffix[] =
183 " Please report this bug to the Tpetra developers.";
// Sanity checks: both graph pointers are expected to be null on entry;
// a non-null pointer is reported as std::logic_error.
186 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
187 (! staticGraph_.is_null (), std::logic_error,
188 "staticGraph_ is not null at the beginning of the constructor."
190 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
191 (! myGraph_.is_null (), std::logic_error,
192 "myGraph_ is not null at the beginning of the constructor."
194 Teuchos::RCP<crs_graph_type> graph;
// A std::exception caught here is reported as std::runtime_error with the
// original what() text appended.
200 catch (std::exception& e) {
201 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
202 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
203 "RCP<const Map>, size_t[, RCP<ParameterList>]) threw an "
204 "exception: " << e.what ());
// staticGraph_ is made to refer to the same graph as myGraph_.
210 staticGraph_ = myGraph_;
// Constructor overload taking a column Map, a per-row allocation array, and
// an optional ParameterList; the leading part of the signature and the graph
// construction are not visible in this excerpt.
215 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
218 const Teuchos::RCP<const map_type>& colMap,
219 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
220 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix naming this overload, used by the exception macro below.
223 const char tfecfFuncName[] =
224 "CrsMatrix(RCP<const Map>, RCP<const Map>, "
225 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
226 Teuchos::RCP<crs_graph_type> graph;
// A std::exception caught here is reported as std::runtime_error with the
// original what() text appended.
232 catch (std::exception& e) {
233 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
234 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
235 "RCP<const Map>, ArrayView<const size_t>[, "
236 "RCP<ParameterList>]) threw an exception: " << e.what ());
// The newly created graph becomes staticGraph_.
242 staticGraph_ = graph;
// Constructor taking an existing const CrsGraph; the ParameterList argument
// is unnamed.  Several body lines (including the conditions guarding the
// diagnostic output) are not visible in this excerpt.
248 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
250 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
251 const Teuchos::RCP<Teuchos::ParameterList>& ) :
// The input graph is stored as staticGraph_, and storage is marked as
// STORAGE_1D_PACKED.
253 staticGraph_ (graph),
254 storageStatus_ (Details::STORAGE_1D_PACKED)
257 typedef typename local_matrix_device_type::values_type values_type;
258 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>[, "
259 "RCP<ParameterList>]): ";
// Diagnostic prefix; each message is assembled in an ostringstream and then
// written to std::cerr in a single insertion.
262 std::unique_ptr<std::string> prefix;
264 prefix = this->createPrefix(
"CrsMatrix",
"CrsMatrix(graph,params)");
265 std::ostringstream os;
266 os << *prefix <<
"Start" << endl;
267 std::cerr << os.str ();
// Input validation: a null graph or a graph that is not fill complete is
// reported as std::runtime_error.
270 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
271 (graph.is_null (), std::runtime_error,
"Input graph is null.");
272 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
273 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
274 "is not fill complete. You must call fillComplete on the "
275 "graph before using it to construct a CrsMatrix. Note that "
276 "calling resumeFill on the graph makes it not fill complete, "
277 "even if you had previously called fillComplete. In that "
278 "case, you must call fillComplete on the graph again.");
// The number of values to allocate is the length of the graph's packed
// local index array.
286 const size_t numEnt = graph->lclIndsPacked_wdv.extent (0);
288 std::ostringstream os;
289 os << *prefix <<
"Allocate values: " << numEnt << endl;
290 std::cerr << os.str ();
// Allocate the values view with numEnt entries.
293 values_type val (
"Tpetra::CrsMatrix::values", numEnt);
// The unpacked values handle is assigned from the packed one.
295 valuesUnpacked_wdv = valuesPacked_wdv;
300 std::ostringstream os;
301 os << *prefix <<
"Done" << endl;
302 std::cerr << os.str ();
// Constructor overload that reads from an existing `matrix` and takes a
// const CrsGraph plus a ParameterList; the first part of the signature is
// not visible in this excerpt.
306 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
309 const Teuchos::RCP<const crs_graph_type>& graph,
310 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// The input graph is stored as staticGraph_; the storage status is copied
// from `matrix`.
312 staticGraph_ (graph),
313 storageStatus_ (matrix.storageStatus_)
// NOTE(review): this prefix is textually identical to the one used by the
// (graph, values, params) constructor and does not mention the `matrix`
// argument this body reads -- possibly copy-pasted; confirm.
315 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
316 "local_matrix_device_type::values_type, "
317 "[,RCP<ParameterList>]): ";
// Input validation: a null graph or a graph that is not fill complete is
// reported as std::runtime_error.
318 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
319 (graph.is_null (), std::runtime_error,
"Input graph is null.");
320 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
321 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
322 "is not fill complete. You must call fillComplete on the "
323 "graph before using it to construct a CrsMatrix. Note that "
324 "calling resumeFill on the graph makes it not fill complete, "
325 "even if you had previously called fillComplete. In that "
326 "case, you must call fillComplete on the graph again.");
// Packed values are built from matrix.valuesPacked_wdv with arguments
// (0, length of the graph's packed index array) -- presumably an offset
// and a length; confirm against values_wdv_type.
328 size_t numValuesPacked = graph->lclIndsPacked_wdv.extent(0);
329 valuesPacked_wdv =
values_wdv_type(matrix.valuesPacked_wdv, 0, numValuesPacked);
// Same construction for the unpacked values, sized by the graph's unpacked
// index array.
331 size_t numValuesUnpacked = graph->lclIndsUnpacked_wdv.extent(0);
332 valuesUnpacked_wdv =
values_wdv_type(matrix.valuesUnpacked_wdv, 0, numValuesUnpacked);
// Constructor taking an existing const CrsGraph and a values view; the
// ParameterList argument is unnamed.  Some body lines are not visible in
// this excerpt.
338 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
340 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
341 const typename local_matrix_device_type::values_type& values,
342 const Teuchos::RCP<Teuchos::ParameterList>& ) :
// The input graph is stored as staticGraph_, and storage is marked as
// STORAGE_1D_PACKED.
344 staticGraph_ (graph),
345 storageStatus_ (Details::STORAGE_1D_PACKED)
347 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
348 "local_matrix_device_type::values_type, "
349 "[,RCP<ParameterList>]): ";
// Input validation: a null graph or a graph that is not fill complete is
// reported as std::runtime_error.
350 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
351 (graph.is_null (), std::runtime_error,
"Input graph is null.");
352 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
353 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
354 "is not fill complete. You must call fillComplete on the "
355 "graph before using it to construct a CrsMatrix. Note that "
356 "calling resumeFill on the graph makes it not fill complete, "
357 "even if you had previously called fillComplete. In that "
358 "case, you must call fillComplete on the graph again.");
// The unpacked values handle is assigned from the packed one.
367 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor overload taking a column Map, device row pointers, column
// indices, values, and an optional ParameterList.  The first part of the
// signature and several body lines are not visible in this excerpt.
378 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
381 const Teuchos::RCP<const map_type>& colMap,
382 const typename local_graph_device_type::row_map_type& rowPointers,
383 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
384 const typename local_matrix_device_type::values_type& values,
385 const Teuchos::RCP<Teuchos::ParameterList>& params) :
387 storageStatus_ (Details::STORAGE_1D_PACKED)
389 using Details::getEntryOnHost;
// Prefix naming this overload, and a suffix for internal-error messages.
392 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
393 "RCP<const Map>, ptr, ind, val[, params]): ";
394 const char suffix[] =
395 ". Please report this bug to the Tpetra developers.";
// Diagnostic prefix; the message is assembled in an ostringstream and then
// written to std::cerr in a single insertion.
399 std::unique_ptr<std::string> prefix;
401 prefix = this->createPrefix(
402 "CrsMatrix",
"CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
403 std::ostringstream os;
404 os << *prefix <<
"Start" << endl;
405 std::cerr << os.str ();
// values and columnIndices must have the same length; a mismatch is
// reported as std::invalid_argument.
412 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
413 (values.extent(0) != columnIndices.extent(0),
414 std::invalid_argument,
"values.extent(0)=" << values.extent(0)
415 <<
" != columnIndices.extent(0) = " << columnIndices.extent(0)
// In debug mode with non-empty rowPointers: read the last row pointer via
// getEntryOnHost and require it to equal the lengths of both columnIndices
// and values.
417 if (debug && rowPointers.extent(0) != 0) {
418 const size_t numEnt =
419 getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
420 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
421 (numEnt !=
size_t(columnIndices.extent(0)) ||
422 numEnt !=
size_t(values.extent(0)),
423 std::invalid_argument,
"Last entry of rowPointers says that "
424 "the matrix has " << numEnt <<
" entr"
425 << (numEnt != 1 ?
"ies" :
"y") <<
", but the dimensions of "
426 "columnIndices and values don't match this. "
427 "columnIndices.extent(0)=" << columnIndices.extent (0)
428 <<
" and values.extent(0)=" << values.extent (0) <<
".");
// Build the CrsGraph from the Maps, row pointers, and column indices.
431 RCP<crs_graph_type> graph;
433 graph = Teuchos::rcp (
new crs_graph_type (rowMap, colMap, rowPointers,
434 columnIndices, params));
// A std::exception from the graph constructor is reported as
// std::runtime_error.
436 catch (std::exception& e) {
437 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
438 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
439 "RCP<const Map>, ptr, ind[, params]) threw an exception: "
// Internal consistency checks: the graph's local row map, entries, and the
// input values must agree in length with the inputs; a mismatch is reported
// as std::logic_error.
447 auto lclGraph = graph->getLocalGraphDevice ();
448 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
449 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
450 lclGraph.entries.extent (0) != columnIndices.extent (0),
451 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, "
452 "ind[, params]) did not set the local graph correctly." << suffix);
453 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
454 (lclGraph.entries.extent (0) != values.extent (0),
455 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
456 "params]) did not set the local graph correctly. "
457 "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
458 <<
" != values.extent(0) = " << values.extent (0) << suffix);
// The newly created graph becomes staticGraph_.
464 staticGraph_ = graph;
// The unpacked values handle is assigned from the packed one.
474 valuesUnpacked_wdv = valuesPacked_wdv;
483 std::ostringstream os;
484 os << *prefix <<
"Done" << endl;
485 std::cerr << os.str();
// Constructor overload taking a column Map and Teuchos::ArrayRCP inputs
// (ptr, ind, val) plus an optional ParameterList.  The first part of the
// signature and the graph construction are not visible in this excerpt.
489 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
492 const Teuchos::RCP<const map_type>& colMap,
493 const Teuchos::ArrayRCP<size_t>& ptr,
494 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
495 const Teuchos::ArrayRCP<Scalar>& val,
496 const Teuchos::RCP<Teuchos::ParameterList>& params) :
498 storageStatus_ (Details::STORAGE_1D_PACKED)
500 using Kokkos::Compat::getKokkosViewDeepCopy;
501 using Teuchos::av_reinterpret_cast;
503 using values_type =
typename local_matrix_device_type::values_type;
// Prefix naming this overload, used by the exception macros below.
505 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
506 "RCP<const Map>, ptr, ind, val[, params]): ";
508 RCP<crs_graph_type> graph;
// A std::exception caught here is reported as std::runtime_error with the
// original what() text appended.
513 catch (std::exception& e) {
514 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
515 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
516 "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
517 "RCP<ParameterList>]) threw an exception: " << e.what ());
// The newly created graph becomes staticGraph_.
523 staticGraph_ = graph;
// Internal consistency check: the graph's local row map and entries must
// match the sizes of ptr and ind; a mismatch is reported as std::logic_error.
536 auto lclGraph = staticGraph_->getLocalGraphDevice ();
537 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
538 (
size_t (lclGraph.row_map.extent (0)) !=
size_t (ptr.size ()) ||
539 size_t (lclGraph.entries.extent (0)) !=
size_t (ind.size ()),
540 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, "
541 "ptr, ind[, params]) did not set the local graph correctly. "
542 "Please report this bug to the Tpetra developers.");
// The input values are reinterpreted as IST and passed to
// getKokkosViewDeepCopy for device_type (the receiving variable is not
// visible in this excerpt).
545 getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
// The unpacked values handle is assigned from the packed one.
547 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor overload taking a column Map and a ParameterList; the visible
// body uses `lclMatrix.graph`.  Parts of the signature and body are not
// visible in this excerpt.
557 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
560 const Teuchos::RCP<const map_type>& colMap,
562 const Teuchos::RCP<Teuchos::ParameterList>& params) :
564 storageStatus_ (Details::STORAGE_1D_PACKED),
// Prefix naming this overload, and a suffix for internal-error messages.
567 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
568 "RCP<const Map>, local_matrix_device_type[, RCP<ParameterList>]): ";
569 const char suffix[] =
570 " Please report this bug to the Tpetra developers.";
572 Teuchos::RCP<crs_graph_type> graph;
575 lclMatrix.graph, params));
// A std::exception caught here is reported as std::runtime_error with the
// original what() text appended.
577 catch (std::exception& e) {
578 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
579 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
580 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) threw an "
581 "exception: " << e.what ());
// The graph is required to be fill complete after construction; otherwise
// std::logic_error is reported.
583 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
584 (!graph->isFillComplete (), std::logic_error,
"CrsGraph constructor (RCP"
585 "<const Map>, RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) "
586 "did not produce a fill-complete graph. Please report this bug to the "
587 "Tpetra developers.");
// The newly created graph becomes staticGraph_.
592 staticGraph_ = graph;
// The unpacked values handle is assigned from the packed one.
595 valuesUnpacked_wdv = valuesPacked_wdv;
// Final state checks; their condition lines are not visible in this
// excerpt, only the messages.
597 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
599 "At the end of a CrsMatrix constructor that should produce "
600 "a fillComplete matrix, isFillActive() is true." << suffix);
601 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
603 "CrsMatrix constructor that should produce a fillComplete "
604 "matrix, isFillComplete() is false." << suffix);
// Constructor overload taking row, column, domain, and range Maps plus a
// ParameterList; the visible body uses `lclMatrix.graph`.  Parts of the
// signature and body are not visible in this excerpt.
608 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
611 const Teuchos::RCP<const map_type>& rowMap,
612 const Teuchos::RCP<const map_type>& colMap,
613 const Teuchos::RCP<const map_type>& domainMap,
614 const Teuchos::RCP<const map_type>& rangeMap,
615 const Teuchos::RCP<Teuchos::ParameterList>& params) :
617 storageStatus_ (Details::STORAGE_1D_PACKED),
// Prefix naming this overload, and a suffix for internal-error messages.
620 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
621 "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
622 "local_matrix_device_type[, RCP<ParameterList>]): ";
623 const char suffix[] =
624 " Please report this bug to the Tpetra developers.";
// Build the CrsGraph from the local matrix's graph and the four Maps.
626 Teuchos::RCP<crs_graph_type> graph;
628 graph = Teuchos::rcp (
new crs_graph_type (lclMatrix.graph, rowMap, colMap,
629 domainMap, rangeMap, params));
// A std::exception from the graph constructor is reported as
// std::runtime_error with the original what() text appended.
631 catch (std::exception& e) {
632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
633 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
634 "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_device_type[, "
635 "RCP<ParameterList>]) threw an exception: " << e.what ());
// The graph is required to be fill complete after construction; otherwise
// std::logic_error is reported.
637 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
638 (! graph->isFillComplete (), std::logic_error,
"CrsGraph "
639 "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
640 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) did "
641 "not produce a fillComplete graph." << suffix);
// The newly created graph becomes staticGraph_.
646 staticGraph_ = graph;
// The unpacked values handle is assigned from the packed one.
649 valuesUnpacked_wdv = valuesPacked_wdv;
// Final state checks; their condition lines are not visible in this
// excerpt, only the messages.
651 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
653 "At the end of a CrsMatrix constructor that should produce "
654 "a fillComplete matrix, isFillActive() is true." << suffix);
655 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
657 "CrsMatrix constructor that should produce a fillComplete "
658 "matrix, isFillComplete() is false." << suffix);
// Constructor overload taking row, column, domain, and range Maps, an
// Import, an Export, and a ParameterList.  Parts of the signature and body
// are not visible in this excerpt.
662 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
665 const Teuchos::RCP<const map_type>& rowMap,
666 const Teuchos::RCP<const map_type>& colMap,
667 const Teuchos::RCP<const map_type>& domainMap,
668 const Teuchos::RCP<const map_type>& rangeMap,
669 const Teuchos::RCP<const import_type>& importer,
670 const Teuchos::RCP<const export_type>& exporter,
671 const Teuchos::RCP<Teuchos::ParameterList>& params) :
673 storageStatus_ (Details::STORAGE_1D_PACKED),
// Prefix naming this overload, and a suffix for internal-error messages.
677 const char tfecfFuncName[] =
"Tpetra::CrsMatrix"
678 "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
679 const char suffix[] =
680 " Please report this bug to the Tpetra developers.";
682 Teuchos::RCP<crs_graph_type> graph;
685 domainMap, rangeMap, importer,
// A std::exception caught here is reported as std::runtime_error with the
// original what() text appended.
688 catch (std::exception& e) {
689 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
690 (
true, std::runtime_error,
"CrsGraph constructor "
691 "(local_graph_device_type, Map, Map, Map, Map, Import, Export, "
692 "params) threw: " << e.what ());
// The graph is required to be fill complete after construction; otherwise
// std::logic_error is reported.
694 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
695 (!graph->isFillComplete (), std::logic_error,
"CrsGraph "
696 "constructor (local_graph_device_type, Map, Map, Map, Map, Import, "
697 "Export, params) did not produce a fill-complete graph. "
698 "Please report this bug to the Tpetra developers.");
// The newly created graph becomes staticGraph_.
703 staticGraph_ = graph;
// The unpacked values handle is assigned from the packed one.
706 valuesUnpacked_wdv = valuesPacked_wdv;
// Final state checks; their condition lines are not visible in this
// excerpt, only the messages.
708 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
710 "At the end of a CrsMatrix constructor that should produce "
711 "a fillComplete matrix, isFillActive() is true." << suffix);
712 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
714 "CrsMatrix constructor that should produce a fillComplete "
715 "matrix, isFillComplete() is false." << suffix);
// Constructor overload reading from an existing `source` matrix with a
// Teuchos::DataAccess selector (copyOrView).  Parts of the signature and
// body are not visible in this excerpt.
719 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
722 const Teuchos::DataAccess copyOrView):
// The graph pointer and storage status are taken from `source`.
724 staticGraph_ (source.getCrsGraph()),
725 storageStatus_ (source.storageStatus_)
727 const char tfecfFuncName[] =
"Tpetra::CrsMatrix("
728 "const CrsMatrix&, const Teuchos::DataAccess): ";
// Check whose condition line is not visible here; the message states the
// requirement on the source graph.
729 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
731 "Source graph must be fillComplete().");
// Copy mode: a new values view labelled "val" is allocated without
// initialization (the copy itself is not visible in this excerpt).
733 if (copyOrView == Teuchos::Copy) {
734 using values_type =
typename local_matrix_device_type::values_type;
736 using Kokkos::view_alloc;
737 using Kokkos::WithoutInitializing;
738 values_type newvals (view_alloc (
"val", WithoutInitializing),
743 valuesUnpacked_wdv = valuesPacked_wdv;
// View mode: the branch body is not visible in this excerpt.
746 else if (copyOrView == Teuchos::View) {
// Reports std::invalid_argument naming the offending copyOrView value and
// the two accepted values.
752 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
753 (
true, std::invalid_argument,
"Second argument 'copyOrView' "
754 "has an invalid value " << copyOrView <<
". Valid values "
755 "include Teuchos::Copy = " << Teuchos::Copy <<
" and "
756 "Teuchos::View = " << Teuchos::View <<
".");
// Exchanges members between crs_matrix and *this using std::swap; the
// signature and some swapped members are not visible in this excerpt.
761 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Import/export multivector members.
766 std::swap(crs_matrix.
importMV_, this->importMV_);
767 std::swap(crs_matrix.
exportMV_, this->exportMV_);
// Graph pointers.
768 std::swap(crs_matrix.staticGraph_, this->staticGraph_);
769 std::swap(crs_matrix.myGraph_, this->myGraph_);
// Packed and unpacked value handles.
770 std::swap(crs_matrix.valuesPacked_wdv, this->valuesPacked_wdv);
771 std::swap(crs_matrix.valuesUnpacked_wdv, this->valuesUnpacked_wdv);
// The nonlocals_ member.
774 std::swap(crs_matrix.
nonlocals_, this->nonlocals_);
// Returns an RCP to a const Teuchos::Comm<int> obtained by forwarding to
// getComm() on the graph returned by getCrsGraphRef().  The signature line
// is not visible in this excerpt.
777 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
778 Teuchos::RCP<const Teuchos::Comm<int> >
781 return getCrsGraphRef ().getComm ();
// Returns the fillComplete_ member as-is.  The signature line is not
// visible in this excerpt.
784 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
788 return fillComplete_;
// Returns the logical negation of the fillComplete_ member.  The signature
// line is not visible in this excerpt.
791 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
795 return ! fillComplete_;
// Forwards to isStorageOptimized() on the graph returned by
// getCrsGraphRef().  The signature line is not visible in this excerpt.
798 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
802 return this->getCrsGraphRef ().isStorageOptimized ();
// Forwards to isLocallyIndexed() on the graph returned by getCrsGraphRef().
// The signature line is not visible in this excerpt.
805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
809 return getCrsGraphRef ().isLocallyIndexed ();
// Forwards to isGloballyIndexed() on the graph returned by getCrsGraphRef().
// The signature line is not visible in this excerpt.
812 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
816 return getCrsGraphRef ().isGloballyIndexed ();
// Forwards to hasColMap() on the graph returned by getCrsGraphRef().
// The signature line is not visible in this excerpt.
819 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
823 return getCrsGraphRef ().hasColMap ();
// Forwards to getGlobalNumEntries() on the graph returned by
// getCrsGraphRef().  The signature line is not visible in this excerpt.
826 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
830 return getCrsGraphRef ().getGlobalNumEntries ();
// Forwards to getLocalNumEntries() on the graph returned by
// getCrsGraphRef().  The signature line is not visible in this excerpt.
833 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
837 return getCrsGraphRef ().getLocalNumEntries ();
// Forwards to getGlobalNumRows() on the graph returned by getCrsGraphRef().
// The signature line is not visible in this excerpt.
840 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
844 return getCrsGraphRef ().getGlobalNumRows ();
// Forwards to getGlobalNumCols() on the graph returned by getCrsGraphRef().
// The signature line is not visible in this excerpt.
847 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
851 return getCrsGraphRef ().getGlobalNumCols ();
// Forwards to getLocalNumRows() on the graph returned by getCrsGraphRef().
// The signature line is not visible in this excerpt.
854 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
858 return getCrsGraphRef ().getLocalNumRows ();
// Forwards to getLocalNumCols() on the graph returned by getCrsGraphRef().
// The signature line is not visible in this excerpt.
862 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
866 return getCrsGraphRef ().getLocalNumCols ();
// Forwards globalRow to getNumEntriesInGlobalRow() on the graph returned by
// getCrsGraphRef().  The signature line is not visible in this excerpt.
870 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
874 return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
// Forwards localRow to getNumEntriesInLocalRow() on the graph returned by
// getCrsGraphRef().  The signature line is not visible in this excerpt.
877 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
881 return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
// Forwards to getGlobalMaxNumRowEntries() on the graph returned by
// getCrsGraphRef().  The signature line is not visible in this excerpt.
884 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
888 return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
// Forwards to getLocalMaxNumRowEntries() on the graph returned by
// getCrsGraphRef().  The signature line is not visible in this excerpt.
891 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
895 return getCrsGraphRef ().getLocalMaxNumRowEntries ();
// Returns getIndexBase() of the Map returned by getRowMap().  The signature
// line is not visible in this excerpt.
898 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
902 return getRowMap ()->getIndexBase ();
// Returns an RCP to a const Map by forwarding to getRowMap() on the graph
// returned by getCrsGraphRef().  The signature line is not visible here.
905 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
906 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
909 return getCrsGraphRef ().getRowMap ();
// Returns an RCP to a const Map by forwarding to getColMap() on the graph
// returned by getCrsGraphRef().  The signature line is not visible here.
912 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
913 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
916 return getCrsGraphRef ().getColMap ();
// Returns an RCP to a const Map by forwarding to getDomainMap() on the graph
// returned by getCrsGraphRef().  The signature line is not visible here.
919 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
920 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
923 return getCrsGraphRef ().getDomainMap ();
// Returns an RCP to a const Map by forwarding to getRangeMap() on the graph
// returned by getCrsGraphRef().  The signature line is not visible here.
926 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
927 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
930 return getCrsGraphRef ().getRangeMap ();
// Returns an RCP to a const RowGraph.  The visible body branches on whether
// staticGraph_ is non-null; the branch bodies and the signature line are not
// visible in this excerpt.
933 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
934 Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
937 if (staticGraph_ != Teuchos::null) {
// Returns an RCP to a const CrsGraph.  The visible body branches on whether
// staticGraph_ is non-null; the branch bodies and the signature line are not
// visible in this excerpt.
943 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
944 Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
947 if (staticGraph_ != Teuchos::null) {
// getCrsGraphRef (per the error prefix below): returns a reference to
// whichever graph the matrix holds.  The signature and some lines (including
// the directive separating the two `debug` definitions) are not visible.
953 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// `debug` is a compile-time constant selected by HAVE_TPETRA_DEBUG.
958 #ifdef HAVE_TPETRA_DEBUG
959 constexpr
bool debug =
true;
961 constexpr
bool debug =
false;
962 #endif // HAVE_TPETRA_DEBUG
// Prefer staticGraph_ when it is non-null.
964 if (! this->staticGraph_.is_null ()) {
965 return * (this->staticGraph_);
// Otherwise fall back to myGraph_; a null myGraph_ is reported as
// std::logic_error (how `debug` gates this check is not visible here).
969 const char tfecfFuncName[] =
"getCrsGraphRef: ";
970 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
971 (this->myGraph_.is_null (), std::logic_error,
972 "Both staticGraph_ and myGraph_ are null. "
973 "Please report this bug to the Tpetra developers.");
975 return * (this->myGraph_);
// Returns a local_matrix_device_type.  Visible pieces: the column count is
// the number of local elements in the static graph's column Map, and the
// result is built from the packed values' device view (ReadWrite access) and
// the static graph's local device graph.  Remaining lines are not visible.
979 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
980 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
984 auto numCols = staticGraph_->getColMap()->getLocalNumElements();
987 valuesPacked_wdv.getDeviceView(Access::ReadWrite),
988 staticGraph_->getLocalGraphDevice());
// Returns a local_matrix_host_type labelled
// "Tpetra::CrsMatrix::lclMatrixHost", built from the column Map's local
// element count, the packed values' host view (ReadWrite access), and the
// static graph's local host graph.  The signature line is not visible.
991 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
992 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type
996 auto numCols = staticGraph_->
getColMap()->getLocalNumElements();
997 return local_matrix_host_type(
"Tpetra::CrsMatrix::lclMatrixHost", numCols,
998 valuesPacked_wdv.getHostView(Access::ReadWrite),
999 staticGraph_->getLocalGraphHost());
1002 #if KOKKOSKERNELS_VERSION < 40299
// Returns a shared_ptr to a local_multiply_op_type wrapping the local device
// matrix.  The signature, braces, and closing preprocessor lines are not
// visible in this excerpt.
1004 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1005 std::shared_ptr<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_multiply_op_type>
1009 auto localMatrix = getLocalMatrixDevice();
// Path compiled only when one of the cuSPARSE / rocSPARSE / MKL TPLs is
// enabled in KokkosKernels.
1010 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
// Taken only when the local entry count does not exceed the maximum
// LocalOrdinal value.
1011 if(this->getLocalNumEntries() <=
size_t(Teuchos::OrdinalTraits<LocalOrdinal>::max()))
// Build ordinalRowptrs only if it has no data yet.
1013 if(this->ordinalRowptrs.data() ==
nullptr)
1015 auto originalRowptrs = localMatrix.graph.row_map;
// Allocate (without initializing) a view with the same length as the
// original row pointers.
1018 this->ordinalRowptrs = ordinal_rowptrs_type(
1019 Kokkos::ViewAllocateWithoutInitializing(
"CrsMatrix::ordinalRowptrs"), originalRowptrs.extent(0));
// Local copy of the view handle for capture by the device lambda.
1020 auto ordinalRowptrs_ = this->ordinalRowptrs;
// Element-wise copy of the original row pointers into the new view.
1021 Kokkos::parallel_for(
"CrsMatrix::getLocalMultiplyOperator::convertRowptrs",
1022 Kokkos::RangePolicy<execution_space>(0, originalRowptrs.extent(0)),
1023 KOKKOS_LAMBDA(LocalOrdinal i)
1025 ordinalRowptrs_(i) = originalRowptrs(i);
// Operator constructed with both the local matrix and ordinalRowptrs.
1029 return std::make_shared<local_multiply_op_type>(
1030 std::make_shared<local_matrix_device_type>(localMatrix), this->ordinalRowptrs);
// Operator constructed from the local matrix alone.
1034 return std::make_shared<local_multiply_op_type>(
1035 std::make_shared<local_matrix_device_type>(localMatrix));
// Returns whether myGraph_ is null.  The signature line is not visible in
// this excerpt.
1039 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1043 return myGraph_.is_null ();
1046 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1053 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// allocateValues (per the error prefix below): validates the graph's
// allocation state against the caller's `gas` argument, optionally has the
// owned graph allocate its indices, and allocates the values storage.  The
// signature, braces, and several guarding conditions are not visible here.
1060 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Prefix naming this function, and a suffix for internal-error messages.
1069 const char tfecfFuncName[] =
"allocateValues: ";
1070 const char suffix[] =
1071 " Please report this bug to the Tpetra developers.";
1072 ProfilingRegion region(
"Tpetra::CrsMatrix::allocateValues");
// Diagnostic output: reports the lg and gas arguments on std::cerr.
1074 std::unique_ptr<std::string> prefix;
1076 prefix = this->createPrefix(
"CrsMatrix",
"allocateValues");
1077 std::ostringstream os;
1078 os << *prefix <<
"lg: "
1079 << (lg == LocalIndices ?
"Local" :
"Global") <<
"Indices"
1081 << (gas == GraphAlreadyAllocated ?
"Already" :
"NotYet")
1082 <<
"Allocated" << endl;
1083 std::cerr << os.str();
1086 const bool debug = Behavior::debug(
"CrsMatrix");
// staticGraph_ must be non-null; otherwise std::logic_error is reported.
1088 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1089 (this->staticGraph_.is_null (), std::logic_error,
1090 "staticGraph_ is null." << suffix);
// The caller's claim (gas) must agree with what the static graph reports
// about its indices; the two macros below pick the message matching the
// direction of the disagreement.
1095 if ((gas == GraphAlreadyAllocated) !=
1096 staticGraph_->indicesAreAllocated ()) {
1097 const char err1[] =
"The caller has asserted that the graph "
1099 const char err2[] =
"already allocated, but the static graph "
1100 "says that its indices are ";
1101 const char err3[] =
"already allocated. ";
1102 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1103 (gas == GraphAlreadyAllocated &&
1104 ! staticGraph_->indicesAreAllocated (), std::logic_error,
1105 err1 << err2 <<
"not " << err3 << suffix);
1106 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1107 (gas != GraphAlreadyAllocated &&
1108 staticGraph_->indicesAreAllocated (), std::logic_error,
1109 err1 <<
"not " << err2 << err3 << suffix);
// If the static graph's indices are not allocated, the matrix must own the
// graph (myGraph_ non-null); otherwise std::logic_error is reported.
1117 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1118 (! this->staticGraph_->indicesAreAllocated () &&
1119 this->myGraph_.is_null (), std::logic_error,
1120 "The static graph says that its indices are not allocated, "
1121 "but the graph is not owned by the matrix." << suffix);
// Graph not yet allocated: the owned graph must exist, and it is asked to
// allocate its indices.
1124 if (gas == GraphNotYetAllocated) {
1126 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1127 (this->myGraph_.is_null (), std::logic_error,
1128 "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1131 this->myGraph_->allocateIndices (lg, verbose);
// Failures from allocateIndices are reported as std::runtime_error, with a
// separate message for exceptions not derived from std::exception.
1133 catch (std::exception& e) {
1134 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1135 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1136 "threw an exception: " << e.what ());
1139 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1140 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1141 "threw an exception not a subclass of std::exception.");
// The number of values to allocate is the static graph's local allocation size.
1146 const size_t lclTotalNumEntries = this->staticGraph_->getLocalAllocationSize();
// Consistency check: the host row-pointer entry at index lclNumRows must
// equal the allocation size.
// NOTE(review): the message below spells "rowPtrsUnapcked_host"; this looks
// like a typo for "rowPtrsUnpacked_host" in a runtime string -- confirm
// before changing, since it alters emitted text.
1148 const size_t lclNumRows = this->staticGraph_->getLocalNumRows ();
1149 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1150 (this->staticGraph_->getRowPtrsUnpackedHost()(lclNumRows) != lclTotalNumEntries, std::logic_error,
1151 "length of staticGraph's lclIndsUnpacked does not match final entry of rowPtrsUnapcked_host." << suffix);
1155 using values_type =
typename local_matrix_device_type::values_type;
// Diagnostic output: current and requested length of the values storage.
1157 std::ostringstream os;
1158 os << *prefix <<
"Allocate values_wdv: Pre "
1159 << valuesUnpacked_wdv.extent(0) <<
", post "
1160 << lclTotalNumEntries << endl;
1161 std::cerr << os.str();
// New values view with lclTotalNumEntries entries (the assignment target is
// not visible in this excerpt).
1165 values_type(
"Tpetra::CrsMatrix::values",
1166 lclTotalNumEntries));
// fillLocalGraphAndMatrix (name taken from the string literals below; the
// signature lines are not part of this excerpt).
//
// Visible behavior: verifies that the matrix owns a nonconst graph
// (myGraph_), then either packs the graph's column indices and the matrix's
// values into newly allocated arrays, or makes the packed members alias the
// unpacked ones, and finally applies the "Optimize Storage" parameter.
// Several lines of the original definition are missing from this excerpt,
// so branch boundaries noted below are inferred from the visible statements.
1170 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1176 using ::Tpetra::Details::getEntryOnHost;
1177 using Teuchos::arcp_const_cast;
1178 using Teuchos::Array;
1179 using Teuchos::ArrayRCP;
1180 using Teuchos::null;
1184 using row_map_type =
typename local_graph_device_type::row_map_type;
1185 using lclinds_1d_type =
typename Graph::local_graph_device_type::entries_type::non_const_type;
1186 using values_type =
typename local_matrix_device_type::values_type;
1188 (
"Tpetra::CrsMatrix::fillLocalGraphAndMatrix");
// Prefix and suffix used by the error messages in this function.
1190 const char tfecfFuncName[] =
"fillLocalGraphAndMatrix (called from "
1191 "fillComplete or expertStaticFillComplete): ";
1192 const char suffix[] =
1193 " Please report this bug to the Tpetra developers.";
// Prefix string for verbose output written to std::cerr.
1197 std::unique_ptr<std::string> prefix;
1199 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalGraphAndMatrix");
1200 std::ostringstream os;
1201 os << *prefix << endl;
1202 std::cerr << os.str ();
// Precondition check (std::logic_error): myGraph_ must be non-null. Per the
// message, a matrix with a const ("static") graph must never reach this
// function.
1208 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1209 (myGraph_.is_null (), std::logic_error,
"The nonconst graph "
1210 "(myGraph_) is null. This means that the matrix has a "
1211 "const (a.k.a. \"static\") graph. fillComplete or "
1212 "expertStaticFillComplete should never call "
1213 "fillLocalGraphAndMatrix in that case." << suffix);
1216 const size_t lclNumRows = this->getLocalNumRows ();
// Current (unpacked) device row offsets of the graph. The checks below
// require lclNumRows + 1 entries, and that the last offset equals the length
// of the graph's unpacked column-index array.
1231 typename Graph::local_graph_device_type::row_map_type curRowOffsets =
1232 myGraph_->rowPtrsUnpacked_dev_;
1235 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1236 (curRowOffsets.extent (0) == 0, std::logic_error,
1237 "curRowOffsets.extent(0) == 0.");
1238 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1239 (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1240 "curRowOffsets.extent(0) = "
1241 << curRowOffsets.extent (0) <<
" != lclNumRows + 1 = "
1242 << (lclNumRows + 1) <<
".");
1243 const size_t numOffsets = curRowOffsets.extent (0);
1244 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1245 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1247 myGraph_->lclIndsUnpacked_wdv.extent (0) != valToCheck,
1248 std::logic_error,
"numOffsets = " <<
1249 numOffsets <<
" != 0 and myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1250 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
" != curRowOffsets("
1251 << numOffsets <<
") = " << valToCheck <<
".");
// The number of stored entries differs from the allocation size, so the
// storage is repacked into new arrays (the verbose message below calls this
// "Unpacked 1-D storage").
1254 if (myGraph_->getLocalNumEntries() !=
1255 myGraph_->getLocalAllocationSize()) {
1259 typename row_map_type::non_const_type k_ptrs;
1260 row_map_type k_ptrs_const;
1261 lclinds_1d_type k_inds;
1265 std::ostringstream os;
1266 const auto numEnt = myGraph_->getLocalNumEntries();
1267 const auto allocSize = myGraph_->getLocalAllocationSize();
1268 os << *prefix <<
"Unpacked 1-D storage: numEnt=" << numEnt
1269 <<
", allocSize=" << allocSize << endl;
1270 std::cerr << os.str ();
// Debug-only consistency check before packing: the last unpacked row offset
// must equal the lengths of both the unpacked values array and the unpacked
// column-index array.
1278 if (debug && curRowOffsets.extent (0) != 0) {
1279 const size_t numOffsets =
1280 static_cast<size_t> (curRowOffsets.extent (0));
1281 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1282 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1283 (static_cast<size_t> (valToCheck) !=
1284 static_cast<size_t> (valuesUnpacked_wdv.extent (0)),
1285 std::logic_error,
"(unpacked branch) Before "
1286 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1287 <<
") = " << valToCheck <<
" != valuesUnpacked_wdv.extent(0)"
1288 " = " << valuesUnpacked_wdv.extent (0) <<
".");
1289 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1290 (static_cast<size_t> (valToCheck) !=
1291 static_cast<size_t> (myGraph_->lclIndsUnpacked_wdv.extent (0)),
1292 std::logic_error,
"(unpacked branch) Before "
1293 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1294 <<
") = " << valToCheck
1295 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1296 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
".");
// Total number of entries after packing; the expression that assigns it
// further below is not visible in this excerpt.
1304 size_t lclTotalNumEntries = 0;
1310 std::ostringstream os;
1311 os << *prefix <<
"Allocate packed row offsets: "
1312 << (lclNumRows+1) << endl;
1313 std::cerr << os.str ();
// New packed row-offset array with lclNumRows + 1 entries, plus the graph's
// per-row entry counts.
1315 typename row_map_type::non_const_type
1316 packedRowOffsets (
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
1317 typename row_entries_type::const_type numRowEnt_h =
1318 myGraph_->k_numRowEntries_;
1321 lclTotalNumEntries =
1325 k_ptrs = packedRowOffsets;
1326 k_ptrs_const = k_ptrs;
// Consistency checks on the packed offsets: correct length, and last offset
// equal to lclTotalNumEntries.
1330 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1331 (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1333 "(unpacked branch) After packing k_ptrs, "
1334 "k_ptrs.extent(0) = " << k_ptrs.extent (0) <<
" != "
1335 "lclNumRows+1 = " << (lclNumRows+1) <<
".");
1336 const auto valToCheck = getEntryOnHost (k_ptrs, lclNumRows);
1337 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1338 (valToCheck != lclTotalNumEntries, std::logic_error,
1339 "(unpacked branch) After filling k_ptrs, "
1340 "k_ptrs(lclNumRows=" << lclNumRows <<
") = " << valToCheck
1341 <<
" != total number of entries on the calling process = "
1342 << lclTotalNumEntries <<
".");
1347 std::ostringstream os;
1348 os << *prefix <<
"Allocate packed local column indices: "
1349 << lclTotalNumEntries << endl;
1350 std::cerr << os.str ();
// Allocate the packed column-index and value arrays, each with
// lclTotalNumEntries entries.
1352 k_inds = lclinds_1d_type (
"Tpetra::CrsGraph::lclInds", lclTotalNumEntries);
1354 std::ostringstream os;
1355 os << *prefix <<
"Allocate packed values: "
1356 << lclTotalNumEntries << endl;
1357 std::cerr << os.str ();
1359 k_vals = values_type (
"Tpetra::CrsMatrix::values", lclTotalNumEntries);
// Pack the column indices with one parallel_for iteration per local row.
// pack_functor is defined elsewhere; presumably it copies each row from the
// unpacked layout (curRowOffsets) to the packed layout (k_ptrs) -- confirm.
1371 using inds_packer_type = pack_functor<
1372 typename Graph::local_graph_device_type::entries_type::non_const_type,
1373 typename Graph::local_inds_dualv_type::t_dev::const_type,
1374 typename Graph::local_graph_device_type::row_map_type::non_const_type,
1375 typename Graph::local_graph_device_type::row_map_type>;
1376 inds_packer_type indsPacker (
1378 myGraph_->lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
1379 k_ptrs, curRowOffsets);
1381 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1382 Kokkos::parallel_for
1383 (
"Tpetra::CrsMatrix pack column indices",
1384 range_type (0, lclNumRows), indsPacker);
// Pack the values with the same functor template and iteration range.
1388 using vals_packer_type = pack_functor<
1389 typename values_type::non_const_type,
1390 typename values_type::const_type,
1391 typename row_map_type::non_const_type,
1392 typename row_map_type::const_type>;
1393 vals_packer_type valsPacker (
1395 this->valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1396 k_ptrs, curRowOffsets);
1397 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1398 range_type (0, lclNumRows), valsPacker);
// Post-packing checks: the last packed offset must equal the lengths of
// k_vals and k_inds.
1401 const char myPrefix[] =
"(\"Optimize Storage\""
1402 "=true branch) After packing, ";
1403 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1404 (k_ptrs.extent (0) == 0, std::logic_error, myPrefix
1405 <<
"k_ptrs.extent(0) = 0. This probably means that "
1406 "rowPtrsUnpacked_ was never allocated.");
1407 if (k_ptrs.extent (0) != 0) {
1408 const size_t numOffsets (k_ptrs.extent (0));
1409 const auto valToCheck =
1410 getEntryOnHost (k_ptrs, numOffsets - 1);
1411 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1412 (
size_t (valToCheck) != k_vals.extent (0),
1413 std::logic_error, myPrefix <<
1414 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1415 " != k_vals.extent(0) = " << k_vals.extent (0) <<
".");
1416 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1417 (
size_t (valToCheck) != k_inds.extent (0),
1418 std::logic_error, myPrefix <<
1419 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1420 " != k_inds.extent(0) = " << k_inds.extent (0) <<
".");
// Hand the packed row offsets to the graph. The right-hand side of the
// lclIndsPacked_wdv assignment is not visible in this excerpt.
1424 myGraph_->setRowPtrsPacked(k_ptrs_const);
1425 myGraph_->lclIndsPacked_wdv =
// Make the packed members alias the unpacked ones. The verbose message below
// labels this case "Storage already packed"; presumably this is the else
// branch of the entry-count comparison above -- the `else` line itself is
// not visible.
1432 myGraph_->rowPtrsPacked_dev_ = myGraph_->rowPtrsUnpacked_dev_;
1433 myGraph_->rowPtrsPacked_host_ = myGraph_->rowPtrsUnpacked_host_;
1434 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1435 myGraph_->lclIndsPacked_wdv = myGraph_->lclIndsUnpacked_wdv;
1436 valuesPacked_wdv = valuesUnpacked_wdv;
1439 std::ostringstream os;
1440 os << *prefix <<
"Storage already packed: rowPtrsUnpacked_: "
1441 << myGraph_->getRowPtrsUnpackedHost().extent(0) <<
", lclIndsUnpacked_wdv: "
1442 << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
", valuesUnpacked_wdv: "
1443 << valuesUnpacked_wdv.extent(0) << endl;
1444 std::cerr << os.str();
// Consistency checks for the aliasing case: the last unpacked row offset
// must equal the lengths of the packed values and packed column indices.
// NOTE(review): the messages label the value "k_ptrs_const(...)" although it
// is read from rowPtrsUnpackedHost.
1448 const char myPrefix[] =
1449 "(\"Optimize Storage\"=false branch) ";
1450 auto rowPtrsUnpackedHost = myGraph_->getRowPtrsUnpackedHost();
1451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1452 (myGraph_->rowPtrsUnpacked_dev_.extent (0) == 0, std::logic_error, myPrefix
1453 <<
"myGraph->rowPtrsUnpacked_dev_.extent(0) = 0. This probably means "
1454 "that rowPtrsUnpacked_ was never allocated.");
1455 if (myGraph_->rowPtrsUnpacked_dev_.extent (0) != 0) {
1456 const size_t numOffsets = rowPtrsUnpackedHost.extent (0);
1457 const auto valToCheck = rowPtrsUnpackedHost(numOffsets - 1);
1458 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1459 (
size_t (valToCheck) != valuesPacked_wdv.extent (0),
1460 std::logic_error, myPrefix <<
1461 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1462 <<
" != valuesPacked_wdv.extent(0) = "
1463 << valuesPacked_wdv.extent (0) <<
".");
1464 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1465 (
size_t (valToCheck) != myGraph_->lclIndsPacked_wdv.extent (0),
1466 std::logic_error, myPrefix <<
1467 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1468 <<
" != myGraph_->lclIndsPacked.extent(0) = "
1469 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// Checks on the packed host row offsets: length lclNumRows + 1, and last
// offset equal to the lengths of the packed values and packed indices.
// NOTE(review): the last message contains "lclIndsPacked_wdvk_inds", which
// looks like leftover text from a rename.
1475 const char myPrefix[] =
"After packing, ";
1476 auto rowPtrsPackedHost = myGraph_->getRowPtrsPackedHost();
1477 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1478 (
size_t (rowPtrsPackedHost.extent (0)) !=
size_t (lclNumRows + 1),
1479 std::logic_error, myPrefix <<
"myGraph_->rowPtrsPacked_host_.extent(0) = "
1480 << rowPtrsPackedHost.extent (0) <<
" != lclNumRows+1 = " <<
1481 (lclNumRows+1) <<
".");
1482 if (rowPtrsPackedHost.extent (0) != 0) {
1483 const size_t numOffsets (rowPtrsPackedHost.extent (0));
1484 const size_t valToCheck = rowPtrsPackedHost(numOffsets-1);
1485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1486 (valToCheck !=
size_t (valuesPacked_wdv.extent (0)),
1487 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1488 (numOffsets-1) <<
") = " << valToCheck
1489 <<
" != valuesPacked_wdv.extent(0) = "
1490 << valuesPacked_wdv.extent (0) <<
".");
1491 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1492 (valToCheck !=
size_t (myGraph_->lclIndsPacked_wdv.extent (0)),
1493 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1494 (numOffsets-1) <<
") = " << valToCheck
1495 <<
" != myGraph_->lclIndsPacked_wdvk_inds.extent(0) = "
1496 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// "Optimize Storage" defaults to true unless the graph is static and not
// storage-optimized. When params is non-null its "Optimize Storage" entry
// (with that default) decides; when params is null the default decides.
1504 const bool defaultOptStorage =
1505 ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1506 const bool requestOptimizedStorage =
1507 (! params.is_null () &&
1508 params->get (
"Optimize Storage", defaultOptStorage)) ||
1509 (params.is_null () && defaultOptStorage);
// Optimized storage requested: replace k_numRowEntries_ with an empty view,
// make the unpacked members alias the packed ones, and mark both the graph
// and the matrix as STORAGE_1D_PACKED.
1514 if (requestOptimizedStorage) {
1519 std::ostringstream os;
1520 os << *prefix <<
"Optimizing storage: free k_numRowEntries_: "
1521 << myGraph_->k_numRowEntries_.extent(0) << endl;
1522 std::cerr << os.str();
1525 myGraph_->k_numRowEntries_ = row_entries_type ();
1530 myGraph_->rowPtrsUnpacked_dev_ = myGraph_->rowPtrsPacked_dev_;
1531 myGraph_->rowPtrsUnpacked_host_ = myGraph_->rowPtrsPacked_host_;
1532 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1533 myGraph_->lclIndsUnpacked_wdv = myGraph_->lclIndsPacked_wdv;
1534 valuesUnpacked_wdv = valuesPacked_wdv;
1536 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1537 this->storageStatus_ = Details::STORAGE_1D_PACKED;
// Verbose note for the case where optimized storage was not requested.
1541 std::ostringstream os;
1542 os << *prefix <<
"User requested NOT to optimize storage"
1544 std::cerr << os.str();
// fillLocalMatrix (name taken from the profiling-region string below; the
// signature lines are not part of this excerpt).
//
// Visible behavior: works against staticGraph_ (the graph structure itself is
// not modified here) and either packs the matrix's values into a new array or
// makes valuesPacked_wdv alias valuesUnpacked_wdv, then applies the
// "Optimize Storage" request.
1549 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1554 using ::Tpetra::Details::ProfilingRegion;
1555 using Teuchos::ArrayRCP;
1556 using Teuchos::Array;
1557 using Teuchos::null;
1561 using row_map_type =
typename Graph::local_graph_device_type::row_map_type;
1562 using non_const_row_map_type =
typename row_map_type::non_const_type;
1563 using values_type =
typename local_matrix_device_type::values_type;
1564 ProfilingRegion regionFLM(
"Tpetra::CrsMatrix::fillLocalMatrix");
1565 const size_t lclNumRows = getLocalNumRows();
// Prefix string for verbose output written to std::cerr.
1568 std::unique_ptr<std::string> prefix;
1570 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalMatrix");
1571 std::ostringstream os;
1572 os << *prefix <<
"lclNumRows: " << lclNumRows << endl;
1573 std::cerr << os.str ();
// Entry count, allocation size, and packed device row offsets, all read from
// the static graph.
1585 size_t nodeNumEntries = staticGraph_->getLocalNumEntries ();
1586 size_t nodeNumAllocated = staticGraph_->getLocalAllocationSize ();
1587 row_map_type k_rowPtrs = staticGraph_->rowPtrsPacked_dev_;
1589 row_map_type k_ptrs;
// Optimized storage is requested by default; it is turned off only when
// params is non-null and its "Optimize Storage" entry evaluates to false.
1595 bool requestOptimizedStorage =
true;
1596 const bool default_OptimizeStorage =
1597 ! isStaticGraph() || staticGraph_->isStorageOptimized();
1598 if (! params.is_null() &&
1599 ! params->get(
"Optimize Storage", default_OptimizeStorage)) {
1600 requestOptimizedStorage =
false;
// The graph's storage is not optimized but optimized storage was requested:
// per the message the matrix cannot optimize storage, so the request is
// dropped. NOTE(review): the macro name preceding the argument list is not
// visible here; if it throws unconditionally, the assignment after it would
// be unreachable -- confirm which macro is used.
1607 if (! staticGraph_->isStorageOptimized () &&
1608 requestOptimizedStorage) {
1610 (
true, std::runtime_error,
"You requested optimized storage "
1611 "by setting the \"Optimize Storage\" flag to \"true\" in "
1612 "the ParameterList, or by virtue of default behavior. "
1613 "However, the associated CrsGraph was filled separately and "
1614 "requested not to optimize storage. Therefore, the "
1615 "CrsMatrix cannot optimize storage.");
1616 requestOptimizedStorage =
false;
// Entry count differs from the allocation size: pack the values.
1641 if (nodeNumEntries != nodeNumAllocated) {
1643 std::ostringstream os;
1644 os << *prefix <<
"Unpacked 1-D storage: numEnt="
1645 << nodeNumEntries <<
", allocSize=" << nodeNumAllocated
1647 std::cerr << os.str();
1652 std::ostringstream os;
1653 os << *prefix <<
"Allocate packed row offsets: "
1654 << (lclNumRows+1) << endl;
1655 std::cerr << os.str();
// Temporary packed row offsets; the remaining constructor arguments and the
// expression assigned to lclTotalNumEntries are not visible in this excerpt.
1657 non_const_row_map_type tmpk_ptrs (
"Tpetra::CrsGraph::ptr",
1662 size_t lclTotalNumEntries = 0;
1665 typename row_entries_type::const_type numRowEnt_h =
1666 staticGraph_->k_numRowEntries_;
1668 lclTotalNumEntries =
1675 std::ostringstream os;
1676 os << *prefix <<
"Allocate packed values: "
1677 << lclTotalNumEntries << endl;
1678 std::cerr << os.str ();
// Allocate the packed values array and fill it with one parallel_for
// iteration per local row. The functor (its template name is on a line not
// shown) receives the new array, a read-only view of the unpacked values,
// tmpk_ptrs and k_rowPtrs.
1680 k_vals = values_type (
"Tpetra::CrsMatrix::val", lclTotalNumEntries);
1684 typename values_type::non_const_type,
1685 typename values_type::const_type,
1686 typename row_map_type::non_const_type,
1687 typename row_map_type::const_type> valsPacker
1688 (k_vals, valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1689 tmpk_ptrs, k_rowPtrs);
1692 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1693 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1694 range_type (0, lclNumRows), valsPacker);
// Make the packed values alias the unpacked values (the verbose message
// below labels this case "Storage already packed").
1698 valuesPacked_wdv = valuesUnpacked_wdv;
1700 std::ostringstream os;
1701 os << *prefix <<
"Storage already packed: "
1702 <<
"valuesUnpacked_wdv: " << valuesUnpacked_wdv.extent(0) << endl;
1703 std::cerr << os.str();
// Optimized storage: the unpacked values now alias the packed values and the
// matrix is marked STORAGE_1D_PACKED.
1708 if (requestOptimizedStorage) {
1711 valuesUnpacked_wdv = valuesPacked_wdv;
1713 this->storageStatus_ = Details::STORAGE_1D_PACKED;
// Helper that inserts column indices into the graph and copies the matching
// values into the row's value storage. The function name and its first
// parameters are on lines not shown in this excerpt (presumably
// insertIndicesAndValues -- confirm).
1717 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1722 const typename crs_graph_type::SLocalGlobalViews& newInds,
1723 const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1724 const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1725 const ELocalGlobal lg,
1726 const ELocalGlobal I)
// Remember the row's entry count before insertion; it is the offset at which
// the new values are written.
1728 const size_t oldNumEnt = rowInfo.numEntries;
1729 const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
// Copy the first numInserted values of newRowVals into oldRowVals, starting
// at the old entry count. Nothing is copied when no index was inserted.
1735 if (numInserted > 0) {
1736 const size_t startOffset = oldNumEnt;
1737 memcpy ((
void*) &oldRowVals[startOffset], &newRowVals[0],
1738 numInserted *
sizeof (impl_scalar_type));
// insertLocalValues, ArrayView overload (name taken from tfecfFuncName
// below; the leading signature lines are not part of this excerpt).
//
// Visible behavior: validates the call, allocates values on first use, and
// inserts the given (local column index, value) pairs into row lclRow,
// combining values according to the combine mode CM.
1742 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1746 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1747 const Teuchos::ArrayView<const Scalar>& values,
1751 const char tfecfFuncName[] =
"insertLocalValues: ";
// Precondition checks (all std::runtime_error): fill must be active, the
// graph must not be static, the graph must have a column Map, values and
// indices must have equal length, and lclRow must be a local element of the
// row Map. The condition of the "Graph indices are global" check is on a
// line not shown.
1753 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1754 (! this->isFillActive (), std::runtime_error,
1755 "Fill is not active. After calling fillComplete, you must call "
1756 "resumeFill before you may insert entries into the matrix again.");
1757 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1758 (this->isStaticGraph (), std::runtime_error,
1759 "Cannot insert indices with static graph; use replaceLocalValues() "
1763 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1764 (graph.
colMap_.is_null (), std::runtime_error,
1765 "Cannot insert local indices without a column map.");
1766 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1768 std::runtime_error,
"Graph indices are global; use "
1769 "insertGlobalValues().");
1770 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1771 (values.size () != indices.size (), std::runtime_error,
1772 "values.size() = " << values.size ()
1773 <<
" != indices.size() = " << indices.size () <<
".");
1774 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1775 ! graph.
rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1776 "Local row index " << lclRow <<
" does not belong to this process.");
// Allocate values, using local indices, if the graph has not allocated its
// indices yet.
1778 if (! graph.indicesAreAllocated ()) {
1782 this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose);
// Debug builds only: collect the input indices flagged as bad (the test
// inside the loop is on a line not shown; per the message below they are the
// indices not in the column Map) and report them with std::invalid_argument.
1785 #ifdef HAVE_TPETRA_DEBUG
1786 const size_t numEntriesToAdd =
static_cast<size_t> (indices.size ());
1791 using Teuchos::toString;
1794 Teuchos::Array<LocalOrdinal> badColInds;
1795 bool allInColMap =
true;
1796 for (
size_t k = 0; k < numEntriesToAdd; ++k) {
1798 allInColMap =
false;
1799 badColInds.push_back (indices[k]);
1802 if (! allInColMap) {
1803 std::ostringstream os;
1804 os <<
"You attempted to insert entries in owned row " << lclRow
1805 <<
", at the following column indices: " << toString (indices)
1807 os <<
"Of those, the following indices are not in the column Map on "
1808 "this process: " << toString (badColInds) <<
"." << endl <<
"Since "
1809 "the matrix has a column Map already, it is invalid to insert "
1810 "entries at those locations.";
1811 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1812 (
true, std::invalid_argument, os.str ());
1815 #endif // HAVE_TPETRA_DEBUG
// Host view of the row's values; the callbacks below write through it.
1819 auto valsView = this->getValuesViewHostNonConst(rowInfo);
// First branch: the callback accumulates values[k] into the entry at
// `offset`. The branch condition is on a line not shown (presumably
// CM == ADD -- confirm).
1821 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1822 valsView[offset] += values[k]; };
1823 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1824 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1825 }
// INSERT mode: the callback overwrites the entry at `offset` with values[k].
else if (CM ==
INSERT) {
1826 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1827 valsView[offset] = values[k]; };
1828 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1829 graph.insertLocalIndicesImpl(lclRow, indices, cb);
// Remaining combine modes are reported with std::invalid_argument.
// NOTE(review): the message has no space between the combine-mode name and
// "but this has not been implemented."
1831 std::ostringstream os;
1832 os <<
"You attempted to use insertLocalValues with CombineMode " <<
combineModeToString(CM)
1833 <<
"but this has not been implemented." << endl;
1834 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1835 (
true, std::invalid_argument, os.str ());
// Raw-array overload that forwards to the ArrayView overload of
// insertLocalValues. The function name, the row parameter and the
// combine-mode parameter are on lines not shown in this excerpt.
1839 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1843 const LocalOrdinal numEnt,
1844 const Scalar vals[],
1845 const LocalOrdinal cols[],
// Wrap the raw arrays in ArrayViews of length numEnt and delegate.
1848 Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1849 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1850 this->insertLocalValues (localRow, colsT, valsT, CM);
// insertGlobalValuesImpl (name taken from tfecfFuncName below; most of the
// signature is on lines not shown in this excerpt).
//
// Visible behavior: allocates values on first use, accumulates the input
// values into the row through a callback, and in debug builds cross-checks
// the resulting entry count, producing a detailed std::logic_error report on
// mismatch.
1853 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1858 const GlobalOrdinal gblColInds[],
1860 const size_t numInputEnt)
// Debug builds only: remember the row's entry count before insertion.
1862 #ifdef HAVE_TPETRA_DEBUG
1863 const char tfecfFuncName[] =
"insertGlobalValuesImpl: ";
1865 const size_t curNumEnt = rowInfo.numEntries;
1866 #endif // HAVE_TPETRA_DEBUG
// Allocate values, using global indices, if the graph has not allocated its
// indices yet; rowInfo is re-read from the graph afterwards.
1868 if (! graph.indicesAreAllocated ()) {
1871 using ::Tpetra::Details::Behavior;
1872 const bool verbose = Behavior::verbose(
"CrsMatrix");
1873 this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose);
1878 rowInfo = graph.
getRowInfo (rowInfo.localRow);
// Callback that accumulates vals[k] into the row's entry at `offset`. The
// call that consumes `cb` is on a line not shown.
1881 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1882 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset){
1883 valsView[offset] += vals[k];
1885 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1886 #ifdef HAVE_TPETRA_DEBUG
// Debug builds only: the expected count (old count + number inserted) must
// equal chkNewNumEnt; the message labels the latter as
// graph.getNumEntriesInLocalRow(localRow), though its initializer is not
// visible. On mismatch a diagnostic report is assembled below.
// NOTE(review): the report streams `origNumEnt`, while the variable declared
// in the visible lines is `curNumEnt` -- confirm it is declared elsewhere.
1892 #ifdef HAVE_TPETRA_DEBUG
1893 size_t newNumEnt = curNumEnt + numInserted;
1894 const size_t chkNewNumEnt =
1896 if (chkNewNumEnt != newNumEnt) {
1897 std::ostringstream os;
1898 os << std::endl <<
"newNumEnt = " << newNumEnt
1899 <<
" != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1900 <<
") = " << chkNewNumEnt <<
"." << std::endl
1901 <<
"\torigNumEnt: " << origNumEnt << std::endl
1902 <<
"\tnumInputEnt: " << numInputEnt << std::endl
1903 <<
"\tgblColInds: [";
1904 for (
size_t k = 0; k < numInputEnt; ++k) {
1905 os << gblColInds[k];
1906 if (k +
size_t (1) < numInputEnt) {
1910 os <<
"]" << std::endl
1912 for (
size_t k = 0; k < numInputEnt; ++k) {
1914 if (k +
size_t (1) < numInputEnt) {
1918 os <<
"]" << std::endl;
// If row views are supported, append the row's current contents to the
// report, using global or local column indices to match the matrix's
// indexing.
1920 if (this->supportsRowViews ()) {
1921 values_host_view_type vals2;
1922 if (this->isGloballyIndexed ()) {
1923 global_inds_host_view_type gblColInds2;
1924 const GlobalOrdinal gblRow =
1925 graph.
rowMap_->getGlobalElement (rowInfo.localRow);
1927 Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
1928 os <<
"Local row index " << rowInfo.localRow <<
" is invalid!"
// An exception from getGlobalRowView is recorded in the report rather than
// propagated.
1932 bool getViewThrew =
false;
1934 this->getGlobalRowView (gblRow, gblColInds2, vals2);
1936 catch (std::exception& e) {
1937 getViewThrew =
true;
1938 os <<
"getGlobalRowView threw exception:" << std::endl
1939 << e.what () << std::endl;
1941 if (! getViewThrew) {
1942 os <<
"\tNew global column indices: ";
1943 for (
size_t jjj = 0; jjj < gblColInds2.extent(0); jjj++)
1944 os << gblColInds2[jjj] <<
" ";
1946 os <<
"\tNew values: ";
1947 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1948 os << vals2[jjj] <<
" ";
1953 else if (this->isLocallyIndexed ()) {
1954 local_inds_host_view_type lclColInds2;
1955 this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
1956 os <<
"\tNew local column indices: ";
1957 for (
size_t jjj = 0; jjj < lclColInds2.extent(0); jjj++)
1958 os << lclColInds2[jjj] <<
" ";
1960 os <<
"\tNew values: ";
1961 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1962 os << vals2[jjj] <<
" ";
// Report the mismatch as std::logic_error.
1967 os <<
"Please report this bug to the Tpetra developers.";
1968 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1969 (
true, std::logic_error, os.str ());
1971 #endif // HAVE_TPETRA_DEBUG
// insertGlobalValues, ArrayView overload (name taken from tfecfFuncName
// below; the leading signature lines are not part of this excerpt).
//
// Visible behavior: rows not owned by the calling process go to
// insertNonownedGlobalValues; owned rows are rejected for static graphs,
// checked against the column Map when one exists, and then inserted through
// insertGlobalValuesImpl.
1974 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1978 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1979 const Teuchos::ArrayView<const Scalar>& values)
1981 using Teuchos::toString;
1984 typedef LocalOrdinal LO;
1985 typedef GlobalOrdinal GO;
1986 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
1987 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
1988 const char tfecfFuncName[] =
"insertGlobalValues: ";
// Debug builds only: values and indices must have the same length.
1990 #ifdef HAVE_TPETRA_DEBUG
1991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1992 (values.size () != indices.size (), std::runtime_error,
1993 "values.size() = " << values.size () <<
" != indices.size() = "
1994 << indices.size () <<
".");
1995 #endif // HAVE_TPETRA_DEBUG
1999 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
// lclRow is invalid, so this row is handed to the nonowned-insert path. The
// line that computes lclRow is not visible in this excerpt.
2002 if (lclRow == OTLO::invalid ()) {
2009 this->insertNonownedGlobalValues (gblRow, indices, values);
// Owned row with a static graph: inserting new entries is an error
// (std::runtime_error), reported with this process's rank and the
// communicator size.
2012 if (this->isStaticGraph ()) {
2014 const int myRank = rowMap.getComm ()->getRank ();
2015 const int numProcs = rowMap.getComm ()->getSize ();
2016 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2017 (
true, std::runtime_error,
2018 "The matrix was constructed with a constant (\"static\") graph, "
2019 "yet the given global row index " << gblRow <<
" is in the row "
2020 "Map on the calling process (with rank " << myRank <<
", of " <<
2021 numProcs <<
" process(es)). In this case, you may not insert "
2022 "new entries into rows owned by the calling process.");
// Raw pointers into the input; the values pointer is reinterpreted as IST.
2026 const IST*
const inputVals =
2027 reinterpret_cast<const IST*
> (values.getRawPtr ());
2028 const GO*
const inputGblColInds = indices.getRawPtr ();
2029 const size_t numInputEnt = indices.size ();
// With a column Map present, the input indices are checked (the test inside
// the loop is on a line not shown). In debug builds the offending indices
// are collected for the error message.
2038 if (! graph.
colMap_.is_null ()) {
2044 #ifdef HAVE_TPETRA_DEBUG
2045 Teuchos::Array<GO> badColInds;
2046 #endif // HAVE_TPETRA_DEBUG
2047 const size_type numEntriesToInsert = indices.size ();
2048 bool allInColMap =
true;
2049 for (size_type k = 0; k < numEntriesToInsert; ++k) {
2051 allInColMap =
false;
2052 #ifdef HAVE_TPETRA_DEBUG
2053 badColInds.push_back (indices[k]);
2056 #endif // HAVE_TPETRA_DEBUG
// Report the failure as std::invalid_argument; the debug build lists the
// offending indices, the other build gives a generic message.
2059 if (! allInColMap) {
2060 std::ostringstream os;
2061 os <<
"You attempted to insert entries in owned row " << gblRow
2062 <<
", at the following column indices: " << toString (indices)
2064 #ifdef HAVE_TPETRA_DEBUG
2065 os <<
"Of those, the following indices are not in the column Map "
2066 "on this process: " << toString (badColInds) <<
"." << endl
2067 <<
"Since the matrix has a column Map already, it is invalid "
2068 "to insert entries at those locations.";
2070 os <<
"At least one of those indices is not in the column Map "
2071 "on this process." << endl <<
"It is invalid to insert into "
2072 "columns not in the column Map on the process that owns the "
2074 #endif // HAVE_TPETRA_DEBUG
2075 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2076 (
true, std::invalid_argument, os.str ());
// Perform the insertion for the owned row.
2080 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2081 inputVals, numInputEnt);
// Raw-array overload that forwards to the ArrayView overload of
// insertGlobalValues. The function name and the row parameter are on lines
// not shown in this excerpt.
2086 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2090 const LocalOrdinal numEnt,
2091 const Scalar vals[],
2092 const GlobalOrdinal inds[])
// Wrap the raw arrays in ArrayViews of length numEnt and delegate.
2094 Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2095 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2096 this->insertGlobalValues (globalRow, indsT, valsT);
// insertGlobalValuesFiltered (name taken from tfecfFuncName below; parts of
// the signature are on lines not shown in this excerpt).
//
// Visible behavior: like insertGlobalValues, but when the graph has a column
// Map the input is processed as consecutive runs and the index that ends
// each run is skipped instead of being reported as an error.
2100 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2104 const GlobalOrdinal gblRow,
2105 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2106 const Teuchos::ArrayView<const Scalar>& values,
2109 typedef impl_scalar_type IST;
2110 typedef LocalOrdinal LO;
2111 typedef GlobalOrdinal GO;
2112 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2113 const char tfecfFuncName[] =
"insertGlobalValuesFiltered: ";
// values and indices must have the same length (std::runtime_error).
2116 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2117 (values.size () != indices.size (), std::runtime_error,
2118 "values.size() = " << values.size () <<
" != indices.size() = "
2119 << indices.size () <<
".");
// Translate the global row to a local row; an invalid result means the row
// is not in the row Map on this process, and the input goes to the
// nonowned-insert path.
2124 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2125 const LO lclRow = rowMap.getLocalElement (gblRow);
2126 if (lclRow == OTLO::invalid ()) {
2133 this->insertNonownedGlobalValues (gblRow, indices, values);
// Owned row with a static graph: inserting new entries is an error
// (std::runtime_error).
2136 if (this->isStaticGraph ()) {
2138 const int myRank = rowMap.getComm ()->getRank ();
2139 const int numProcs = rowMap.getComm ()->getSize ();
2140 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2141 (
true, std::runtime_error,
2142 "The matrix was constructed with a constant (\"static\") graph, "
2143 "yet the given global row index " << gblRow <<
" is in the row "
2144 "Map on the calling process (with rank " << myRank <<
", of " <<
2145 numProcs <<
" process(es)). In this case, you may not insert "
2146 "new entries into rows owned by the calling process.");
2149 crs_graph_type& graph = * (this->myGraph_);
2150 const IST*
const inputVals =
2151 reinterpret_cast<const IST*
> (values.getRawPtr ());
2152 const GO*
const inputGblColInds = indices.getRawPtr ();
2153 const size_t numInputEnt = indices.size ();
2154 RowInfo rowInfo = graph.getRowInfo (lclRow);
// Case 1: column Map present and graph locally indexed. Each run of input
// indices is converted to local column indices and inserted with
// insertLocalValues together with the matching slice of `values`.
2156 if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2163 const map_type& colMap = * (graph.colMap_);
2164 size_t curOffset = 0;
2165 while (curOffset < numInputEnt) {
2169 Teuchos::Array<LO> lclIndices;
2170 size_t endOffset = curOffset;
2171 for ( ; endOffset < numInputEnt; ++endOffset) {
2172 auto lclIndex = colMap.getLocalElement(inputGblColInds[endOffset]);
2173 if (lclIndex != OTLO::invalid())
2174 lclIndices.push_back(lclIndex);
2181 const LO numIndInSeq = (endOffset - curOffset);
2182 if (numIndInSeq != 0) {
2183 this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
// Invariant: the run ended either at the end of the input or at an index
// that has no local element in the column Map.
2189 const bool invariant = endOffset == numInputEnt ||
2190 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2191 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2192 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
// Continue after the index that ended the run.
2194 curOffset = endOffset + 1;
// Case 2: column Map present, graph not locally indexed. Each run of indices
// that are in the column Map is inserted with insertGlobalValuesImpl, after
// re-reading rowInfo from the graph.
2197 else if (! graph.colMap_.is_null ()) {
2198 const map_type& colMap = * (graph.colMap_);
2199 size_t curOffset = 0;
2200 while (curOffset < numInputEnt) {
2204 size_t endOffset = curOffset;
2205 for ( ; endOffset < numInputEnt &&
2206 colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2212 const LO numIndInSeq = (endOffset - curOffset);
2213 if (numIndInSeq != 0) {
2214 rowInfo = graph.getRowInfo(lclRow);
2215 this->insertGlobalValuesImpl (graph, rowInfo,
2216 inputGblColInds + curOffset,
2217 inputVals + curOffset,
2224 const bool invariant = endOffset == numInputEnt ||
2225 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2226 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2227 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
2229 curOffset = endOffset + 1;
// Remaining case (presumably no column Map -- the `else` line is not
// visible): insert the whole input in one call.
2233 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2234 inputVals, numInputEnt);
// insertGlobalValuesFilteredChecked: calls insertGlobalValuesFiltered and
// converts any std::exception it throws into a std::runtime_error whose
// message carries extra context. The return type and the trailing parameters
// are on lines not shown in this excerpt.
2239 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2241 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2242 insertGlobalValuesFilteredChecked(
2243 const GlobalOrdinal gblRow,
2244 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2245 const Teuchos::ArrayView<const Scalar>& values,
2246 const char*
const prefix,
2254 insertGlobalValuesFiltered(gblRow, indices, values, debug);
// Build the replacement message. Two message forms are visible below; the
// condition that chooses between them is on a line not shown.
// NOTE(review): `prefix` is declared `const char* const`, so `*prefix`
// streams only its first character; `prefix` was probably intended --
// confirm.
2256 catch(std::exception& e) {
2257 std::ostringstream os;
2259 const size_t maxNumToPrint =
2261 os << *prefix <<
": insertGlobalValuesFiltered threw an "
2262 "exception: " << e.what() << endl
2263 <<
"Global row index: " << gblRow << endl;
2271 os <<
": insertGlobalValuesFiltered threw an exception: "
// Rethrow as std::runtime_error with the assembled message.
2274 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str());
// Helper that overwrites existing row entries with new values, matching by
// column index. The function name and several parameters are on lines not
// shown in this excerpt (presumably replaceLocalValuesImpl, which is called
// elsewhere in this file with a matching argument list -- confirm).
2278 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2284 const LocalOrdinal inds[],
2286 const LocalOrdinal numElts)
2288 typedef LocalOrdinal LO;
2289 typedef GlobalOrdinal GO;
// Whether the graph reports its indices as sorted; passed to findRelOffset.
2290 const bool sorted = graph.
isSorted ();
// First path: look up each input local column index among the row's current
// column indices. An offset equal to rowInfo.numEntries is treated as "not
// found" and that input is skipped; otherwise the value at that offset is
// overwritten. The condition selecting this path is on a line not shown.
2300 for (LO j = 0; j < numElts; ++j) {
2301 const LO lclColInd = inds[j];
2302 const size_t offset =
2303 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2304 lclColInd, hint, sorted);
2305 if (offset != rowInfo.numEntries) {
2306 rowVals[offset] = newVals[j];
// Second path: without a column Map the function returns the invalid
// LocalOrdinal value.
2313 if (graph.
colMap_.is_null ()) {
2314 return Teuchos::OrdinalTraits<LO>::invalid ();
// Otherwise each input is looked up by gblColInd (its computation is on a
// line not shown); inputs whose gblColInd is invalid, or that are not found
// in the row, are skipped.
2322 for (LO j = 0; j < numElts; ++j) {
2324 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2325 const size_t offset =
2326 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2327 gblColInd, hint, sorted);
2328 if (offset != rowInfo.numEntries) {
2329 rowVals[offset] = newVals[j];
// ArrayView overload that forwards to the raw-pointer overload of
// replaceLocalValues. The function name and the row parameter are on lines
// not shown in this excerpt.
2348 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2352 const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2353 const Teuchos::ArrayView<const Scalar>& vals)
2355 typedef LocalOrdinal LO;
// The two inputs must have the same length; otherwise the invalid
// LocalOrdinal value is returned and nothing is replaced.
2357 const LO numInputEnt =
static_cast<LO
> (lclCols.size ());
2358 if (static_cast<LO> (vals.size ()) != numInputEnt) {
2359 return Teuchos::OrdinalTraits<LO>::invalid ();
// Delegate using raw pointers into the two views.
2361 const LO*
const inputInds = lclCols.getRawPtr ();
2362 const Scalar*
const inputVals = vals.getRawPtr ();
2363 return this->replaceLocalValues (localRow, numInputEnt,
2364 inputVals, inputInds);
// Kokkos::View overload that forwards to the raw-pointer overload of
// replaceLocalValues. The function name and the row parameter are on lines
// not shown in this excerpt.
2367 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2373 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2374 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
// The two views must have the same extent; otherwise the invalid
// LocalOrdinal value is returned and nothing is replaced.
2377 const LO numInputEnt = inputInds.extent(0);
2378 if (numInputEnt != static_cast<LO>(inputVals.extent(0))) {
2379 return Teuchos::OrdinalTraits<LO>::invalid();
// The values pointer is reinterpreted from impl_scalar_type to Scalar before
// delegating.
2381 const Scalar*
const inVals =
2382 reinterpret_cast<const Scalar*
>(inputVals.data());
2383 return this->replaceLocalValues(localRow, numInputEnt,
2384 inVals, inputInds.data());
// Raw-array overload: checks matrix state, obtains the row's writable host
// values, and delegates the per-entry work to replaceLocalValuesImpl.
// NOTE(review): the function-name line, the localRow parameter, and the
// definitions of graph / rowInfo / IST are not visible in this excerpt.
2387 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2391 const LocalOrdinal numEnt,
2392 const Scalar inputVals[],
2393 const LocalOrdinal inputCols[])
2396 typedef LocalOrdinal LO;
// Fill must be active and a static graph must exist; otherwise return the
// invalid LocalOrdinal value.
2398 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2400 return Teuchos::OrdinalTraits<LO>::invalid ();
// If rowInfo reports an invalid local row, return 0.
2405 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2408 return static_cast<LO
> (0);
2410 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The caller's Scalar array is reinterpreted as IST for the implementation.
2411 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2412 return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2413 inputCols, inVals, numEnt);
// Overwrite values in one row, addressed by global column index.
// NOTE(review): the function-name line, several parameters, the lambda's
// variable declaration, and the call that consumes `cb` are not visible in
// this excerpt. A call `this->replaceGlobalValuesImpl (...)` elsewhere in this
// file suggests this is replaceGlobalValuesImpl -- confirm.
2416 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2422 const GlobalOrdinal inds[],
2424 const LocalOrdinal numElts)
// Non-owning view over the caller's index array.
2426 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
// Callback: given input position k and row offset, assign the new value.
// The second argument is unnamed and unused.
2428 [&](
size_t const k,
size_t const ,
size_t const offset) {
2429 rowVals[offset] = newVals[k];
// Wrap the lambda by reference in a std::function.
2431 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
// Teuchos::ArrayView convenience overload: validates that the index and value
// arrays have the same length, then forwards raw pointers to the
// (globalRow, count, values, indices) overload of replaceGlobalValues.
// NOTE(review): the return-type and function-name lines are not visible in
// this excerpt.
2435 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2439 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2440 const Teuchos::ArrayView<const Scalar>& inputVals)
2442 typedef LocalOrdinal LO;
2444 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
// Mismatched lengths: return the invalid LocalOrdinal value.
2445 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2446 return Teuchos::OrdinalTraits<LO>::invalid ();
2448 return this->replaceGlobalValues (globalRow, numInputEnt,
2449 inputVals.getRawPtr (),
2450 inputGblColInds.getRawPtr ());
// Raw-array overload: checks matrix state, obtains the row's writable host
// values, and delegates the per-entry work to replaceGlobalValuesImpl.
// NOTE(review): the function-name line, the row parameter, and the
// definitions of graph / rowInfo / IST are not visible in this excerpt.
2453 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2457 const LocalOrdinal numEnt,
2458 const Scalar inputVals[],
2459 const GlobalOrdinal inputGblColInds[])
2462 typedef LocalOrdinal LO;
// Fill must be active and a static graph must exist; otherwise return the
// invalid LocalOrdinal value.
2464 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2466 return Teuchos::OrdinalTraits<LO>::invalid ();
// If rowInfo reports an invalid local row, return 0.
2471 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2474 return static_cast<LO
> (0);
2477 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The caller's Scalar array is reinterpreted as IST for the implementation.
2478 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2479 return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2480 inputGblColInds, inVals, numEnt);
// Kokkos::View convenience overload: checks that both views have the same
// extent(0), reinterprets the impl_scalar_type data as Scalar, and forwards
// to the raw-array overload of replaceGlobalValues.
// NOTE(review): the function-name line, the LO typedef, and the final
// argument line of the forwarding call are not visible in this excerpt.
2483 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2489 const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2490 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2499 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
// Mismatched extents: return the invalid LocalOrdinal value.
2500 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2501 return Teuchos::OrdinalTraits<LO>::invalid();
2503 const Scalar*
const inVals =
2504 reinterpret_cast<const Scalar*
>(inputVals.data());
2505 return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
// Add input values into one row's value array, addressed by global column
// index.
// NOTE(review): the function-name line, some parameters, braces, the
// condition that chooses between the atomic and non-atomic update, and the
// declarations of colInds / hint / lclColInd are not visible in this excerpt.
// A call `this->sumIntoGlobalValuesImpl (...)` elsewhere in this file suggests
// this is sumIntoGlobalValuesImpl -- confirm.
2509 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2515 const GlobalOrdinal inds[],
2517 const LocalOrdinal numElts,
2520 typedef LocalOrdinal LO;
2521 typedef GlobalOrdinal GO;
2523 const bool sorted = graph.
isSorted ();
// This path tests for a missing column Map; the handling is not shown here.
2532 if (graph.
colMap_.is_null ()) {
2543 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
// Search keyed on a local column index (lclColInd, computed on a line not
// shown); inputs whose lclColInd equals LINV are skipped.
2545 for (LO j = 0; j < numElts; ++j) {
2547 if (lclColInd != LINV) {
2548 const size_t offset =
2549 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2550 lclColInd, hint, sorted);
2551 if (offset != rowInfo.numEntries) {
// Two update forms are present: Kokkos::atomic_add and a plain +=.
2553 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2556 rowVals[offset] += newVals[j];
// Search keyed directly on the caller's global column index inds[j].
2569 for (LO j = 0; j < numElts; ++j) {
2570 const GO gblColInd = inds[j];
2571 const size_t offset =
2572 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2573 gblColInd, hint, sorted);
2574 if (offset != rowInfo.numEntries) {
2576 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2579 rowVals[offset] += newVals[j];
// Teuchos::ArrayView convenience overload: validates that the index and value
// arrays have the same length, then forwards raw pointers to the raw-array
// overload of sumIntoGlobalValues.
// NOTE(review): the function-name line, the gblRow parameter, the trailing
// parameter, and the last argument of the forwarding call are not visible in
// this excerpt.
2593 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2597 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2598 const Teuchos::ArrayView<const Scalar>& inputVals,
2601 typedef LocalOrdinal LO;
2603 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
// Mismatched lengths: return the invalid LocalOrdinal value.
2604 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2605 return Teuchos::OrdinalTraits<LO>::invalid ();
2607 return this->sumIntoGlobalValues (gblRow, numInputEnt,
2608 inputVals.getRawPtr (),
2609 inputGblColInds.getRawPtr (),
// Raw-array overload of sumIntoGlobalValues.
// Visible behavior: returns the invalid LocalOrdinal value when fill is not
// active or there is no static graph; when rowInfo reports an invalid local
// row, the inputs are wrapped in ArrayViews and handed to
// insertNonownedGlobalValues; otherwise the row's writable host values are
// passed to sumIntoGlobalValuesImpl.
// NOTE(review): the function-name line, the gblRow / atomic parameters, the
// definitions of graph / rowInfo / IST, and the return value of the non-owned
// branch are not visible in this excerpt.
2613 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2617 const LocalOrdinal numInputEnt,
2618 const Scalar inputVals[],
2619 const GlobalOrdinal inputGblColInds[],
2623 typedef LocalOrdinal LO;
2624 typedef GlobalOrdinal GO;
2626 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2628 return Teuchos::OrdinalTraits<LO>::invalid ();
2633 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2638 using Teuchos::ArrayView;
// For an empty input the ArrayViews are built from nullptr rather than from
// the caller's pointers.
2639 ArrayView<const GO> inputGblColInds_av(
2640 numInputEnt == 0 ?
nullptr : inputGblColInds,
2642 ArrayView<const Scalar> inputVals_av(
2643 numInputEnt == 0 ?
nullptr :
2644 inputVals, numInputEnt);
2649 this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2660 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The caller's Scalar array is reinterpreted as IST for the implementation.
2661 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2662 return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2663 inputGblColInds, inVals,
2664 numInputEnt, atomic);
// Row-index entry point for applying a binary function f to entries of one
// local row: validates matrix state, looks up the row, and forwards the row's
// writable host values to the row-array overload of transformLocalValues.
// NOTE(review): the function-name line and the lclRow / atomic parameters are
// not visible in this excerpt.
2668 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2672 const LocalOrdinal numInputEnt,
2673 const impl_scalar_type inputVals[],
2674 const LocalOrdinal inputCols[],
2675 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2678 using Tpetra::Details::OrdinalTraits;
2679 typedef LocalOrdinal LO;
// Fill must be active and a static graph must exist; otherwise return the
// invalid LocalOrdinal value.
2681 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2683 return Teuchos::OrdinalTraits<LO>::invalid ();
2685 const crs_graph_type& graph = * (this->staticGraph_);
2686 const RowInfo rowInfo = graph.getRowInfo (lclRow);
// If rowInfo reports an invalid local row, return 0.
2688 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2691 return static_cast<LO
> (0);
2693 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2694 return this->transformLocalValues (curRowVals.data (), graph,
2695 rowInfo, inputCols, inputVals,
2696 numInputEnt, f, atomic);
// Row-index entry point for applying a binary function f to entries of one
// row identified by global row index: validates matrix state, looks up the
// row, and forwards the row's writable host values to the row-array overload
// of transformGlobalValues.
// NOTE(review): the return-type line and the trailing `atomic` parameter are
// not visible in this excerpt.
2699 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2701 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2702 transformGlobalValues (
const GlobalOrdinal gblRow,
2703 const LocalOrdinal numInputEnt,
2704 const impl_scalar_type inputVals[],
2705 const GlobalOrdinal inputCols[],
2706 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2709 using Tpetra::Details::OrdinalTraits;
2710 typedef LocalOrdinal LO;
// Fill must be active and a static graph must exist; otherwise return the
// invalid LocalOrdinal value.
2712 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2714 return OrdinalTraits<LO>::invalid ();
2716 const crs_graph_type& graph = * (this->staticGraph_);
2717 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
// If rowInfo reports an invalid local row, return 0.
2719 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2722 return static_cast<LO
> (0);
2724 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2725 return this->transformGlobalValues (curRowVals.data (), graph,
2726 rowInfo, inputCols, inputVals,
2727 numInputEnt, f, atomic);
// Row-array overload: for each input (local column index, value) pair, find
// the matching entry in the row and replace it with f(current, newVals[j]).
// NOTE(review): the return-type line, the trailing parameter, braces, the
// condition choosing between the atomic and non-atomic update, the
// declaration of `hint`, and the return statements are not visible in this
// excerpt.
2730 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2732 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2733 transformLocalValues (impl_scalar_type rowVals[],
2734 const crs_graph_type& graph,
2735 const RowInfo& rowInfo,
2736 const LocalOrdinal inds[],
2737 const impl_scalar_type newVals[],
2738 const LocalOrdinal numElts,
2739 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2742 typedef impl_scalar_type ST;
2743 typedef LocalOrdinal LO;
2744 typedef GlobalOrdinal GO;
2751 const bool sorted = graph.isSorted ();
// Locally indexed graph: the input indices are used directly as search keys.
2756 if (graph.isLocallyIndexed ()) {
2759 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2761 for (LO j = 0; j < numElts; ++j) {
2762 const LO lclColInd = inds[j];
2763 const size_t offset =
2764 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2765 lclColInd, hint, sorted);
2766 if (offset != rowInfo.numEntries) {
// Two update forms are present: atomic_binary_function_update through a
// volatile pointer, and a plain read-modify-write.
2775 volatile ST*
const dest = &rowVals[offset];
2776 (void) atomic_binary_function_update (dest, newVals[j], f);
2780 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Globally indexed graph: each local input index is first converted to a
// global index through the column Map; indices that map to the invalid GO
// value are skipped.
2787 else if (graph.isGloballyIndexed ()) {
2791 if (graph.colMap_.is_null ()) {
2798 const map_type& colMap = * (graph.colMap_);
2801 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2803 const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2804 for (LO j = 0; j < numElts; ++j) {
2805 const GO gblColInd = colMap.getGlobalElement (inds[j]);
2806 if (gblColInd != GINV) {
2807 const size_t offset =
2808 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2809 gblColInd, hint, sorted);
2810 if (offset != rowInfo.numEntries) {
2819 volatile ST*
const dest = &rowVals[offset];
2820 (void) atomic_binary_function_update (dest, newVals[j], f);
2824 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Row-array overload: for each input (global column index, value) pair, find
// the matching entry in the row and replace it with f(current, newVals[j]).
// NOTE(review): the return-type line, the trailing parameter, braces, the
// condition choosing between the atomic and non-atomic update, the
// declaration of `hint`, and the return statements are not visible in this
// excerpt.
2839 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2841 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2842 transformGlobalValues (impl_scalar_type rowVals[],
2843 const crs_graph_type& graph,
2844 const RowInfo& rowInfo,
2845 const GlobalOrdinal inds[],
2846 const impl_scalar_type newVals[],
2847 const LocalOrdinal numElts,
2848 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2851 typedef impl_scalar_type ST;
2852 typedef LocalOrdinal LO;
2853 typedef GlobalOrdinal GO;
2860 const bool sorted = graph.isSorted ();
// Globally indexed graph: the input indices are used directly as search keys.
2865 if (graph.isGloballyIndexed ()) {
2868 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2870 for (LO j = 0; j < numElts; ++j) {
2871 const GO gblColInd = inds[j];
2872 const size_t offset =
2873 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2874 gblColInd, hint, sorted);
2875 if (offset != rowInfo.numEntries) {
// Two update forms are present: atomic_binary_function_update through a
// volatile pointer, and a plain read-modify-write.
2884 volatile ST*
const dest = &rowVals[offset];
2885 (void) atomic_binary_function_update (dest, newVals[j], f);
2889 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Locally indexed graph: each global input index is first converted to a
// local index through the column Map; indices that map to LINV are skipped.
2896 else if (graph.isLocallyIndexed ()) {
2900 if (graph.colMap_.is_null ()) {
2906 const map_type& colMap = * (graph.colMap_);
2909 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2911 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2912 for (LO j = 0; j < numElts; ++j) {
2913 const LO lclColInd = colMap.getLocalElement (inds[j]);
2914 if (lclColInd != LINV) {
2915 const size_t offset =
2916 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2917 lclColInd, hint, sorted);
2918 if (offset != rowInfo.numEntries) {
2927 volatile ST*
const dest = &rowVals[offset];
2928 (void) atomic_binary_function_update (dest, newVals[j], f);
2932 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Add input values into one row's value array, addressed by local column
// index.
// NOTE(review): the function-name line, some parameters, braces, the
// condition that chooses between the atomic and non-atomic update, and the
// declarations of colInds / hint / gblColInd are not visible in this excerpt.
// A call `this->sumIntoLocalValuesImpl (...)` elsewhere in this file suggests
// this is sumIntoLocalValuesImpl -- confirm.
2947 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2953 const LocalOrdinal inds[],
2955 const LocalOrdinal numElts,
2958 typedef LocalOrdinal LO;
2959 typedef GlobalOrdinal GO;
2961 const bool sorted = graph.
isSorted ();
// Search keyed directly on the caller's local column index inds[j].
2971 for (LO j = 0; j < numElts; ++j) {
2972 const LO lclColInd = inds[j];
2973 const size_t offset =
2974 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2975 lclColInd, hint, sorted);
2976 if (offset != rowInfo.numEntries) {
// Two update forms are present: Kokkos::atomic_add and a plain +=.
2978 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2981 rowVals[offset] += newVals[j];
// With no column Map on the graph, return the invalid LocalOrdinal value.
2989 if (graph.
colMap_.is_null ()) {
2990 return Teuchos::OrdinalTraits<LO>::invalid ();
// Search keyed on a global column index (gblColInd, computed on a line not
// shown); inputs whose gblColInd is the invalid GO value are skipped.
2998 for (LO j = 0; j < numElts; ++j) {
3000 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
3001 const size_t offset =
3002 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3003 gblColInd, hint, sorted);
3004 if (offset != rowInfo.numEntries) {
3006 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3009 rowVals[offset] += newVals[j];
// Teuchos::ArrayView convenience overload: validates that the index and value
// arrays have the same length, then forwards raw pointers (plus `atomic`) to
// the raw-array overload of sumIntoLocalValues.
// NOTE(review): the function-name line, the localRow / atomic parameters, and
// the LO typedef are not visible in this excerpt.
3029 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3033 const Teuchos::ArrayView<const LocalOrdinal>& indices,
3034 const Teuchos::ArrayView<const Scalar>& values,
3038 const LO numInputEnt =
static_cast<LO
>(indices.size());
// Mismatched lengths: return the invalid LocalOrdinal value.
3039 if (static_cast<LO>(values.size()) != numInputEnt) {
3040 return Teuchos::OrdinalTraits<LO>::invalid();
3042 const LO*
const inputInds = indices.getRawPtr();
3043 const scalar_type*
const inputVals = values.getRawPtr();
3044 return this->sumIntoLocalValues(localRow, numInputEnt,
3045 inputVals, inputInds, atomic);
// Kokkos::View convenience overload: checks that both views have the same
// extent(0), reinterprets the impl_scalar_type data as scalar_type, and
// forwards (plus `atomic`) to the raw-array overload of sumIntoLocalValues.
// NOTE(review): the function-name line, the localRow / atomic parameters, the
// LO typedef, and the declaration line of `inVals` are not visible in this
// excerpt.
3048 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3054 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
3055 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
3059 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
// Mismatched extents: return the invalid LocalOrdinal value.
3060 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
3061 return Teuchos::OrdinalTraits<LO>::invalid();
3064 reinterpret_cast<const scalar_type*
>(inputVals.data());
3065 return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
3066 inputInds.data(), atomic);
// Raw-array overload: checks matrix state, obtains the row's writable host
// values, and delegates the per-entry work to sumIntoLocalValuesImpl.
// NOTE(review): the function-name line, the row / atomic parameters, and the
// definitions of graph / rowInfo / IST are not visible in this excerpt.
3069 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3073 const LocalOrdinal numEnt,
3074 const Scalar vals[],
3075 const LocalOrdinal cols[],
3079 typedef LocalOrdinal LO;
// Fill must be active and a static graph must exist; otherwise return the
// invalid LocalOrdinal value.
3081 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3083 return Teuchos::OrdinalTraits<LO>::invalid ();
// If rowInfo reports an invalid local row, return 0.
3088 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3091 return static_cast<LO
> (0);
3093 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The caller's Scalar array is reinterpreted as IST for the implementation.
3094 const IST*
const inputVals =
reinterpret_cast<const IST*
> (vals);
3095 return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3096 cols, inputVals, numEnt, atomic);
// Returns a read-only host view of one row's values taken from
// valuesUnpacked_wdv, or a default-constructed (empty) view when the row has
// no allocation or the value storage has extent 0.
// NOTE(review): the function-name line and the remaining getHostSubview
// arguments are not visible in this excerpt; presumably this is
// getValuesViewHost -- confirm.
3099 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3101 values_dualv_type::t_host::const_type
3105 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3106 return typename values_dualv_type::t_host::const_type ();
// The subview begins at the row's offset1D.
3108 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a writable host view of one row's values taken from
// valuesUnpacked_wdv, or a default-constructed (empty) view when the row has
// no allocation or the value storage has extent 0.
// NOTE(review): the function-name line and the remaining getHostSubview
// arguments are not visible in this excerpt; presumably this is
// getValuesViewHostNonConst -- confirm.
3113 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3115 values_dualv_type::t_host
3119 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3120 return typename values_dualv_type::t_host ();
// The subview begins at the row's offset1D.
3122 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a read-only device view of one row's values taken from
// valuesUnpacked_wdv, or a default-constructed (empty) view when the row has
// no allocation or the value storage has extent 0.
// NOTE(review): the function-name line and the remaining getDeviceSubview
// arguments are not visible in this excerpt; presumably this is
// getValuesViewDevice -- confirm.
3127 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3129 values_dualv_type::t_dev::const_type
3133 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3134 return typename values_dualv_type::t_dev::const_type ();
// The subview begins at the row's offset1D.
3136 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// Returns a writable device view of one row's values taken from
// valuesUnpacked_wdv, or a default-constructed (empty) view when the row has
// no allocation or the value storage has extent 0.
// NOTE(review): the function-name line and the remaining getDeviceSubview
// arguments are not visible in this excerpt; presumably this is
// getValuesViewDeviceNonConst -- confirm.
3141 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3143 values_dualv_type::t_dev
3147 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3148 return typename values_dualv_type::t_dev ();
// The subview begins at the row's offset1D.
3150 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// Copies one local row's column indices and values into caller-provided host
// views and reports the entry count through numEntries.
// Throws std::runtime_error when the matrix has no column Map, or when either
// output view is smaller than the row's number of entries.
// NOTE(review): the function-name line, the localRow parameter, and the index
// assignment inside the globally-indexed loop are not visible in this excerpt.
3156 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3160 nonconst_local_inds_host_view_type &indices,
3161 nonconst_values_host_view_type &values,
3162 size_t& numEntries)
const
3164 using Teuchos::ArrayView;
3165 using Teuchos::av_reinterpret_cast;
3166 const char tfecfFuncName[] =
"getLocalRowCopy: ";
// Local column indices only exist once there is a column Map.
3168 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3169 (! this->hasColMap (), std::runtime_error,
3170 "The matrix does not have a column Map yet. This means we don't have "
3171 "local indices for columns yet, so it doesn't make sense to call this "
3172 "method. If the matrix doesn't have a column Map yet, you should call "
3173 "fillComplete on it first.");
3175 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3176 const size_t theNumEntries = rowinfo.numEntries;
// Both output views must be able to hold the whole row.
3177 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3178 (static_cast<size_t> (indices.size ()) < theNumEntries ||
3179 static_cast<size_t> (values.size ()) < theNumEntries,
3180 std::runtime_error,
"Row with local index " << localRow <<
" has " <<
3181 theNumEntries <<
" entry/ies, but indices.size() = " <<
3182 indices.size () <<
" and values.size() = " << values.size () <<
".");
3183 numEntries = theNumEntries;
// Nothing is copied when rowinfo reports an invalid local row.
3185 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
// Locally indexed graph: indices and values are copied as stored.
3186 if (staticGraph_->isLocallyIndexed ()) {
3187 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3188 auto curVals = getValuesViewHost(rowinfo);
3190 for (
size_t j = 0; j < theNumEntries; ++j) {
3191 values[j] = curVals[j];
3192 indices[j] = curLclInds(j);
// Globally indexed graph: the column Map and the stored global indices are
// fetched; how indices[j] is filled is not shown in this excerpt.
3195 else if (staticGraph_->isGloballyIndexed ()) {
3197 const map_type& colMap = * (staticGraph_->colMap_);
3198 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3199 auto curVals = getValuesViewHost(rowinfo);
3201 for (
size_t j = 0; j < theNumEntries; ++j) {
3202 values[j] = curVals[j];
// Copies one row (identified by global row index) into caller-provided host
// views, with global column indices, and reports the entry count through
// numEntries.
// Throws std::runtime_error when either output view is smaller than the
// row's number of entries.
// NOTE(review): the function-name line, the globalRow parameter, the
// declaration line of `rowinfo`, and the index assignment inside the
// locally-indexed loop are not visible in this excerpt.
3210 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3214 nonconst_global_inds_host_view_type &indices,
3215 nonconst_values_host_view_type &values,
3216 size_t& numEntries)
const
3218 using Teuchos::ArrayView;
3219 using Teuchos::av_reinterpret_cast;
3220 const char tfecfFuncName[] =
"getGlobalRowCopy: ";
3223 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3224 const size_t theNumEntries = rowinfo.numEntries;
// Both output views must be able to hold the whole row.
3225 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3226 static_cast<size_t> (indices.size ()) < theNumEntries ||
3227 static_cast<size_t> (values.size ()) < theNumEntries,
3228 std::runtime_error,
"Row with global index " << globalRow <<
" has "
3229 << theNumEntries <<
" entry/ies, but indices.size() = " <<
3230 indices.size () <<
" and values.size() = " << values.size () <<
".");
3231 numEntries = theNumEntries;
// Nothing is copied when rowinfo reports an invalid local row.
3233 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
// Locally indexed graph: the column Map and stored local indices are
// fetched; how indices[j] is filled is not shown in this excerpt.
3234 if (staticGraph_->isLocallyIndexed ()) {
3235 const map_type& colMap = * (staticGraph_->colMap_);
3236 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3237 auto curVals = getValuesViewHost(rowinfo);
3239 for (
size_t j = 0; j < theNumEntries; ++j) {
3240 values[j] = curVals[j];
// Globally indexed graph: indices and values are copied as stored.
3244 else if (staticGraph_->isGloballyIndexed ()) {
3245 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3246 auto curVals = getValuesViewHost(rowinfo);
3248 for (
size_t j = 0; j < theNumEntries; ++j) {
3249 values[j] = curVals[j];
3250 indices[j] = curGblInds(j);
// Sets `indices` and `values` to host views of one local row's column indices
// and values (no copy), or to empty views when the row is invalid or has no
// entries. Throws std::runtime_error if the matrix is globally indexed.
// NOTE(review): the function-name line, the localRow parameter, and the
// remaining getHostSubview arguments are not visible in this excerpt.
3257 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3261 local_inds_host_view_type &indices,
3262 values_host_view_type &values)
const
3264 const char tfecfFuncName[] =
"getLocalRowView: ";
3266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3267 isGloballyIndexed (), std::runtime_error,
"The matrix currently stores "
3268 "its indices as global indices, so you cannot get a view with local "
3269 "column indices. If the matrix has a column Map, you may call "
3270 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3271 "a view with global column indices by calling getGlobalRowCopy().");
3273 const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
// Valid, non-empty row: take subviews of the unpacked index and value storage.
3274 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3275 rowInfo.numEntries > 0) {
3276 indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
3280 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
// Otherwise hand back default-constructed (empty) views.
3287 indices = local_inds_host_view_type();
3288 values = values_host_view_type();
// Debug-build consistency checks: the two views must have equal sizes, match
// rowInfo.numEntries, and agree with getNumEntriesInLocalRow.
3291 #ifdef HAVE_TPETRA_DEBUG
3292 const char suffix[] =
". This should never happen. Please report this "
3293 "bug to the Tpetra developers.";
3294 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3295 (static_cast<size_t> (indices.size ()) !=
3296 static_cast<size_t> (values.size ()), std::logic_error,
3297 "At the end of this method, for local row " << localRow <<
", "
3298 "indices.size() = " << indices.size () <<
" != values.size () = "
3299 << values.size () << suffix);
3300 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3301 (static_cast<size_t> (indices.size ()) !=
3302 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3303 "At the end of this method, for local row " << localRow <<
", "
3304 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3305 << rowInfo.numEntries << suffix);
3306 const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3307 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3308 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3309 "of this method, for local row " << localRow <<
", rowInfo.numEntries = "
3310 << rowInfo.numEntries <<
" != getNumEntriesInLocalRow(localRow) = " <<
3311 expectedNumEntries << suffix);
3312 #endif // HAVE_TPETRA_DEBUG
// Sets `indices` and `values` to host views of one row's global column
// indices and values (no copy), or to empty views when the row is invalid or
// has no entries. Throws std::runtime_error if the matrix is locally indexed.
// NOTE(review): the function-name line, the globalRow parameter, the
// declaration line of `rowInfo`, and the remaining getHostSubview arguments
// are not visible in this excerpt.
3316 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3320 global_inds_host_view_type &indices,
3321 values_host_view_type &values)
const
3323 const char tfecfFuncName[] =
"getGlobalRowView: ";
3325 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3326 isLocallyIndexed (), std::runtime_error,
3327 "The matrix is locally indexed, so we cannot return a view of the row "
3328 "with global column indices. Use getGlobalRowCopy() instead.");
3333 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
// Valid, non-empty row: take subviews of the global-index and value storage.
3334 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3335 rowInfo.numEntries > 0) {
3336 indices = staticGraph_->gblInds_wdv.getHostSubview(rowInfo.offset1D,
3339 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
// Otherwise hand back default-constructed (empty) views.
3344 indices = global_inds_host_view_type();
3345 values = values_host_view_type();
// Debug-build consistency checks: the two views must have equal sizes, match
// rowInfo.numEntries, and agree with getNumEntriesInGlobalRow.
3348 #ifdef HAVE_TPETRA_DEBUG
3349 const char suffix[] =
". This should never happen. Please report this "
3350 "bug to the Tpetra developers.";
3351 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3352 (static_cast<size_t> (indices.size ()) !=
3353 static_cast<size_t> (values.size ()), std::logic_error,
3354 "At the end of this method, for global row " << globalRow <<
", "
3355 "indices.size() = " << indices.size () <<
" != values.size () = "
3356 << values.size () << suffix);
3357 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3358 (static_cast<size_t> (indices.size ()) !=
3359 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3360 "At the end of this method, for global row " << globalRow <<
", "
3361 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3362 << rowInfo.numEntries << suffix);
3363 const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3364 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3365 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3366 "of this method, for global row " << globalRow <<
", rowInfo.numEntries "
3367 "= " << rowInfo.numEntries <<
" != getNumEntriesInGlobalRow(globalRow) ="
3368 " " << expectedNumEntries << suffix);
3369 #endif // HAVE_TPETRA_DEBUG
// Scales the packed matrix values in place on device by theAlpha via
// KokkosBlas::scal.
// NOTE(review): the signature and the definition of theAlpha are not visible
// in this excerpt; presumably this is CrsMatrix::scale -- confirm. What the
// early-exit branch does is also not shown.
3373 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3380 const size_t nlrs = staticGraph_->getLocalNumRows ();
3381 const size_t numEntries = staticGraph_->getLocalNumEntries ();
// Special-cased: indices not yet allocated, no local rows, or no entries.
3382 if (! staticGraph_->indicesAreAllocated () ||
3383 nlrs == 0 || numEntries == 0) {
// The same view is used as both output and input of scal.
3388 auto vals = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3389 KokkosBlas::scal(vals, theAlpha, vals);
// Sets matrix values to a scalar (name taken from the fence label below).
// NOTE(review): the signature and the statement that performs the assignment
// are not visible in this excerpt; only the early-exit test and the trailing
// fence are shown.
3394 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3405 const size_t numEntries = staticGraph_->getLocalNumEntries();
// Special-cased: indices not yet allocated, or no entries.
3406 if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
// Global Kokkos fence, labelled for profiling/debugging.
3414 Kokkos::fence(
"CrsMatrix::setAllToScalar");
// Installs row pointers, column indices and values (device views) as the
// matrix's storage in one step.
// Throws std::invalid_argument if columnIndices and values differ in size,
// std::runtime_error if myGraph_ is null or if myGraph_->setAllIndices throws,
// and std::logic_error if the resulting local graph does not match the inputs.
// NOTE(review): the return-type line, the `try` line, and the statement that
// assigns valuesPacked_wdv are not visible in this excerpt.
3418 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3421 setAllValues (
const typename local_graph_device_type::row_map_type& rowPointers,
3422 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
3423 const typename local_matrix_device_type::values_type& values)
3426 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues");
3427 const char tfecfFuncName[] =
"setAllValues: ";
3428 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3429 (columnIndices.size () != values.size (), std::invalid_argument,
3430 "columnIndices.size() = " << columnIndices.size () <<
" != values.size()"
3431 " = " << values.size () <<
".");
3432 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3433 (myGraph_.is_null (), std::runtime_error,
"myGraph_ must not be null.");
// Hand the structure to the graph; any std::exception it throws is rethrown
// as std::runtime_error with added context.
3436 myGraph_->setAllIndices (rowPointers, columnIndices);
3438 catch (std::exception &e) {
3439 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3440 (
true, std::runtime_error,
"myGraph_->setAllIndices() threw an "
3441 "exception: " << e.what ());
// Sanity check: the graph's local structure must have the same extents as
// the inputs.
3448 auto lclGraph = myGraph_->getLocalGraphDevice ();
3449 const size_t numEnt = lclGraph.entries.extent (0);
3450 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3451 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3452 numEnt !=
static_cast<size_t> (columnIndices.extent (0)),
3453 std::logic_error,
"myGraph_->setAllIndices() did not correctly create "
3454 "local graph. Please report this bug to the Tpetra developers.");
// Unpacked storage is set from the packed storage, and the storage status is
// marked as 1-D packed.
3457 valuesUnpacked_wdv = valuesPacked_wdv;
3461 this->storageStatus_ = Details::STORAGE_1D_PACKED;
3463 checkInternalState ();
// Overload taking a local device matrix: extracts its row map, column
// entries and values and forwards them to the three-view setAllValues.
// NOTE(review): the signature (including the type of localDeviceMatrix) is
// not visible in this excerpt.
3466 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3472 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues from KokkosSparse::CrsMatrix");
3474 auto graph = localDeviceMatrix.graph;
3477 auto rows = graph.row_map;
3478 auto columns = graph.entries;
3479 auto values = localDeviceMatrix.values;
3481 setAllValues(rows,columns,values);
// Teuchos::ArrayRCP overload: converts the three input arrays to Kokkos views
// and forwards them to the three-view setAllValues.
// NOTE(review): the function-name line, the `ptr` parameter, the IST / DT
// typedefs, and the statement that fills ptrNative from ptrSizeT are not
// visible in this excerpt.
3484 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3488 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3489 const Teuchos::ArrayRCP<Scalar>& val)
3491 using Kokkos::Compat::getKokkosViewDeepCopy;
3492 using Teuchos::ArrayRCP;
3493 using Teuchos::av_reinterpret_cast;
3496 typedef typename local_graph_device_type::row_map_type row_map_type;
3498 const char tfecfFuncName[] =
"setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
// ptrNative: newly allocated view in the graph's row-map type.
// ptrSizeT: unmanaged view wrapping the caller's size_t row pointers.
3504 typename row_map_type::non_const_type ptrNative (
"ptr", ptr.size ());
3505 Kokkos::View<
const size_t*,
3506 typename row_map_type::array_layout,
3508 Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3511 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3512 (ptrNative.extent (0) != ptrSizeT.extent (0),
3513 std::logic_error,
"ptrNative.extent(0) = " <<
3514 ptrNative.extent (0) <<
" != ptrSizeT.extent(0) = "
3515 << ptrSizeT.extent (0) <<
". Please report this bug to the "
3516 "Tpetra developers.");
// Indices and values are turned into views by getKokkosViewDeepCopy; values
// are reinterpreted as IST first.
3518 auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3519 auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3520 this->setAllValues (ptrNative, indIn, valIn);
// Fills `offsets` with the per-row diagonal offsets computed by the graph.
// Throws std::runtime_error if the matrix has no graph.
// NOTE(review): the signature, part of the host-branch typedef, the `else`
// line, and whatever copies offsetsTmp into offsetsOut are not visible in
// this excerpt.
3523 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3528 const char tfecfFuncName[] =
"getLocalDiagOffsets: ";
3529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3530 (staticGraph_.is_null (), std::runtime_error,
"The matrix has no graph.");
// Grow the output array if it cannot hold one offset per local row.
3537 const size_t lclNumRows = staticGraph_->getLocalNumRows ();
3538 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3539 offsets.resize (lclNumRows);
// When memory_space is HostSpace, the graph writes straight into an unmanaged
// view over the caller's array.
3545 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3550 Kokkos::MemoryUnmanaged> output_type;
3551 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3552 staticGraph_->getLocalDiagOffsets (offsetsOut);
// Otherwise the offsets are computed into a temporary device_type view, and a
// host unmanaged view over the caller's array is created.
3555 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
3556 staticGraph_->getLocalDiagOffsets (offsetsTmp);
3557 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
3558 Kokkos::MemoryUnmanaged> output_type;
3559 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
// One-argument getLocalDiagCopy (per tfecfFuncName): extracts the diagonal
// into the Vector `diag` without precomputed offsets.
// Throws std::runtime_error if the matrix has no graph or no column Map.
// NOTE(review): the signature, the LO typedef, the body of the null-row-Map
// branch, the definition of D_lcl, and the head of the final call (only its
// last argument is shown) are not visible in this excerpt.
3565 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3570 using Teuchos::ArrayRCP;
3571 using Teuchos::ArrayView;
3572 using Teuchos::av_reinterpret_cast;
3573 const char tfecfFuncName[] =
"getLocalDiagCopy (1-arg): ";
3577 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3578 staticGraph_.is_null (), std::runtime_error,
3579 "This method requires that the matrix have a graph.");
// A null row Map or a null communicator is handled as a special case.
3580 auto rowMapPtr = this->getRowMap ();
3581 if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3587 auto colMapPtr = this->getColMap ();
3588 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3589 (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3590 "This method requires that the matrix have a column Map.");
3591 const map_type& rowMap = * rowMapPtr;
3592 const map_type& colMap = * colMapPtr;
3593 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
// Debug builds only: the output Vector's Map must be compatible with the row
// Map.
3595 #ifdef HAVE_TPETRA_DEBUG
3598 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3599 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3600 "The input Vector's Map must be compatible with the CrsMatrix's row "
3601 "Map. You may check this by using Map's isCompatible method: "
3602 "diag.getMap ()->isCompatible (A.getRowMap ());");
3603 #endif // HAVE_TPETRA_DEBUG
// 1-D view of column 0 of the output, restricted to rows [0, myNumRows).
3607 const auto D_lcl_1d =
3608 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3610 const auto lclRowMap = rowMap.getLocalMap ();
3615 getLocalMatrixDevice ());
// getLocalDiagCopy overload taking precomputed diagonal offsets as a Kokkos
// view: copies the diagonal into `diag` with KokkosSparse::getDiagCopy.
// NOTE(review): the function-name line, the `diag` parameter, the full type
// of `offsets`, and the definition of D_lcl are not visible in this excerpt.
3618 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3623 Kokkos::MemoryUnmanaged>& offsets)
const
3625 typedef LocalOrdinal LO;
// Debug builds only: the output Vector's Map must be compatible with the row
// Map.
3627 #ifdef HAVE_TPETRA_DEBUG
3628 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3629 const map_type& rowMap = * (this->getRowMap ());
3632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3633 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3634 "The input Vector's Map must be compatible with (in the sense of Map::"
3635 "isCompatible) the CrsMatrix's row Map.");
3636 #endif // HAVE_TPETRA_DEBUG
3646 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
// The subview selects column 0, rows [0, myNumRows), of D_lcl.
3649 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3651 KokkosSparse::getDiagCopy (D_lcl_1d, offsets,
3652 getLocalMatrixDevice ());
// getLocalDiagCopy overload taking precomputed diagonal offsets as a
// Teuchos::ArrayView: fills the output vector on host, one row per iteration.
// NOTE(review): the function-name line, the `diag` parameter, the IST / STS
// typedefs, the definition of lclVecHost, and the closing of the lambda are
// not visible in this excerpt.
3655 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3659 const Teuchos::ArrayView<const size_t>& offsets)
const
3661 using LO = LocalOrdinal;
3662 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
// Debug builds only: the output Vector's Map must be compatible with the row
// Map.
3665 #ifdef HAVE_TPETRA_DEBUG
3666 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3667 const map_type& rowMap = * (this->getRowMap ());
3670 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3671 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3672 "The input Vector's Map must be compatible with (in the sense of Map::"
3673 "isCompatible) the CrsMatrix's row Map.");
3674 #endif // HAVE_TPETRA_DEBUG
3686 auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
// Wrap the caller's offsets in an unmanaged host view so the lambda can
// capture it by value.
3688 using host_offsets_view_type =
3689 Kokkos::View<
const size_t*, Kokkos::HostSpace,
3690 Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
3691 host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
3693 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
3694 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3695 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
3697 auto rowPtrsPackedHost = staticGraph_->getRowPtrsPackedHost();
3698 auto valuesPackedHost = valuesPacked_wdv.getHostView(Access::ReadOnly);
// Per row: write zero first; if the row's offset is not INV, overwrite with
// the packed value at (row start + offset).
3699 Kokkos::parallel_for
3700 (
"Tpetra::CrsMatrix::getLocalDiagCopy",
3701 range_type (0, myNumRows),
3702 [&, INV, h_offsets] (
const LO lclRow) {
3703 lclVecHost1d(lclRow) = STS::zero ();
3704 if (h_offsets[lclRow] != INV) {
3705 auto curRowOffset = rowPtrsPackedHost (lclRow);
3706 lclVecHost1d(lclRow) =
3707 static_cast<IST
> (valuesPackedHost(curRowOffset+h_offsets[lclRow]));
// leftScale (per tfecfFuncName): scales the matrix using the entries of the
// vector x.
// Visible behavior: x must live on either the range Map or the row Map,
// otherwise std::invalid_argument is thrown; the matrix must be fill
// complete, otherwise std::runtime_error is thrown.
// NOTE(review): the signature, the vec_type / RCP declarations, the
// assignment `xp = tempVec` (if any), the head of the scaling call (only its
// trailing arguments are shown), and the end of the final error message are
// not visible in this excerpt.
3714 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3719 using ::Tpetra::Details::ProfilingRegion;
3720 using Teuchos::ArrayRCP;
3721 using Teuchos::ArrayView;
3722 using Teuchos::null;
3725 using Teuchos::rcpFromRef;
3727 const char tfecfFuncName[] =
"leftScale: ";
3729 ProfilingRegion region (
"Tpetra::CrsMatrix::leftScale");
3731 RCP<const vec_type> xp;
// Case 1: x is on the range Map. If the graph has an Export object, x is
// redistributed into a temporary row-Map vector with doImport(..., REPLACE);
// otherwise x is used as is.
3732 if (this->getRangeMap ()->isSameAs (* (x.
getMap ()))) {
3735 auto exporter = this->getCrsGraphRef ().getExporter ();
3736 if (exporter.get () !=
nullptr) {
3737 RCP<vec_type> tempVec (
new vec_type (this->getRowMap ()));
3738 tempVec->doImport (x, *exporter,
REPLACE);
3742 xp = rcpFromRef (x);
// Case 2: x is already on the row Map.
3745 else if (this->getRowMap ()->isSameAs (* (x.
getMap ()))) {
3746 xp = rcpFromRef (x);
3749 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3750 (
true, std::invalid_argument,
"x's Map must be the same as "
3751 "either the row Map or the range Map of the CrsMatrix.");
// The scaling itself uses column 0 of the device view of xp.
3754 if (this->isFillComplete()) {
3755 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3756 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3759 x_lcl_1d,
false,
false);
3763 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3764 (
true, std::runtime_error,
"CrsMatrix::leftScale requires matrix to be"
// rightScale (name taken from the tfecfFuncName / ProfilingRegion strings; the
// signature is not visible in this excerpt).
//
// Chooses a vector `xp` to scale with, based on which Map the input `x`
// lives on, then applies the scaling only if the matrix is fill complete.
// Throws std::runtime_error if x's Map is neither the domain Map nor the
// column Map, and also on the not-fill-complete path.
3769 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3774 using ::Tpetra::Details::ProfilingRegion;
3775 using Teuchos::ArrayRCP;
3776 using Teuchos::ArrayView;
3777 using Teuchos::null;
3780 using Teuchos::rcpFromRef;
3782 const char tfecfFuncName[] =
"rightScale: ";
3784 ProfilingRegion region (
"Tpetra::CrsMatrix::rightScale");
3786 RCP<const vec_type> xp;
// Case 1: x is on the domain Map.
3787 if (this->getDomainMap ()->isSameAs (* (x.
getMap ()))) {
// If the graph has an Importer, fill a column-Map temporary from x via
// doImport with REPLACE.
3790 auto importer = this->getCrsGraphRef ().getImporter ();
3791 if (importer.get () !=
nullptr) {
3792 RCP<vec_type> tempVec (
new vec_type (this->getColMap ()));
3793 tempVec->doImport (x, *importer,
REPLACE);
// Non-owning reference to x (presumably the no-Importer branch; the
// enclosing else is not visible here).
3797 xp = rcpFromRef (x);
// Case 2: x is already on the column Map, so it is used directly.
3800 else if (this->getColMap ()->isSameAs (* (x.
getMap ()))) {
3801 xp = rcpFromRef (x);
3803 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3804 (
true, std::runtime_error,
"x's Map must be the same as "
3805 "either the domain Map or the column Map of the CrsMatrix.");
3808 if (this->isFillComplete()) {
// Column 0 of xp's device view is what gets passed to the scaling call.
3809 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3810 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3813 x_lcl_1d,
false,
false);
3817 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3818 (
true, std::runtime_error,
"CrsMatrix::rightScale requires matrix to be"
// Frobenius-norm style reduction (the function name and signature are not
// visible in this excerpt -- presumably getFrobeniusNorm; confirm).
//
// Accumulates the sum of squared magnitudes of the locally stored values,
// sums that across the communicator with REDUCE_SUM, and returns the square
// root of the global sum.
3823 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3828 using Teuchos::ArrayView;
3829 using Teuchos::outArg;
3830 using Teuchos::REDUCE_SUM;
3831 using Teuchos::reduceAll;
// Local work is skipped entirely when this process holds no entries.
3839 if (getLocalNumEntries() > 0) {
// Optimized storage: walk the packed values array in one flat loop.
3840 if (isStorageOptimized ()) {
3843 const size_t numEntries = getLocalNumEntries ();
3844 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
3845 for (
size_t k = 0; k < numEntries; ++k) {
3846 auto val = values[k];
3850 const mag_type val_abs = STS::abs (val);
3851 mySum += val_abs * val_abs;
// Otherwise: go row by row, using each row's entry count from its RowInfo.
3855 const LocalOrdinal numRows =
3856 static_cast<LocalOrdinal
> (this->getLocalNumRows ());
3857 for (LocalOrdinal r = 0; r < numRows; ++r) {
3858 const RowInfo rowInfo = myGraph_->getRowInfo (r);
3859 const size_t numEntries = rowInfo.numEntries;
3860 auto A_r = this->getValuesViewHost(rowInfo);
3861 for (
size_t k = 0; k < numEntries; ++k) {
3863 const mag_type val_abs = STS::abs (val);
3864 mySum += val_abs * val_abs;
// Collective: every process in the communicator must reach this call.
3870 reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3871 mySum, outArg (totalSum));
3872 return STM::sqrt (totalSum);
// replaceColMap (name taken from the tfecfFuncName string; the signature is
// not visible in this excerpt).
//
// Forwards the new column Map to the graph owned by this matrix. Throws
// std::runtime_error when myGraph_ is null, i.e. when the matrix does not
// hold a graph it is allowed to modify.
3875 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3880 const char tfecfFuncName[] =
"replaceColMap: ";
3884 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3885 myGraph_.is_null (), std::runtime_error,
3886 "This method does not work if the matrix has a const graph. The whole "
3887 "idea of a const graph is that you are not allowed to change it, but "
3888 "this method necessarily must modify the graph, since the graph owns "
3889 "the matrix's column Map.");
3890 myGraph_->replaceColMap (newColMap);
// reindexColumns (name taken from the tfecfFuncName string; the leading part
// of the signature, including the `graph` parameter, is not visible here).
//
// Operates on `*graph` when a graph is supplied, otherwise on the matrix's
// own graph. Throws std::invalid_argument when no graph is supplied and the
// matrix does not own one. When sortEachRow is set and the graph is locally
// indexed but unsorted, each row's column indices are sorted with the values
// kept in step, and the graph is then flagged as sorted.
3893 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3897 const Teuchos::RCP<const map_type>& newColMap,
3898 const Teuchos::RCP<const import_type>& newImport,
3899 const bool sortEachRow)
3901 const char tfecfFuncName[] =
"reindexColumns: ";
3902 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3903 graph ==
nullptr && myGraph_.is_null (), std::invalid_argument,
3904 "The input graph is null, but the matrix does not own its graph.");
// Fall back to the matrix's own graph when the caller passed none.
3906 crs_graph_type& theGraph = (graph ==
nullptr) ? *myGraph_ : *graph;
// The graph is asked not to sort; sorting is done below instead, where the
// values can be permuted together with the indices.
3907 const bool sortGraph =
false;
3911 if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3912 const LocalOrdinal lclNumRows =
3913 static_cast<LocalOrdinal
> (theGraph.getLocalNumRows ());
3915 for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3917 const RowInfo rowInfo = theGraph.getRowInfo (row);
3918 auto lclColInds = theGraph.getLocalIndsViewHostNonConst (rowInfo);
3919 auto vals = this->getValuesViewHostNonConst (rowInfo);
// Only the first rowInfo.numEntries slots of the row are sorted.
3921 sort2 (lclColInds.data (),
3922 lclColInds.data () + rowInfo.numEntries,
3925 theGraph.indicesAreSorted_ =
true;
// replaceDomainMap (name taken from the tfecfFuncName string; the signature
// is not visible in this excerpt).
//
// Forwards the new domain Map to the graph owned by this matrix. Throws
// std::runtime_error when myGraph_ is null, i.e. when the matrix does not
// hold a graph it is allowed to modify.
3929 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3934 const char tfecfFuncName[] =
"replaceDomainMap: ";
3935 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3936 myGraph_.is_null (), std::runtime_error,
3937 "This method does not work if the matrix has a const graph. The whole "
3938 "idea of a const graph is that you are not allowed to change it, but this"
3939 " method necessarily must modify the graph, since the graph owns the "
3940 "matrix's domain Map and Import objects.");
3941 myGraph_->replaceDomainMap (newDomainMap);
// replaceDomainMapAndImporter (name taken from the tfecfFuncName string; the
// first part of the signature is not visible in this excerpt).
//
// Forwards the new domain Map and Importer to the graph owned by this
// matrix. Throws std::runtime_error when myGraph_ is null, i.e. when the
// matrix does not hold a graph it is allowed to modify.
3944 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3948 Teuchos::RCP<const import_type>& newImporter)
3950 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
3951 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3952 myGraph_.is_null (), std::runtime_error,
3953 "This method does not work if the matrix has a const graph. The whole "
3954 "idea of a const graph is that you are not allowed to change it, but this"
3955 " method necessarily must modify the graph, since the graph owns the "
3956 "matrix's domain Map and Import objects.");
3957 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
// replaceRangeMap (name taken from the tfecfFuncName string; the signature
// is not visible in this excerpt).
//
// Forwards the new range Map to the graph owned by this matrix. Throws
// std::runtime_error when myGraph_ is null, i.e. when the matrix does not
// hold a graph it is allowed to modify.
//
// The error text names the range Map and Export objects, matching what this
// method actually replaces (it previously repeated the domain-Map wording).
3960 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3965 const char tfecfFuncName[] =
"replaceRangeMap: ";
3966 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3967 myGraph_.is_null (), std::runtime_error,
3968 "This method does not work if the matrix has a const graph. The whole "
3969 "idea of a const graph is that you are not allowed to change it, but this"
3970 " method necessarily must modify the graph, since the graph owns the "
3971 "matrix's range Map and Export objects.");
3972 myGraph_->replaceRangeMap (newRangeMap);
// replaceRangeMapAndExporter (name taken from the tfecfFuncName string; the
// first part of the signature is not visible in this excerpt).
//
// Forwards the new range Map and Exporter to the graph owned by this matrix.
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix does
// not hold a graph it is allowed to modify.
//
// The error text names the range Map and Export objects, matching what this
// method actually replaces (it previously repeated the domain-Map wording).
3975 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3979 Teuchos::RCP<const export_type>& newExporter)
3981 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
3982 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3983 myGraph_.is_null (), std::runtime_error,
3984 "This method does not work if the matrix has a const graph. The whole "
3985 "idea of a const graph is that you are not allowed to change it, but this"
3986 " method necessarily must modify the graph, since the graph owns the "
3987 "matrix's range Map and Export objects.");
3988 myGraph_->replaceRangeMapAndExporter (newRangeMap, newExporter);
// Stashes entries for a global row in the nonlocals_ container (function name
// and first parameter are not visible in this excerpt -- presumably
// insertNonownedGlobalValues(globalRow, indices, values); confirm).
//
// Appends each (indices[k], values[k]) pair to the index and value arrays
// stored under nonlocals_[globalRow]. The number of pairs appended is
// indices.size(); values is indexed over the same range.
3991 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3995 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
3996 const Teuchos::ArrayView<const Scalar>& values)
3998 using Teuchos::Array;
3999 typedef GlobalOrdinal GO;
4000 typedef typename Array<GO>::size_type size_type;
4002 const size_type numToInsert = indices.size ();
// operator[] creates an empty entry for globalRow if none exists yet.
4005 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4006 Array<GO>& curRowInds = curRow.first;
4007 Array<Scalar>& curRowVals = curRow.second;
// Reserve once up front so the push_back loop does not reallocate repeatedly.
4008 const size_type newCapacity = curRowInds.size () + numToInsert;
4009 curRowInds.reserve (newCapacity);
4010 curRowVals.reserve (newCapacity);
4011 for (size_type k = 0; k < numToInsert; ++k) {
4012 curRowInds.push_back (indices[k]);
4013 curRowVals.push_back (values[k]);
// globalAssemble (name taken from the tfecfFuncName / ProfilingRegion
// strings; the signature is not visible in this excerpt).
//
// Moves the entries stashed in nonlocals_ into this matrix:
//   1. Collectively check whether any process has stashed rows.
//   2. Sort and merge each stashed row, and build a Map over the stashed
//      global row indices.
//   3. Copy the stashed rows into a temporary matrix on that Map.
//   4. Export the temporary matrix into this matrix with Tpetra::ADD --
//      directly if the row Map is one-to-one, otherwise via an intermediate
//      matrix followed by an Import.
//   5. Release nonlocals_ and collectively verify completeness.
// Throws std::runtime_error if fill is not active, or if the final
// collective check reports that some process was not "locally complete".
4017 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4024 using Teuchos::Comm;
4025 using Teuchos::outArg;
4028 using Teuchos::REDUCE_MAX;
4029 using Teuchos::REDUCE_MIN;
4030 using Teuchos::reduceAll;
4034 typedef GlobalOrdinal GO;
4035 typedef typename Teuchos::Array<GO>::size_type size_type;
4036 const char tfecfFuncName[] =
"globalAssemble: ";
4037 ProfilingRegion regionGlobalAssemble (
"Tpetra::CrsMatrix::globalAssemble");
// Verbose diagnostics are assembled in a local stream and written to
// std::cerr in a single call.
4039 const bool verbose = Behavior::verbose(
"CrsMatrix");
4040 std::unique_ptr<std::string> prefix;
4042 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4043 std::ostringstream os;
4044 os << *prefix <<
"nonlocals_.size()=" << nonlocals_.size()
4046 std::cerr << os.str();
4048 RCP<const Comm<int> > comm = getComm ();
4050 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4051 (! isFillActive (), std::runtime_error,
"Fill must be active before "
4052 "you may call this method.");
4054 const size_t myNumNonlocalRows = nonlocals_.size ();
// Step 1 (collective): REDUCE_MAX over a 0/1 flag tells every process
// whether anyone has stashed rows.
4061 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4062 int someoneHasNonlocalRows = 0;
4063 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4064 outArg (someoneHasNonlocalRows));
4065 if (someoneHasNonlocalRows == 0) {
// Step 2: per stashed row, record its global index, sort by column index
// (values permuted alongside), merge duplicates, and record the resulting
// entry count.
4079 RCP<const map_type> nonlocalRowMap;
4080 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4082 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4083 size_type curPos = 0;
4084 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4085 ++mapIter, ++curPos) {
4086 myNonlocalGblRows[curPos] = mapIter->first;
4089 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4090 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4097 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4098 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4099 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4100 merge2 (gblCols_newEnd, vals_newEnd,
4101 gblCols.begin (), gblCols.end (),
4102 vals.begin (), vals.end ());
// merge2 reports the new logical ends; trim the leftover tail.
4103 gblCols.erase (gblCols_newEnd, gblCols.end ());
4104 vals.erase (vals_newEnd, vals.end ());
4105 numEntPerNonlocalRow[curPos] = gblCols.size ();
// The index base of the nonlocal row Map is the global minimum stashed row
// index. A process with no stashed rows contributes numeric_limits<GO>::max()
// so it does not affect the REDUCE_MIN.
4116 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4118 auto iter = std::min_element (myNonlocalGblRows.begin (),
4119 myNonlocalGblRows.end ());
4120 if (iter != myNonlocalGblRows.end ()) {
4121 myMinNonlocalGblRow = *iter;
4124 GO gblMinNonlocalGblRow = 0;
4125 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4126 outArg (gblMinNonlocalGblRow));
4127 const GO indexBase = gblMinNonlocalGblRow;
4128 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4129 nonlocalRowMap = rcp (
new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4138 std::ostringstream os;
4139 os << *prefix <<
"Create nonlocal matrix" << endl;
4140 std::cerr << os.str();
// Step 3: temporary matrix on the nonlocal row Map, filled from nonlocals_.
4142 RCP<crs_matrix_type> nonlocalMatrix =
4143 rcp (
new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow ()));
4145 size_type curPos = 0;
4146 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4147 ++mapIter, ++curPos) {
4148 const GO gblRow = mapIter->first;
4150 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4151 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4153 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
// Step 4: choose the communication path based on whether this matrix's row
// Map is one-to-one.
4165 auto origRowMap = this->getRowMap ();
4166 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
// Starts at 1; the assignments to 0 below sit under conditions that are not
// visible in this excerpt.
4168 int isLocallyComplete = 1;
4170 if (origRowMapIsOneToOne) {
4172 std::ostringstream os;
4173 os << *prefix <<
"Original row Map is 1-to-1" << endl;
4174 std::cerr << os.str();
4176 export_type exportToOrig (nonlocalRowMap, origRowMap);
4178 isLocallyComplete = 0;
4181 std::ostringstream os;
4182 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
4183 std::cerr << os.str();
4185 this->doExport (*nonlocalMatrix, exportToOrig,
Tpetra::ADD);
4190 std::ostringstream os;
4191 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
4192 std::cerr << os.str();
// Row Map is not one-to-one: Export into a matrix on a one-to-one Map first
// (oneToOneRowMap is built on lines not visible here), then Import into
// this matrix.
4199 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4201 isLocallyComplete = 0;
4209 std::ostringstream os;
4210 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4212 std::cerr << os.str();
4214 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4216 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4222 std::ostringstream os;
4223 os << *prefix <<
"Free nonlocalMatrix" << endl;
4224 std::cerr << os.str();
// The temporary is released before the Import below.
4226 nonlocalMatrix = Teuchos::null;
4230 std::ostringstream os;
4231 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4232 std::cerr << os.str();
4234 import_type importToOrig (oneToOneRowMap, origRowMap);
4235 this->doImport (oneToOneMatrix, importToOrig,
Tpetra::ADD);
4243 std::ostringstream os;
4244 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4245 std::cerr << os.str();
// Step 5: swapping with an empty container releases the stashed storage
// rather than just clearing it.
4247 decltype (nonlocals_) newNonlocals;
4248 std::swap (nonlocals_, newNonlocals);
// Collective REDUCE_MIN: the result is 1 only if every process reported 1.
4257 int isGloballyComplete = 0;
4258 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4259 outArg (isGloballyComplete));
4260 TEUCHOS_TEST_FOR_EXCEPTION
4261 (isGloballyComplete != 1, std::runtime_error,
"On at least one process, "
4262 "you called insertGlobalValues with a global row index which is not in "
4263 "the matrix's row Map on any process in its communicator.");
// Re-opens the matrix for modification (function name and signature are not
// visible in this excerpt -- presumably resumeFill(params); confirm).
//
// When the matrix does not use a static graph, the graph's resumeFill is
// called with the same params. With a new enough KokkosKernels the cached
// applyHelper is reset. The matrix's fillComplete_ flag is cleared.
4266 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4271 if (! isStaticGraph ()) {
4272 myGraph_->resumeFill (params);
4274 #if KOKKOSKERNELS_VERSION >= 40299
// Drop the cached apply helper; localApply rebuilds it when it is null.
4276 applyHelper.reset();
4278 fillComplete_ =
false;
// Thin forwarder: returns whatever the graph's haveGlobalConstants() reports
// (function name and signature are not visible in this excerpt -- presumably
// CrsMatrix::haveGlobalConstants; confirm).
4281 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4285 return getCrsGraphRef ().haveGlobalConstants ();
// fillComplete(params) (name taken from the tfecfFuncName string; the
// signature is not visible in this excerpt).
//
// Convenience overload: uses the graph's row Map as both the range Map and
// the domain Map, then delegates to the three-argument fillComplete.
// Throws std::logic_error if getCrsGraph() returns null.
4288 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4293 const char tfecfFuncName[] =
"fillComplete(params): ";
4295 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4296 (this->getCrsGraph ().is_null (), std::logic_error,
4297 "getCrsGraph() returns null. This should not happen at this point. "
4298 "Please report this bug to the Tpetra developers.");
// `graph` is declared on a line not visible in this excerpt.
4308 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap ();
4309 Teuchos::RCP<const map_type> domainMap = rangeMap;
4310 this->fillComplete (domainMap, rangeMap, params);
// fillComplete(domainMap, rangeMap, params) (name taken from the
// tfecfFuncName / ProfilingRegion strings; the first parameter is not visible
// in this excerpt).
//
// Visible flow:
//   * Require active, not-yet-complete fill state (std::runtime_error
//     otherwise).
//   * Read options from params: "No Nonlocal Changes" and the ghost-GID sort
//     flag (two spellings of the key are accepted).
//   * Allocate values if the graph's indices are not yet allocated.
//   * Run globalAssemble() when nonlocal inserts are possible on more than
//     one process; on a single process, stashed nonlocal entries are an
//     error.
//   * Static graph: (debug builds) check that the supplied Maps match the
//     graph's, then fillLocalMatrix.
//   * Owned graph: set domain/range Maps, build the column Map if needed,
//     make indices local, sort and merge, build Import/Export, fill the local
//     graph and matrix, compute constants, and mark the graph fill complete.
//   * Mark the matrix fill complete and run checkInternalState().
4314 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4318 const Teuchos::RCP<const map_type>& rangeMap,
4319 const Teuchos::RCP<Teuchos::ParameterList>& params)
4323 using Teuchos::ArrayRCP;
4327 const char tfecfFuncName[] =
"fillComplete: ";
4328 ProfilingRegion regionFillComplete
4329 (
"Tpetra::CrsMatrix::fillComplete");
4330 const bool verbose = Behavior::verbose(
"CrsMatrix");
4331 std::unique_ptr<std::string> prefix;
4333 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4334 std::ostringstream os;
4335 os << *prefix << endl;
4336 std::cerr << os.str ();
// NOTE(review): the label below is spelled "fillCompete" (also used again
// near the end of this function). It looks like a typo for "fillComplete",
// but tools may match on the existing label -- confirm before renaming.
4339 "Tpetra::CrsMatrix::fillCompete",
4342 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4343 (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4344 "Matrix fill state must be active (isFillActive() "
4345 "must be true) before you may call fillComplete().");
4346 const int numProcs = this->getComm ()->getSize ();
// Defaults used when params is null or does not set the option.
4356 bool assertNoNonlocalInserts =
false;
4359 bool sortGhosts =
true;
4361 if (! params.is_null ()) {
4362 assertNoNonlocalInserts = params->get (
"No Nonlocal Changes",
4363 assertNoNonlocalInserts);
// Two spellings of the same option are recognized; the lowercase one is
// checked first.
4364 if (params->isParameter (
"sort column map ghost gids")) {
4365 sortGhosts = params->get (
"sort column map ghost gids", sortGhosts);
4367 else if (params->isParameter (
"Sort column Map ghost GIDs")) {
4368 sortGhosts = params->get (
"Sort column Map ghost GIDs", sortGhosts);
// Global assembly is only needed with more than one process and when the
// caller has not promised there are no nonlocal inserts.
4373 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4375 if (! this->myGraph_.is_null ()) {
4376 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
// Allocate value storage now if the graph never allocated indices; local vs
// global indexing is chosen by whether a column Map already exists.
4379 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4380 if (this->hasColMap ()) {
4381 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4384 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4389 if (needGlobalAssemble) {
4390 this->globalAssemble ();
4393 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4394 (numProcs == 1 && nonlocals_.size() > 0,
4395 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4396 "An invalid entry (i.e., with row index not in the row Map) must have "
4397 "been submitted to the CrsMatrix.");
// Static (const) graph path: the graph cannot be changed, so in debug builds
// the supplied Maps are only checked against the graph's Maps.
4400 if (this->isStaticGraph ()) {
4408 #ifdef HAVE_TPETRA_DEBUG
4426 const bool domainMapsMatch =
4427 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4428 const bool rangeMapsMatch =
4429 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4431 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4432 (! domainMapsMatch, std::runtime_error,
4433 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4434 "The graph cannot be changed because it was given to the CrsMatrix "
4435 "constructor as const. You can fix this by passing in the graph's "
4436 "domain Map and range Map to the matrix's fillComplete call.");
4438 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4439 (! rangeMapsMatch, std::runtime_error,
4440 "The CrsMatrix's range Map does not match the graph's range Map. "
4441 "The graph cannot be changed because it was given to the CrsMatrix "
4442 "constructor as const. You can fix this by passing in the graph's "
4443 "domain Map and range Map to the matrix's fillComplete call.");
4444 #endif // HAVE_TPETRA_DEBUG
4448 this->fillLocalMatrix (params);
// Owned-graph path: the matrix drives the graph's own fill-complete steps.
4456 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4459 Teuchos::Array<int> remotePIDs (0);
4460 const bool mustBuildColMap = ! this->hasColMap ();
4461 if (mustBuildColMap) {
4462 this->myGraph_->makeColMap (remotePIDs);
// makeIndicesLocal returns (error count, message); a nonzero count is
// rethrown with that message.
4467 const std::pair<size_t, std::string> makeIndicesLocalResult =
4468 this->myGraph_->makeIndicesLocal(verbose);
4473 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4474 (makeIndicesLocalResult.first != 0, std::runtime_error,
4475 makeIndicesLocalResult.second);
4477 const bool sorted = this->myGraph_->isSorted ();
4478 const bool merged = this->myGraph_->isMerged ();
4479 this->sortAndMergeIndicesAndValues (sorted, merged);
4484 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4488 this->fillLocalGraphAndMatrix (params);
// Global constants are computed unless params sets
// "compute global constants" to false.
4490 const bool callGraphComputeGlobalConstants = params.get () ==
nullptr ||
4491 params->get (
"compute global constants",
true);
4492 if (callGraphComputeGlobalConstants) {
4493 this->myGraph_->computeGlobalConstants ();
4496 this->myGraph_->computeLocalConstants ();
4498 this->myGraph_->fillComplete_ =
true;
4499 this->myGraph_->checkInternalState ();
4504 this->fillComplete_ =
true;
4507 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState"
4509 this->checkInternalState ();
// expertStaticFillComplete (name taken from the tfecfFuncName string; the
// first parameter is not visible in this excerpt).
//
// Expert-mode fill complete: the caller supplies the domain/range Maps and
// the Import/Export objects, which are handed to the graph's own
// expertStaticFillComplete. The local graph and matrix are then filled, the
// matrix is marked fill complete, and checkInternalState() is run.
//
// Throws std::runtime_error if fill is not active or the matrix is already
// fill complete, and std::logic_error if myGraph_ is null.
//
// Changes from the previous text: the last parameter is spelled `&params`
// again (it had been garbled), and the debug check on `! isFillComplete()`
// now reports that condition instead of repeating the isFillActive() message.
4513 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4517 const Teuchos::RCP<const map_type> & rangeMap,
4518 const Teuchos::RCP<const import_type>& importer,
4519 const Teuchos::RCP<const export_type>& exporter,
4520 const Teuchos::RCP<Teuchos::ParameterList> &params)
// Optional timers; the label prefix can be customized via "Timer Label".
4522 #ifdef HAVE_TPETRA_MMM_TIMINGS
4524 if(!params.is_null())
4525 label = params->get(
"Timer Label",label);
4526 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
4527 using Teuchos::TimeMonitor;
4529 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4532 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4533 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4534 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
4535 "must be true) before calling fillComplete().");
4536 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4537 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
4540 #ifdef HAVE_TPETRA_MMM_TIMINGS
4541 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
// The graph completes itself first, using the caller-supplied objects.
4544 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4548 #ifdef HAVE_TPETRA_MMM_TIMINGS
4549 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
4552 fillLocalGraphAndMatrix (params);
4557 fillComplete_ =
true;
// Debug builds only: the two fill-state flags must now be consistent.
4560 #ifdef HAVE_TPETRA_DEBUG
4561 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4562 ": We're at the end of fillComplete(), but isFillActive() is true. "
4563 "Please report this bug to the Tpetra developers.");
4564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4565 ": We're at the end of fillComplete(), but isFillComplete() is false. "
4566 "Please report this bug to the Tpetra developers.");
4567 #endif // HAVE_TPETRA_DEBUG
4569 #ifdef HAVE_TPETRA_MMM_TIMINGS
4570 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
4573 checkInternalState();
// Row-merge helper (function name and signature are not visible in this
// excerpt -- presumably mergeRowIndicesAndValues(rowLen, cols, vals), which
// is called with those arguments from sortAndMergeIndicesAndValues; confirm).
//
// Scans the row's column indices [cols, cols + rowLen) with a read cursor
// (`cur`) and a write cursor (`newend`), comparing each index against the
// one at the write cursor. Returns `newend - beg`. The loop body that handles
// equal and unequal indices is not visible here.
4577 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4583 LocalOrdinal* beg = cols;
4584 LocalOrdinal* end = cols + rowLen;
4585 LocalOrdinal* newend = beg;
// The read cursor starts one past the first element.
4587 LocalOrdinal* cur = beg + 1;
4591 while (cur != end) {
4592 if (*cur != *newend) {
4609 return newend - beg;
// sortAndMergeIndicesAndValues (name taken from the tfecfFuncName /
// ProfilingRegion strings; the signature is not visible in this excerpt).
//
// Does nothing unless at least one of `sorted` / `merged` is false. In that
// case it requires an owned, non-optimized graph, then runs a host parallel
// reduction over local rows that sorts each row (values permuted alongside),
// merges it, stores the new row length, and counts the removed duplicates.
//
// Throws std::runtime_error for a static graph, and std::logic_error if
// myGraph_ is null or storage has already been optimized.
4612 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4617 using ::Tpetra::Details::ProfilingRegion;
4618 typedef LocalOrdinal LO;
4619 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4620 host_execution_space;
4621 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4622 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4623 ProfilingRegion regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
4625 if (! sorted || ! merged) {
4626 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4627 (this->isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
4628 "\"static\" (const) graph, since the matrix does not own the graph.");
4629 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4630 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
4631 "this matrix claims ! isStaticGraph(). "
4632 "Please report this bug to the Tpetra developers.");
4633 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4634 (this->isStorageOptimized (), std::logic_error,
"It is invalid to call "
4635 "this method if the graph's storage has already been optimized. "
4636 "Please report this bug to the Tpetra developers.");
4639 const LO lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
4640 size_t totalNumDups = 0;
// Host view of the unpacked values, opened read-write because rows are
// permuted in place.
4645 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
// Per-row work; numDups is the reduction variable. rowBegins_, rowLengths_,
// cols_ and vals are set up on lines not visible in this excerpt.
4647 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
4648 [=] (
const LO lclRow,
size_t& numDups) {
4649 size_t rowBegin = rowBegins_(lclRow);
4650 size_t rowLen = rowLengths_(lclRow);
4651 LO* cols = cols_.data() + rowBegin;
4654 sort2 (cols, cols + rowLen, vals);
4657 size_t newRowLength = mergeRowIndicesAndValues (rowLen, cols, vals);
4658 rowLengths_(lclRow) = newRowLength;
4659 numDups += rowLen - newRowLength;
// Non-transpose apply path (function name and signature are not visible in
// this excerpt -- presumably applyNonTranspose(X_in, Y_in, alpha, beta), as
// called from apply(); the ProfilingRegion labels below say
// "Tpetra::CrsMatrix::apply"; confirm).
//
// Visible flow:
//   * Special-case alpha == 0 and beta != 1 (branch bodies not visible here).
//   * Obtain a column-Map version of X: X_in itself or a copy when there is
//     no Importer, otherwise the result of an Import with INSERT.
//   * With an Exporter: run localApply into a row-Map temporary using beta =
//     ZERO, then export (details not visible here).
//   * Without an Exporter: run localApply either into a row-Map temporary or
//     directly into Y_in.
//   * If Y is replicated, a final reduction step follows.
4672 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4683 using Teuchos::rcp_const_cast;
4684 using Teuchos::rcpFromRef;
4685 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4686 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4692 if (alpha == ZERO) {
4695 }
else if (beta != ONE) {
4709 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4710 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4716 const bool Y_is_overwritten = (beta ==
ZERO);
// "Replicated" here means Y is not distributed although the communicator
// has more than one process.
4719 const bool Y_is_replicated =
4720 (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4728 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4735 RCP<const MV> X_colMap;
// No Importer: X can be used without communication, either via a copy into
// the cached column-Map MultiVector or by referencing X_in.
4736 if (importer.is_null ()) {
4744 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in,
true);
4746 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4751 X_colMap = rcpFromRef (X_in);
// Importer present: bring X onto the column Map with INSERT.
4755 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply: Import");
4761 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
4764 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
4765 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4772 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
// Exporter present: the local product goes into the row-Map temporary with
// beta = ZERO; beta is applied elsewhere (not visible here).
4779 if (! exporter.is_null ()) {
4780 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
4782 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply: Export");
4788 if (Y_is_overwritten) {
4814 Y_rowMap = getRowMapMultiVector (Y_in,
true);
4821 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4825 this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4833 if (Y_is_replicated) {
4834 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
// Transpose apply path (the ProfilingRegion labels below say
// "Tpetra::CrsMatrix::apply (transpose)"; the function name and most of the
// signature are not visible in this excerpt -- presumably
// applyTranspose(X_in, Y_in, mode, alpha, beta), as called from apply()).
//
// Visible flow:
//   * Special-case alpha == 0 and beta == 0 (branch bodies not visible here).
//   * Use either a deep copy of X_in or a reference to it (the condition is
//     not visible here).
//   * (Re)create the cached importMV_ / exportMV_ when an Importer / Exporter
//     exists and the cache is missing.
//   * With an Exporter: bring X onto the row Map via exportMV_->doImport.
//   * With an Importer: zero importMV_, run localApply into it with beta =
//     ZERO, then export (details not visible here).
//   * Without an Importer: run localApply into a copy of Y_in or directly
//     into Y_in.
//   * If Y is replicated, a final reduction step follows.
4839 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4844 const Teuchos::ETransp mode,
4849 using Teuchos::null;
4852 using Teuchos::rcp_const_cast;
4853 using Teuchos::rcpFromRef;
4854 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4857 if (alpha == ZERO) {
4870 else if (beta == ZERO) {
4888 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4889 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
// "Replicated" here means Y is not distributed although the communicator
// has more than one process.
4894 const bool Y_is_replicated = (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4895 const bool Y_is_overwritten = (beta ==
ZERO);
4896 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4902 X = rcp (
new MV (X_in, Teuchos::Copy));
4904 X = rcpFromRef (X_in);
// Cached temporaries are checked against the current number of vectors and
// created when missing.
4908 if (importer != Teuchos::null) {
4909 if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4912 if (importMV_ == null) {
4913 importMV_ = rcp (
new MV (this->getColMap (), numVectors));
4916 if (exporter != Teuchos::null) {
4917 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4920 if (exportMV_ == null) {
4921 exportMV_ = rcp (
new MV (this->getRowMap (), numVectors));
// In the transpose case the Exporter is run through doImport to move X onto
// the row Map.
4927 if (! exporter.is_null ()) {
4928 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
4929 exportMV_->doImport (X_in, *exporter,
INSERT);
4936 if (importer != Teuchos::null) {
4937 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
// importMV_ is zeroed before the local product is written into it.
4944 importMV_->putScalar (ZERO);
4946 this->localApply (*X, *importMV_, mode, alpha, ZERO);
4948 if (Y_is_overwritten) {
// No Importer: apply into a deep copy of Y_in on one branch, directly into
// Y_in on the other (the selecting condition is not visible here).
4965 MV Y (Y_in, Teuchos::Copy);
4966 this->localApply (*X, Y, mode, alpha, beta);
4969 this->localApply (*X, Y_in, mode, alpha, beta);
4976 if (Y_is_replicated) {
4977 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
// localApply (name taken from the tfecfFuncName / ProfilingRegion strings;
// the first parameters are not visible in this excerpt).
//
// Process-local sparse matrix times multivector product for the given
// transpose mode. Before computing, it checks that X and Y have the local
// lengths required by the mode (column Map / row Map for NO_TRANS, swapped
// for TRANS and CONJ_TRANS), that the matrix is fill complete, that X and Y
// are constant stride, and that X and Y do not alias.
//
// With a new enough KokkosKernels, a cached applyHelper holds the SpMV
// handle and is created on first use; otherwise the local multiply operator
// is used, with a separate entry point for imbalanced rows.
//
// Throws std::runtime_error on the failed checks above and
// std::invalid_argument for an unrecognized mode.
//
// Change from the previous text: the message for the second TRANS/CONJ_TRANS
// length check (the one against the column Map) now names Y; it previously
// repeated "X" from the row-Map check just above it.
4982 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4987 const Teuchos::ETransp mode,
4988 const Scalar& alpha,
4989 const Scalar& beta)
const
4992 using Teuchos::NO_TRANS;
4993 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localApply");
5000 const char tfecfFuncName[] =
"localApply: ";
5001 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
// Any mode other than NO_TRANS is treated as a transpose for the size checks.
5005 const bool transpose = (mode != Teuchos::NO_TRANS);
5006 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5008 getColMap ()->getLocalNumElements (), std::runtime_error,
5009 "NO_TRANS case: X has the wrong number of local rows. "
5011 "getColMap()->getLocalNumElements() = " <<
5012 getColMap ()->getLocalNumElements () <<
".");
5013 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5015 getRowMap ()->getLocalNumElements (), std::runtime_error,
5016 "NO_TRANS case: Y has the wrong number of local rows. "
5018 "getRowMap()->getLocalNumElements() = " <<
5019 getRowMap ()->getLocalNumElements () <<
".");
5020 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5022 getRowMap ()->getLocalNumElements (), std::runtime_error,
5023 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5025 <<
" != getRowMap()->getLocalNumElements() = "
5026 << getRowMap ()->getLocalNumElements () <<
".");
5027 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5029 getColMap ()->getLocalNumElements (), std::runtime_error,
5030 "TRANS or CONJ_TRANS case: Y has the wrong number of local "
5032 <<
" != getColMap()->getLocalNumElements() = "
5033 << getColMap ()->getLocalNumElements () <<
".");
5034 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5035 (! isFillComplete (), std::runtime_error,
"The matrix is not "
5036 "fill complete. You must call fillComplete() (possibly with "
5037 "domain and range Map arguments) without an intervening "
5038 "resumeFill() call before you may call this method.");
5039 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5041 std::runtime_error,
"X and Y must be constant stride.");
// Aliasing check: identical non-null data pointers with a nonzero extent.
5046 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5047 (X_lcl.data () == Y_lcl.data () && X_lcl.data () !=
nullptr
5048 && X_lcl.extent(0) != 0,
5049 std::runtime_error,
"X and Y may not alias one another.");
5052 #if KOKKOSKERNELS_VERSION >= 40299
5053 auto A_lcl = getLocalMatrixDevice();
// Lazily create the cached helper.
5055 if(!applyHelper.get()) {
5058 bool useMergePath =
false;
5059 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
// Only on CUDA: maxRowImbalance is the longest row minus the average row
// length; it presumably drives the merge-path choice below (the threshold
// test and the nrows guard are not visible here).
5065 if constexpr(std::is_same_v<execution_space, Kokkos::Cuda>) {
5066 LocalOrdinal nrows = getLocalNumRows();
5067 LocalOrdinal maxRowImbalance = 0;
5069 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5072 useMergePath =
true;
5075 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map,
5076 useMergePath ? KokkosSparse::SPMV_MERGE_PATH : KokkosSparse::SPMV_DEFAULT);
// Translate the Teuchos mode into the KokkosSparse mode string.
5080 const char* modeKK =
nullptr;
5083 case Teuchos::NO_TRANS:
5084 modeKK = KokkosSparse::NoTranspose;
break;
5085 case Teuchos::TRANS:
5086 modeKK = KokkosSparse::Transpose;
break;
5087 case Teuchos::CONJ_TRANS:
5088 modeKK = KokkosSparse::ConjugateTranspose;
break;
5090 throw std::invalid_argument(
"Tpetra::CrsMatrix::localApply: invalid mode");
// The helper may provide a variant of the matrix with int row pointers,
// which uses its own handle.
5093 if(applyHelper->shouldUseIntRowptrs())
5095 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
5097 &applyHelper->handle_int, modeKK,
5103 &applyHelper->handle, modeKK,
// Older KokkosKernels path: go through the local multiply operator.
5107 LocalOrdinal nrows = getLocalNumRows();
5108 LocalOrdinal maxRowImbalance = 0;
5110 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5112 auto matrix_lcl = getLocalMultiplyOperator();
5114 matrix_lcl->applyImbalancedRows (X_lcl, Y_lcl, mode, alpha, beta);
5116 matrix_lcl->apply (X_lcl, Y_lcl, mode, alpha, beta);
// apply (name taken from the fnName string; most of the signature is not
// visible in this excerpt).
//
// Public entry point for the matrix-multivector product. Requires the matrix
// to be fill complete (std::runtime_error otherwise), then dispatches on
// `mode`: NO_TRANS goes to applyNonTranspose, every other mode goes to
// applyTranspose with the mode passed through.
5120 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5125 Teuchos::ETransp mode,
5130 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5132 TEUCHOS_TEST_FOR_EXCEPTION
5133 (! isFillComplete (), std::runtime_error,
5134 fnName <<
": Cannot call apply() until fillComplete() "
5135 "has been called.");
5137 if (mode == Teuchos::NO_TRANS) {
5138 ProfilingRegion regionNonTranspose (fnName);
5139 this->applyNonTranspose (X, Y, alpha, beta);
5142 ProfilingRegion regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5143 this->applyTranspose (X, Y, mode, alpha, beta);
// convert (name taken from the tfecfFuncName string; the inner template
// parameter T and the rest of the signature are not visible in this excerpt).
//
// Builds a new CrsMatrix with scalar type T on the same graph as this
// matrix, copies this matrix's local values into it with element-wise
// conversion, and fill-completes it with this matrix's domain and range
// Maps. Throws std::runtime_error if this matrix is not fill complete.
5148 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5150 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5156 const char tfecfFuncName[] =
"convert: ";
5158 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5159 (! this->isFillComplete (), std::runtime_error,
"This matrix (the source "
5160 "of the conversion) is not fill complete. You must first call "
5161 "fillComplete() (possibly with the domain and range Map) without an "
5162 "intervening call to resumeFill(), before you may call this method.");
// The new matrix is constructed from this matrix's graph.
5164 RCP<output_matrix_type> newMatrix
5165 (
new output_matrix_type (this->getCrsGraph ()));
// Destination first, source second.
5169 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5170 this->getLocalMatrixDevice ().values);
5174 newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
// checkInternalState (name taken from the tfecfFuncName string; the signature
// and any enclosing build guard are not visible in this excerpt).
//
// Consistency checks on the matrix's internal state; each failure throws
// std::logic_error:
//   * staticGraph_ must not be null.
//   * If myGraph_ is set, it must be the same object as staticGraph_.
//   * A fill-complete matrix must have a fill-complete graph.
//   * If the graph has allocated indices, a nonzero allocation size, and at
//     least one local row, the unpacked values storage must not be empty.
5180 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5187 const char tfecfFuncName[] =
"checkInternalState: ";
5188 const char err[] =
"Internal state is not consistent. "
5189 "Please report this bug to the Tpetra developers.";
5193 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5194 (staticGraph_.is_null (), std::logic_error, err);
5198 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5199 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5200 std::logic_error, err);
5202 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5203 (isFillComplete () && ! staticGraph_->isFillComplete (),
5204 std::logic_error, err <<
" Specifically, the matrix is fill complete, "
5205 "but its graph is NOT fill complete.");
5208 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5209 (staticGraph_->indicesAreAllocated () &&
5210 staticGraph_->getLocalAllocationSize() > 0 &&
5211 staticGraph_->getLocalNumRows() > 0 &&
5212 valuesUnpacked_wdv.extent (0) == 0,
5213 std::logic_error, err);
// CrsMatrix::description: builds a one-line, brace-delimited summary of the
// matrix in a std::ostringstream. The summary contains the object label (if
// non-empty), the fill-complete flag and the global dimensions; the global
// number of entries is added only in the fill-complete case.
// NOTE(review): the closing of the fill-complete branch and the return
// statement are not visible in this excerpt.
5217 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5222 std::ostringstream os;
5224 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
// Only print the label when the user actually set one.
5225 if (this->getObjectLabel () !=
"") {
5226 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
// Fill-complete matrices additionally report the global entry count.
5228 if (isFillComplete ()) {
5229 os <<
"isFillComplete: true"
5230 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5231 << getGlobalNumCols () <<
"]"
5232 <<
", global number of entries: " << getGlobalNumEntries ()
5236 os <<
"isFillComplete: false"
5237 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5238 << getGlobalNumCols () <<
"]}";
// CrsMatrix::describe: writes a multi-part description of the matrix to
// 'out', with the amount of detail controlled by verbLevel.
//   - VERB_DEFAULT is treated as VERB_LOW.
//   - VERB_NONE is tested first (the body of that branch is not visible here).
//   - A header with template parameters, fill-complete flag and global
//     dimensions is printed; further sections are gated by the
//     "vl < VERB_MEDIUM" and "vl < VERB_HIGH" tests below.
//   - At VERB_EXTREME each row's (index, value) pairs are printed as well.
// NOTE(review): several guard/return lines are missing from this excerpt, so
// which process prints which section cannot be fully confirmed from here.
5243 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5247 const Teuchos::EVerbosityLevel verbLevel)
const
5251 using Teuchos::ArrayView;
5252 using Teuchos::Comm;
5254 using Teuchos::TypeNameTraits;
5255 using Teuchos::VERB_DEFAULT;
5256 using Teuchos::VERB_NONE;
5257 using Teuchos::VERB_LOW;
5258 using Teuchos::VERB_MEDIUM;
5259 using Teuchos::VERB_HIGH;
5260 using Teuchos::VERB_EXTREME;
// Resolve the default verbosity to VERB_LOW.
5262 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5264 if (vl == VERB_NONE) {
5269 Teuchos::OSTab tab0 (out);
5271 RCP<const Comm<int> > comm = this->getComm();
5272 const int myRank = comm->getRank();
5273 const int numProcs = comm->getSize();
// Column width for the per-row table: the loop steps 'dec' through powers of
// ten below the global row count (its body is not visible here), and the
// result is then clamped to at least 11 and widened by 2.
5275 for (
size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5278 width = std::max<size_t> (width,
static_cast<size_t> (11)) + 2;
// ---- Header section ----
5288 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5290 Teuchos::OSTab tab1 (out);
5293 if (this->getObjectLabel () !=
"") {
5294 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5297 out <<
"Template parameters:" << endl;
5298 Teuchos::OSTab tab2 (out);
5299 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
5300 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5301 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5302 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
// Entry counts are only printed for a fill-complete matrix.
5304 if (isFillComplete()) {
5305 out <<
"isFillComplete: true" << endl
5306 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5307 << getGlobalNumCols () <<
"]" << endl
5308 <<
"Global number of entries: " << getGlobalNumEntries () << endl
5309 << endl <<
"Global max number of entries in a row: "
5310 << getGlobalMaxNumRowEntries () << endl;
5313 out <<
"isFillComplete: false" << endl
5314 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5315 << getGlobalNumCols () <<
"]" << endl;
// ---- Map section: gated by the VERB_MEDIUM test ----
5319 if (vl < VERB_MEDIUM) {
5325 out << endl <<
"Row Map:" << endl;
5327 if (getRowMap ().is_null ()) {
5329 out <<
"null" << endl;
5336 getRowMap ()->describe (out, vl);
// Column, domain and range Maps are printed in full only when they are not
// null and not the same object as an already-printed Map.
5341 out <<
"Column Map: ";
5343 if (getColMap ().is_null ()) {
5345 out <<
"null" << endl;
5347 }
else if (getColMap () == getRowMap ()) {
5349 out <<
"same as row Map" << endl;
5355 getColMap ()->describe (out, vl);
5360 out <<
"Domain Map: ";
5362 if (getDomainMap ().is_null ()) {
5364 out <<
"null" << endl;
5366 }
else if (getDomainMap () == getRowMap ()) {
5368 out <<
"same as row Map" << endl;
5370 }
else if (getDomainMap () == getColMap ()) {
5372 out <<
"same as column Map" << endl;
5378 getDomainMap ()->describe (out, vl);
5383 out <<
"Range Map: ";
5385 if (getRangeMap ().is_null ()) {
5387 out <<
"null" << endl;
5389 }
else if (getRangeMap () == getDomainMap ()) {
5391 out <<
"same as domain Map" << endl;
5393 }
else if (getRangeMap () == getRowMap ()) {
5395 out <<
"same as row Map" << endl;
5401 getRangeMap ()->describe (out, vl);
// ---- Per-process summary: every rank takes its turn in rank order ----
5405 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5406 if (myRank == curRank) {
5407 out <<
"Process rank: " << curRank << endl;
5408 Teuchos::OSTab tab2 (out);
5409 if (! staticGraph_->indicesAreAllocated ()) {
5410 out <<
"Graph indices not allocated" << endl;
5413 out <<
"Number of allocated entries: "
5414 << staticGraph_->getLocalAllocationSize () << endl;
5416 out <<
"Number of entries: " << getLocalNumEntries () << endl
5417 <<
"Max number of entries per row: " << getLocalMaxNumRowEntries ()
// ---- Per-row table: gated by the VERB_HIGH test ----
5426 if (vl < VERB_HIGH) {
5431 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5432 if (myRank == curRank) {
5433 out << std::setw(width) <<
"Proc Rank"
5434 << std::setw(width) <<
"Global Row"
5435 << std::setw(width) <<
"Num Entries";
5436 if (vl == VERB_EXTREME) {
5437 out << std::setw(width) <<
"(Index,Value)";
5440 for (
size_t r = 0; r < getLocalNumRows (); ++r) {
5441 const size_t nE = getNumEntriesInLocalRow(r);
5442 GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5443 out << std::setw(width) << myRank
5444 << std::setw(width) << gid
5445 << std::setw(width) << nE;
// At VERB_EXTREME, print every (global column index, value) pair of the row.
// Locally indexed rows are converted to global indices through the column Map.
5446 if (vl == VERB_EXTREME) {
5447 if (isGloballyIndexed()) {
5448 global_inds_host_view_type rowinds;
5449 values_host_view_type rowvals;
5450 getGlobalRowView (gid, rowinds, rowvals);
5451 for (
size_t j = 0; j < nE; ++j) {
5452 out <<
" (" << rowinds[j]
5453 <<
", " << rowvals[j]
5457 else if (isLocallyIndexed()) {
5458 local_inds_host_view_type rowinds;
5459 values_host_view_type rowvals;
5460 getLocalRowView (r, rowinds, rowvals);
5461 for (
size_t j=0; j < nE; ++j) {
5462 out <<
" (" << getColMap()->getGlobalElement(rowinds[j])
5463 <<
", " << rowvals[j]
// Only the return statement of this definition is visible in this excerpt:
// it returns true exactly when srcRowMat is non-null.
// NOTE(review): srcRowMat is computed in lines not shown here; presumably it
// is the result of casting the source object to a row-matrix type -- confirm
// against the full file.
5479 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5493 return (srcRowMat !=
nullptr);
// CrsMatrix::applyCrsPadding: applies the given per-row padding to the
// matrix's unpacked storage.
//
// Visible steps:
//   1. If the graph's indices are not allocated yet, allocateValues() is
//      called with GlobalIndices / GraphNotYetAllocated.
//   2. row_ptr_beg is a deep copy of the graph's unpacked row pointers;
//      row_ptr_end holds one "end" offset per row (N = extent - 1, or 0).
//   3. row_ptr_end is filled either from beg + k_numRowEntries_ (when that
//      array is non-empty) or from the next row's begin offset.
//   4. The padding is applied to the global or local column indices together
//      with valuesUnpacked_wdv (the callee's name is on lines not visible
//      here); afterwards values and indices must have equal length, else
//      std::logic_error.
//   5. If k_numRowEntries_ was in use, the per-row counts are recomputed as
//      end - beg.
//   6. The graph's unpacked row pointers are replaced with row_ptr_beg.
//
// When 'verbose' is set, progress messages are written to std::cerr.
// NOTE(review): the "if (verbose)" guards and several braces are not visible
// in this excerpt.
5496 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5500 const typename crs_graph_type::padding_type& padding,
5506 using LO = local_ordinal_type;
5507 using row_ptrs_type =
5508 typename local_graph_device_type::row_map_type::non_const_type;
5509 using range_policy =
5510 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
// No trailing ": " here -- the exception messages below start with ": ".
5511 const char tfecfFuncName[] =
"applyCrsPadding";
5512 const char suffix[] =
5513 ". Please report this bug to the Tpetra developers.";
5514 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
5516 std::unique_ptr<std::string> prefix;
5518 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5519 std::ostringstream os;
5520 os << *prefix <<
"padding: ";
5523 std::cerr << os.str();
// The rank is only looked up in verbose mode; otherwise myRank is -1.
5525 const int myRank = ! verbose ? -1 : [&] () {
5526 auto map = this->getMap();
5527 if (map.is_null()) {
5530 auto comm = map->getComm();
5531 if (comm.is_null()) {
5534 return comm->getRank();
// Step 1: make sure storage exists before padding it.
5538 if (! myGraph_->indicesAreAllocated()) {
5540 std::ostringstream os;
5541 os << *prefix <<
"Call allocateIndices" << endl;
5542 std::cerr << os.str();
5544 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5556 std::ostringstream os;
5557 os << *prefix <<
"Allocate row_ptrs_beg: "
5558 << myGraph_->getRowPtrsUnpackedHost().extent(0) << endl;
5559 std::cerr << os.str();
// Step 2: private copy of the row begin offsets (allocated uninitialized,
// then filled by deep_copy).
5561 using Kokkos::view_alloc;
5562 using Kokkos::WithoutInitializing;
5563 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5564 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5566 Kokkos::deep_copy(execution_space(),row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
// N is the number of rows: one less than the row-pointer length, or 0 when
// the row-pointer array is empty.
5568 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
5569 size_t(row_ptr_beg.extent(0) - 1);
5571 std::ostringstream os;
5572 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5573 std::cerr << os.str();
5575 row_ptrs_type row_ptr_end(
5576 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5578 row_ptrs_type num_row_entries_d;
// Step 3: k_numRowEntries_ being non-empty decides how row ends are computed
// and whether the counts must be refreshed afterwards.
5580 const bool refill_num_row_entries =
5581 myGraph_->k_numRowEntries_.extent(0) != 0;
5583 if (refill_num_row_entries) {
5586 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5587 myGraph_->k_numRowEntries_);
5588 Kokkos::parallel_for
5589 (
"Fill end row pointers", range_policy(0, N),
5590 KOKKOS_LAMBDA (
const size_t i) {
5591 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
// Without per-row counts, each row ends where the next row begins.
5598 Kokkos::parallel_for
5599 (
"Fill end row pointers", range_policy(0, N),
5600 KOKKOS_LAMBDA (
const size_t i) {
5601 row_ptr_end(i) = row_ptr_beg(i+1);
// Step 4: pad indices and values together; the branch picks global vs. local
// column indices.
5605 if (myGraph_->isGloballyIndexed()) {
5607 myGraph_->gblInds_wdv,
5608 valuesUnpacked_wdv, padding, myRank, verbose);
5609 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5610 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5611 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5612 (newValuesLen != newColIndsLen, std::logic_error,
5613 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5614 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5619 myGraph_->lclIndsUnpacked_wdv,
5620 valuesUnpacked_wdv, padding, myRank, verbose);
5621 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5622 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5623 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5624 (newValuesLen != newColIndsLen, std::logic_error,
5625 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5626 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
// Step 5: recompute the per-row entry counts from the (possibly moved)
// begin/end offsets.
5630 if (refill_num_row_entries) {
5631 Kokkos::parallel_for
5632 (
"Fill num entries", range_policy(0, N),
5633 KOKKOS_LAMBDA (
const size_t i) {
5634 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5640 std::ostringstream os;
5641 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5642 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5643 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5644 std::cerr << os.str();
// Padding must not change the number of rows.
5645 TEUCHOS_ASSERT( myGraph_->getRowPtrsUnpackedHost().extent(0) ==
5646 row_ptr_beg.extent(0) );
// Step 6: publish the new row begin offsets to the graph.
5648 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
// CrsMatrix::copyAndPermuteStaticGraph: copies rows of srcMat into this
// matrix when this matrix's graph structure is fixed.
//
// Parameters:
//   srcMat          - source row matrix.
//   numSameIDs      - number of leading local rows whose global ID is the
//                     same in source and target; they are processed first.
//   permuteToLIDs   - target local row indices for the permuted rows.
//   permuteFromLIDs - matching source local row indices.
//   numPermutes     - number of permuted rows.
//
// For each row the source data is obtained either by getGlobalRowCopy()
// (locally indexed source; the scratch views rowInds/rowVals grow on demand)
// or by getGlobalRowView(); the data is then handed to combineGlobalValues().
// Throws std::logic_error if getNumEntriesInGlobalRow() and
// getGlobalRowCopy() disagree on the row length.
//
// Fix relative to the previous text: tfecfFuncName carries no trailing
// separator, so the exception messages now begin with ": " (the convention
// applyCrsPadding uses), and the second message gained its missing verb
// "reports".
// NOTE(review): the prefix is created with the "CrsGraph" tag although this
// is a CrsMatrix method; the verbose/debug flag lookups are not visible in
// this excerpt, so the tag is left untouched -- confirm whether "CrsMatrix"
// was intended.
5651 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5653 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5654 copyAndPermuteStaticGraph(
5655 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5656 const size_t numSameIDs,
5657 const LocalOrdinal permuteToLIDs[],
5658 const LocalOrdinal permuteFromLIDs[],
5659 const size_t numPermutes)
5661 using Details::ProfilingRegion;
5662 using Teuchos::Array;
5663 using Teuchos::ArrayView;
5665 using LO = LocalOrdinal;
5666 using GO = GlobalOrdinal;
5667 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5668 const char suffix[] =
5669 " Please report this bug to the Tpetra developers.";
5670 ProfilingRegion regionCAP
5671 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
5675 std::unique_ptr<std::string> prefix;
5677 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5678 std::ostringstream os;
5679 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for the callee; null when not verbose.
5681 const char*
const prefix_raw =
5682 verbose ? prefix.get()->c_str() :
nullptr;
5684 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
// ---- Phase 1: rows with identical local IDs in source and target ----
5689 const map_type& srcRowMap = * (srcMat.getRowMap ());
5690 nonconst_global_inds_host_view_type rowInds;
5691 nonconst_values_host_view_type rowVals;
5692 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5693 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5697 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5698 const GO targetGID = sourceGID;
5700 ArrayView<const GO>rowIndsConstView;
5701 ArrayView<const Scalar> rowValsConstView;
// A locally indexed source cannot hand out global-index views, so copy the
// row into the reusable scratch views.
5703 if (sourceIsLocallyIndexed) {
5704 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5705 if (rowLength > static_cast<size_t> (rowInds.size())) {
5706 Kokkos::resize(rowInds,rowLength);
5707 Kokkos::resize(rowVals,rowLength);
5711 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5712 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5717 size_t checkRowLength = 0;
5718 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
5719 rowValsView, checkRowLength);
5721 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5722 (rowLength != checkRowLength, std::logic_error,
": For "
5723 "global row index " << sourceGID <<
", the source "
5724 "matrix's getNumEntriesInGlobalRow returns a row length "
5725 "of " << rowLength <<
", but getGlobalRowCopy reports "
5726 "a row length of " << checkRowLength <<
"." << suffix);
// Wrap the Kokkos views as non-owning Teuchos ArrayViews for the callee.
5733 rowIndsConstView = Teuchos::ArrayView<const GO> (
5734 rowIndsView.data(), rowIndsView.extent(0),
5735 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5736 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5737 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5738 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Globally indexed source: take views of the row without copying.
5743 global_inds_host_view_type rowIndsView;
5744 values_host_view_type rowValsView;
5745 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5750 rowIndsConstView = Teuchos::ArrayView<const GO> (
5751 rowIndsView.data(), rowIndsView.extent(0),
5752 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5753 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5754 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5755 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5763 combineGlobalValues(targetGID, rowIndsConstView,
5765 prefix_raw, debug, verbose);
5769 std::ostringstream os;
5770 os << *prefix <<
"Do permutes" << endl;
// ---- Phase 2: permuted rows (source LID -> target LID) ----
5773 const map_type& tgtRowMap = * (this->getRowMap ());
5774 for (
size_t p = 0; p < numPermutes; ++p) {
5775 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5776 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5778 ArrayView<const GO> rowIndsConstView;
5779 ArrayView<const Scalar> rowValsConstView;
5781 if (sourceIsLocallyIndexed) {
5782 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5783 if (rowLength > static_cast<size_t> (rowInds.size ())) {
5784 Kokkos::resize(rowInds,rowLength);
5785 Kokkos::resize(rowVals,rowLength);
5789 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5790 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5795 size_t checkRowLength = 0;
5796 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5797 rowValsView, checkRowLength);
5799 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5800 (rowLength != checkRowLength, std::logic_error,
": For "
5801 "source matrix global row index " << sourceGID <<
", "
5802 "getNumEntriesInGlobalRow returns a row length of " <<
5803 rowLength <<
", but getGlobalRowCopy reports a row length of "
5804 << checkRowLength <<
"." << suffix);
5811 rowIndsConstView = Teuchos::ArrayView<const GO> (
5812 rowIndsView.data(), rowIndsView.extent(0),
5813 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5814 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5815 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5816 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5821 global_inds_host_view_type rowIndsView;
5822 values_host_view_type rowValsView;
5823 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5828 rowIndsConstView = Teuchos::ArrayView<const GO> (
5829 rowIndsView.data(), rowIndsView.extent(0),
5830 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5831 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5832 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5833 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5838 combineGlobalValues(targetGID, rowIndsConstView,
5840 prefix_raw, debug, verbose);
5844 std::ostringstream os;
5845 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermuteNonStaticGraph: copies rows of srcMat into this
// matrix when this matrix owns a graph that may still change.
//
// Parameters:
//   srcMat             - source row matrix.
//   numSameIDs         - number of leading local rows whose global ID is the
//                        same in source and target.
//   permuteToLIDs_dv   - target local row indices for the permuted rows.
//   permuteFromLIDs_dv - matching source local row indices.
//   numPermutes        - number of permuted rows.
//
// The graph first computes the padding needed for the incoming rows
// (computeCrsPadding) and applyCrsPadding() applies it. Each row is then
// read from the source by copy (locally indexed source) or by view, and
// inserted with insertGlobalValuesFilteredChecked(). Throws std::logic_error
// if getNumEntriesInGlobalRow() and getGlobalRowCopy() disagree on the row
// length.
//
// Fix relative to the previous text: the second exception message lacked the
// ": " separator after the function name (the first message already had it)
// and the verb "reports".
// NOTE(review): the prefix is created with the "CrsGraph" tag although this
// is a CrsMatrix method; left untouched because the matching verbose/debug
// flag lookups are not visible in this excerpt -- confirm.
5849 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5851 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5852 copyAndPermuteNonStaticGraph(
5853 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5854 const size_t numSameIDs,
5855 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5856 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5857 const size_t numPermutes)
5859 using Details::ProfilingRegion;
5860 using Teuchos::Array;
5861 using Teuchos::ArrayView;
5863 using LO = LocalOrdinal;
5864 using GO = GlobalOrdinal;
5865 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5866 const char suffix[] =
5867 " Please report this bug to the Tpetra developers.";
5868 ProfilingRegion regionCAP
5869 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
5873 std::unique_ptr<std::string> prefix;
5875 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5876 std::ostringstream os;
5877 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for the callee; null when not verbose.
5879 const char*
const prefix_raw =
5880 verbose ? prefix.get()->c_str() :
nullptr;
// Make room for the incoming entries before inserting anything.
5883 using row_graph_type = RowGraph<LO, GO, Node>;
5884 const row_graph_type& srcGraph = *(srcMat.getGraph());
5886 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5887 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5888 applyCrsPadding(*padding, verbose);
5890 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
// ---- Phase 1: rows with identical local IDs in source and target ----
5895 const map_type& srcRowMap = * (srcMat.getRowMap ());
5896 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5897 using gids_type = nonconst_global_inds_host_view_type;
5898 using vals_type = nonconst_values_host_view_type;
5901 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5905 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5906 const GO targetGID = sourceGID;
5908 ArrayView<const GO> rowIndsConstView;
5909 ArrayView<const Scalar> rowValsConstView;
// Locally indexed source: copy the row into the reusable scratch views,
// growing them when the row is longer than anything seen so far.
5911 if (sourceIsLocallyIndexed) {
5913 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5914 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5915 Kokkos::resize(rowInds,rowLength);
5916 Kokkos::resize(rowVals,rowLength);
5920 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5921 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5926 size_t checkRowLength = 0;
5927 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
5930 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5931 (rowLength != checkRowLength, std::logic_error,
": For "
5932 "global row index " << sourceGID <<
", the source "
5933 "matrix's getNumEntriesInGlobalRow returns a row length "
5934 "of " << rowLength <<
", but getGlobalRowCopy reports "
5935 "a row length of " << checkRowLength <<
"." << suffix);
5937 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5938 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
// Globally indexed source: take views of the row without copying.
5941 global_inds_host_view_type rowIndsView;
5942 values_host_view_type rowValsView;
5943 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5949 rowIndsConstView = Teuchos::ArrayView<const GO> (
5950 rowIndsView.data(), rowIndsView.extent(0),
5951 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5952 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5953 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5954 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5960 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5961 rowValsConstView, prefix_raw, debug, verbose);
5965 std::ostringstream os;
5966 os << *prefix <<
"Do permutes" << endl;
// ---- Phase 2: permuted rows; the LID lists are read through their host
// views ----
5968 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5969 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
5971 const map_type& tgtRowMap = * (this->getRowMap ());
5972 for (
size_t p = 0; p < numPermutes; ++p) {
5973 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5974 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5976 ArrayView<const GO> rowIndsConstView;
5977 ArrayView<const Scalar> rowValsConstView;
5979 if (sourceIsLocallyIndexed) {
5980 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5981 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5982 Kokkos::resize(rowInds,rowLength);
5983 Kokkos::resize(rowVals,rowLength);
5987 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5988 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5993 size_t checkRowLength = 0;
5994 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5995 rowValsView, checkRowLength);
5997 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5998 (rowLength != checkRowLength, std::logic_error,
": For "
5999 "source matrix global row index " << sourceGID <<
", "
6000 "getNumEntriesInGlobalRow returns a row length of " <<
6001 rowLength <<
", but getGlobalRowCopy reports a row length of "
6002 << checkRowLength <<
"." << suffix);
6004 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
6005 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
6008 global_inds_host_view_type rowIndsView;
6009 values_host_view_type rowValsView;
6010 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6016 rowIndsConstView = Teuchos::ArrayView<const GO> (
6017 rowIndsView.data(), rowIndsView.extent(0),
6018 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6019 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6020 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6021 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6027 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6028 rowValsConstView, prefix_raw, debug, verbose);
6032 std::ostringstream os;
6033 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermute: entry point that validates the permutation
// lists and dispatches to copyAndPermuteStaticGraph() or
// copyAndPermuteNonStaticGraph(), depending on isStaticGraph().
//
// Parameters visible in this excerpt:
//   numSameIDs      - number of leading rows with identical IDs in source
//                     and target.
//   permuteToLIDs   - target local row indices of the permuted rows.
//   permuteFromLIDs - matching source local row indices.
// The source object (srcObj) is dynamic_cast to the row-matrix type RMT.
//
// Throws std::invalid_argument when the two permutation lists differ in
// length. In the static-graph path both lists must already be valid on the
// host (TEUCHOS_ASSERT on need_sync_host()).
//
// Fix relative to the previous text: the length-mismatch message lacked a
// space before "!=", so the first length ran straight into the operator.
6037 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6042 const size_t numSameIDs,
6043 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6044 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6053 const char tfecfFuncName[] =
"copyAndPermute: ";
6054 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
6056 const bool verbose = Behavior::verbose(
"CrsMatrix");
6057 std::unique_ptr<std::string> prefix;
6059 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
6060 std::ostringstream os;
6061 os << *prefix << endl
6062 << *prefix <<
" numSameIDs: " << numSameIDs << endl
6063 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
6072 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
6074 std::cerr << os.str ();
// The two permutation lists must pair up one-to-one.
6077 const auto numPermute = permuteToLIDs.extent (0);
6078 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6079 (numPermute != permuteFromLIDs.extent (0),
6080 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6081 << numPermute <<
" != permuteFromLIDs.extent(0) = "
6082 << permuteFromLIDs.extent (0) <<
".");
6087 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
// Static graph: the helper takes raw host pointers, so the host copies of
// both lists must be up to date.
6088 if (isStaticGraph ()) {
6089 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6090 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6091 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6092 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6094 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6095 permuteToLIDs_h.data(),
6096 permuteFromLIDs_h.data(),
// Non-static graph: the helper takes the DualViews directly.
6100 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6101 permuteFromLIDs, numPermute);
6105 std::ostringstream os;
6106 os << *prefix <<
"Done" << endl;
6107 std::cerr << os.str();
// CrsMatrix::packAndPrepare: packs the rows named by exportLIDs from the
// source object into the 'exports' buffer for an Import/Export.
//
// Parameters visible in this excerpt:
//   exportLIDs         - local row indices to pack.
//   exports            - output byte buffer (reallocated if too small).
//   numPacketsPerLID   - output: number of packets per exported row.
//   constantNumPackets - output: forwarded to packNew()/pack().
//
// Two paths:
//   * source is a CrsMatrix with the same template parameters: packNew().
//   * otherwise it must be a RowMatrix (else std::invalid_argument); its
//     pack() fills a temporary Teuchos::Array<char> whose size determines
//     whether 'exports' is reallocated.
// Exceptions thrown by packNew()/pack() are caught and recorded in 'msg';
// a reduceAll with REDUCE_MAX over lclBad produces gblBad, and
// std::logic_error is thrown afterwards.
// NOTE(review): the lines that set lclBad and guard the final throws are not
// visible in this excerpt.
//
// Fix relative to the previous text: myRank was initialized from
// comm.getSize(), so every "Proc <n>" error message reported the
// communicator size rather than the calling process's rank. It now uses
// comm.getRank().
6111 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6116 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6117 Kokkos::DualView<char*, buffer_device_type>& exports,
6118 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6119 size_t& constantNumPackets)
6124 using Teuchos::outArg;
6125 using Teuchos::REDUCE_MAX;
6126 using Teuchos::reduceAll;
6128 typedef LocalOrdinal LO;
6129 typedef GlobalOrdinal GO;
6130 const char tfecfFuncName[] =
"packAndPrepare: ";
6131 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6133 const bool debug = Behavior::debug(
"CrsMatrix");
6134 const bool verbose = Behavior::verbose(
"CrsMatrix");
// A null communicator is tested for before it is dereferenced.
6137 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6138 if (pComm.is_null ()) {
6141 const Teuchos::Comm<int>& comm = *pComm;
// Rank of this process; used to tag the per-process error messages below.
6142 const int myRank = comm.getRank ();
6144 std::unique_ptr<std::string> prefix;
6146 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
6147 std::ostringstream os;
6148 os << *prefix <<
"Start" << endl
6158 std::cerr << os.str ();
// Collects this process's exception text for later reporting.
6181 std::ostringstream msg;
// ---- Path 1: the source is a CrsMatrix of the same type ----
6184 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6185 const crs_matrix_type* srcCrsMat =
6186 dynamic_cast<const crs_matrix_type*
> (&source);
6187 if (srcCrsMat !=
nullptr) {
6189 std::ostringstream os;
6190 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6191 "calling packNew" << endl;
6192 std::cerr << os.str ();
6195 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6196 constantNumPackets);
6198 catch (std::exception& e) {
6200 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// ---- Path 2: the source is some other RowMatrix ----
6204 using Kokkos::HostSpace;
6205 using Kokkos::subview;
6206 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6207 using range_type = Kokkos::pair<size_t, size_t>;
6210 std::ostringstream os;
6211 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6213 std::cerr << os.str ();
6216 const row_matrix_type* srcRowMat =
6217 dynamic_cast<const row_matrix_type*
> (&source);
6218 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6219 (srcRowMat ==
nullptr, std::invalid_argument,
6220 "The source object of the Import or Export operation is neither a "
6221 "CrsMatrix (with the same template parameters as the target object), "
6222 "nor a RowMatrix (with the same first four template parameters as the "
// RowMatrix::pack works on host ArrayViews, so wrap the host view of the
// export LIDs (which must already be valid on the host).
6233 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6234 auto exportLIDs_h = exportLIDs.view_host ();
6235 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6236 exportLIDs_h.size ());
6240 Teuchos::Array<char> exports_a;
// numPacketsPerLID is pure output: reset its sync state and mark the host
// side as modified before writing into it.
6246 numPacketsPerLID.clear_sync_state ();
6247 numPacketsPerLID.modify_host ();
6248 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6249 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6250 numPacketsPerLID_h.size ());
6255 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6256 constantNumPackets);
6258 catch (std::exception& e) {
6260 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Grow 'exports' only when the packed data does not fit, keeping its label
// (or using "exports" if it had none).
6264 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6265 if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6266 const std::string oldLabel = exports.view_device().label ();
6267 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6268 exports = exports_type (newLabel, newAllocSize);
6273 exports.modify_host();
6275 auto exports_h = exports.view_host ();
6276 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
// Unmanaged host view over the temporary array's storage.
6280 typedef typename exports_type::t_host::execution_space HES;
6281 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6282 Kokkos::View<const char*, host_device_type>
6283 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
// Combine the per-process failure flags with a max-reduction.
6290 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6293 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6294 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6295 "one or more participating processes.");
6299 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6300 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6301 "or more participating processes. Here is this process' error "
6302 "message: " << msg.str ());
6306 std::ostringstream os;
6307 os << *prefix <<
"packAndPrepare: Done!" << endl
6317 std::cerr << os.str ();
// CrsMatrix::packRow: packs one matrix row into the byte buffer 'exports'.
//
// Parameters:
//   exports          - destination byte buffer.
//   offset           - byte offset in 'exports' where this row starts.
//   numEnt           - number of entries in the row.
//   gidsIn           - the row's global column indices (numEnt of them).
//   valsIn           - the row's values (numEnt of them).
//   numBytesPerValue - packed size of one value, in bytes.
//
// Packed layout, starting at 'offset' (from the *Beg/*Len offsets below):
//   [ entry count | numEnt global indices | numEnt values ]
//
// Throws std::logic_error if the number of bytes written differs from the
// expected total, and std::runtime_error if PackTraits::packArray reports a
// nonzero error code.
// NOTE(review): the definition of numEntLen, the packing of the entry count,
// the declaration of errorCode and the return statement are on lines not
// visible in this excerpt.
//
// Fix relative to the previous text: numEntLO has type LO but was
// initialized through static_cast<size_t>, which left an implicit narrowing
// conversion; the cast now names the destination type LO.
6321 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6323 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6324 packRow (
char exports[],
6325 const size_t offset,
6326 const size_t numEnt,
6327 const GlobalOrdinal gidsIn[],
6328 const impl_scalar_type valsIn[],
6329 const size_t numBytesPerValue)
const
6332 using Kokkos::subview;
6334 typedef LocalOrdinal LO;
6335 typedef GlobalOrdinal GO;
6336 typedef impl_scalar_type ST;
// The entry count is handled as a local ordinal.
6344 const LO numEntLO =
static_cast<LO> (numEnt);
// Byte offsets and lengths of the three sections of the packed row.
6346 const size_t numEntBeg = offset;
6348 const size_t gidsBeg = numEntBeg + numEntLen;
6349 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6350 const size_t valsBeg = gidsBeg + gidsLen;
6351 const size_t valsLen = numEnt * numBytesPerValue;
6353 char*
const numEntOut = exports + numEntBeg;
6354 char*
const gidsOut = exports + gidsBeg;
6355 char*
const valsOut = exports + valsBeg;
6357 size_t numBytesOut = 0;
// packArray returns (error code, bytes written); accumulate both.
6362 Kokkos::pair<int, size_t> p;
6363 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6364 errorCode += p.first;
6365 numBytesOut += p.second;
6367 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6368 errorCode += p.first;
6369 numBytesOut += p.second;
// Sanity checks: byte count first, then the accumulated error code.
6372 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6373 TEUCHOS_TEST_FOR_EXCEPTION
6374 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6375 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6376 << expectedNumBytes <<
".");
6377 TEUCHOS_TEST_FOR_EXCEPTION
6378 (errorCode != 0, std::runtime_error,
"packRow: "
6379 "PackTraits::packArray returned a nonzero error code");
// CrsMatrix::unpackRow: unpacks one row from the byte buffer 'imports'.
//
// Parameters:
//   gidsOut          - output: the row's global column indices.
//   valsOut          - output: the row's values.
//   imports          - source byte buffer.
//   offset           - byte offset in 'imports' where this row starts.
//   numBytes         - number of bytes that belong to this row.
//   numEnt           - number of entries expected in the row.
//   numBytesPerValue - packed size of one value, in bytes.
//
// Layout read, starting at 'offset' (from the *Beg/*Len offsets below):
//   [ entry count | numEnt global indices | numEnt values ]
//
// Throws std::logic_error when numBytes and numEnt are inconsistent, when
// the entry count stored in the buffer differs from numEnt or is zero, or
// when the number of bytes consumed differs from numBytes / the expected
// total; throws std::runtime_error when PackTraits::unpackArray reports a
// nonzero error code.
// NOTE(review): the definition of numEntLen, the unpacking of numEntOut, the
// declaration of errorCode, some guards and the return statement are on
// lines not visible in this excerpt.
6384 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6386 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6387 unpackRow (GlobalOrdinal gidsOut[],
6388 impl_scalar_type valsOut[],
6389 const char imports[],
6390 const size_t offset,
6391 const size_t numBytes,
6392 const size_t numEnt,
6393 const size_t numBytesPerValue)
6396 using Kokkos::subview;
6398 typedef LocalOrdinal LO;
6399 typedef GlobalOrdinal GO;
6400 typedef impl_scalar_type ST;
6402 Details::ProfilingRegion region_upack_row(
6403 "Tpetra::CrsMatrix::unpackRow",
// Zero bytes for the row: per the message below, this is an error when
// numEnt is nonzero (the numEnt test itself is not visible here).
6407 if (numBytes == 0) {
6410 const int myRank = this->getMap ()->getComm ()->getRank ();
6411 TEUCHOS_TEST_FOR_EXCEPTION
6412 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6413 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6414 "number of entries to unpack (as reported by numPacketsPerLID) "
6415 "for this row numEnt=" << numEnt <<
" != 0.");
// The converse inconsistency: bytes present but zero entries announced.
6420 if (numEnt == 0 && numBytes != 0) {
6421 const int myRank = this->getMap ()->getComm ()->getRank ();
6422 TEUCHOS_TEST_FOR_EXCEPTION
6423 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6424 "unpackRow: The number of entries to unpack (as reported by "
6425 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6426 "numBytes=" << numBytes <<
" != 0.");
// Byte offsets and lengths of the three sections of the packed row.
6432 const size_t numEntBeg = offset;
6434 const size_t gidsBeg = numEntBeg + numEntLen;
6435 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6436 const size_t valsBeg = gidsBeg + gidsLen;
6437 const size_t valsLen = numEnt * numBytesPerValue;
6439 const char*
const numEntIn = imports + numEntBeg;
6440 const char*
const gidsIn = imports + gidsBeg;
6441 const char*
const valsIn = imports + valsBeg;
6443 size_t numBytesOut = 0;
// The entry count read from the buffer must match numEnt and be nonzero;
// the message is assembled from whichever condition(s) failed.
6447 if (static_cast<size_t> (numEntOut) != numEnt ||
6448 numEntOut == static_cast<LO> (0)) {
6449 const int myRank = this->getMap ()->getComm ()->getRank ();
6450 std::ostringstream os;
6451 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6452 bool firstErrorCondition =
false;
6453 if (static_cast<size_t> (numEntOut) != numEnt) {
6454 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6455 <<
" does not equal number of entries unpacked from imports "
6456 "buffer numEntOut=" << numEntOut <<
".";
6457 firstErrorCondition =
true;
6459 if (numEntOut == static_cast<LO> (0)) {
6460 if (firstErrorCondition) {
6463 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6464 "but number of bytes to unpack for this row numBytes=" << numBytes
6465 <<
" != 0. This should never happen, since packRow should only "
6466 "ever pack rows with a nonzero number of entries. In this case, "
6467 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6470 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
// unpackArray returns (error code, bytes read); accumulate both.
6474 Kokkos::pair<int, size_t> p;
6475 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6476 errorCode += p.first;
6477 numBytesOut += p.second;
6479 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6480 errorCode += p.first;
6481 numBytesOut += p.second;
// Sanity checks: bytes consumed vs. the caller's count, then vs. the layout
// total, then the accumulated error code.
6484 TEUCHOS_TEST_FOR_EXCEPTION
6485 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6486 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6488 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6489 TEUCHOS_TEST_FOR_EXCEPTION
6490 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6491 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6492 << expectedNumBytes <<
".");
6494 TEUCHOS_TEST_FOR_EXCEPTION
6495 (errorCode != 0, std::runtime_error,
"unpackRow: "
6496 "PackTraits::unpackArray returned a nonzero error code");
// allocatePackSpaceNew: size (and, if needed, reallocate) the `exports`
// pack buffer for the rows listed in `exportLIDs`.
//
// Parameters:
//   exports         - [in/out] pack buffer; replaced by a new DualView of
//                     allocSize bytes when its current extent is smaller.
//   totalNumEntries - [out] reset to 0, then accumulated from
//                     getNumEntriesInLocalRow over all export rows.
//   exportLIDs      - local row indices to export; read through its host
//                     view, which must not need a host sync (asserted).
//
// When verbose output is enabled for "CrsMatrix", "Before:" and "After:"
// diagnostics are written to std::cerr.
//
// NOTE(review): the handling of rows for which getNumEntriesInLocalRow
// returns the invalid size_t value is not visible in this excerpt.
6501 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6503 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6504 allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6505 size_t& totalNumEntries,
6506 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6508 using Details::Behavior;
6511 typedef impl_scalar_type IST;
6512 typedef LocalOrdinal LO;
6513 typedef GlobalOrdinal GO;
6519 const bool verbose = Behavior::verbose(
"CrsMatrix");
6520 std::unique_ptr<std::string> prefix;
6522 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6523 std::ostringstream os;
6524 os << *prefix <<
"Before:"
6532 std::cerr << os.str ();
6537 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
// The loop below reads exportLIDs on the host, so the host copy must
// already be up to date.
6539 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6540 auto exportLIDs_h = exportLIDs.view_host ();
// Count the total number of entries over all rows to be exported.
6543 totalNumEntries = 0;
6544 for (LO i = 0; i < numExportLIDs; ++i) {
6545 const LO lclRow = exportLIDs_h[i];
6546 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
6549 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
6552 totalNumEntries += curNumEntries;
// Required size: one LO per exported row, plus one value (IST) and one
// global index (GO) per entry.
6563 const size_t allocSize =
6564 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
6565 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
// Reallocate only when the buffer is too small.  The existing label is
// kept, or "exports" is used if the old label was empty.
6566 if (static_cast<size_t> (exports.extent (0)) < allocSize) {
6567 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6569 const std::string oldLabel = exports.view_device().label ();
6570 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6571 exports = exports_type (newLabel, allocSize);
6575 std::ostringstream os;
6576 os << *prefix <<
"After:"
6584 std::cerr << os.str ();
// packNew: pack the rows listed in `exportLIDs` into `exports`, choosing
// the implementation based on whether the matrix has a static graph.
//
// When isStaticGraph() is false, the work is forwarded to
// packNonStaticNew with the same four arguments.
//
// NOTE(review): the call made in the static-graph branch is only
// partially visible in this excerpt (just its final argument,
// constantNumPackets) -- confirm the callee against the full file.
6588 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6591 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6592 Kokkos::DualView<char*, buffer_device_type>& exports,
6593 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6594 size_t& constantNumPackets)
const
6598 if (this->isStaticGraph ()) {
6601 constantNumPackets);
// Dynamic (non-static) graph: use the host-side row-by-row packer.
6604 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
6605 constantNumPackets);
// packNonStaticNew: pack the rows listed in `exportLIDs` into the
// `exports` byte buffer, one row at a time, through host views.
//
// Parameters:
//   exportLIDs         - local row indices to pack (read via host view).
//   exports            - [in/out] pack buffer; sized by
//                        allocatePackSpaceNew and marked modified on host.
//   numPacketsPerLID   - [out, despite being const&] per-row byte counts;
//                        entry i is set to the bytes packed for row i.
//   constantNumPackets - [out] set to 0 here.
//
// Throws std::invalid_argument when exportLIDs and numPacketsPerLID have
// different extents, and std::logic_error when a row's packed bytes
// would run past the end of the exports buffer.
//
// NOTE(review): the declaration and advancement of `offset`, and the
// handling of empty rows, are not visible in this excerpt.
6609 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6612 packNonStaticNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6613 Kokkos::DualView<char*, buffer_device_type>& exports,
6614 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6615 size_t& constantNumPackets)
const
6623 using LO = LocalOrdinal;
6624 using GO = GlobalOrdinal;
6625 using ST = impl_scalar_type;
6626 const char tfecfFuncName[] =
"packNonStaticNew: ";
6628 const bool verbose = Behavior::verbose(
"CrsMatrix");
6629 std::unique_ptr<std::string> prefix;
6631 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6632 std::ostringstream os;
6633 os << *prefix <<
"Start" << endl;
6634 std::cerr << os.str ();
6637 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
6638 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6639 (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
6640 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
6641 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
6647 constantNumPackets = 0;
// Size the exports buffer for all rows before packing anything.
6652 size_t totalNumEntries = 0;
6653 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
6654 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
// Packing writes through the host view, so reset the sync state and
// mark the host side as modified.
6657 exports.clear_sync_state();
6658 exports.modify_host();
6659 auto exports_h = exports.view_host ();
6661 std::ostringstream os;
6662 os << *prefix <<
"After marking exports as modified on host, "
6664 std::cerr << os.str ();
6668 auto exportLIDs_h = exportLIDs.view_host ();
// numPacketsPerLID is passed by const reference but is written below;
// const_cast is used to update its sync/modify flags.
6671 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6672 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6673 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6678 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
// Scratch storage for global column indices, used when the matrix is
// locally indexed and local indices must be converted per row.
6682 typename global_inds_host_view_type::non_const_type gidsIn_k;
6683 if (this->isLocallyIndexed()) {
6685 typename global_inds_host_view_type::non_const_type(
"packGids",
6690 for (
size_t i = 0; i < numExportLIDs; ++i) {
6691 const LO lclRow = exportLIDs_h[i];
6693 size_t numBytes = 0;
6694 size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
6701 numPacketsPerLID_h[i] = 0;
// Locally indexed: fetch the local row view and translate each local
// column index to a global one through the column Map.
6705 if (this->isLocallyIndexed ()) {
6706 typename global_inds_host_view_type::non_const_type gidsIn;
6707 values_host_view_type valsIn;
6711 local_inds_host_view_type lidsIn;
6712 this->getLocalRowView (lclRow, lidsIn, valsIn);
6713 const map_type& colMap = * (this->getColMap ());
6714 for (
size_t k = 0; k < numEnt; ++k) {
6715 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
6717 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
6719 const size_t numBytesPerValue =
6720 PackTraits<ST>::packValueCount (valsIn[0]);
6721 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6722 gidsIn.data (), valsIn.data (),
// Globally indexed: the row view already holds global column indices.
6725 else if (this->isGloballyIndexed ()) {
6726 global_inds_host_view_type gidsIn;
6727 values_host_view_type valsIn;
6733 const map_type& rowMap = * (this->getRowMap ());
6734 const GO gblRow = rowMap.getGlobalElement (lclRow);
6735 this->getGlobalRowView (gblRow, gidsIn, valsIn);
6737 const size_t numBytesPerValue =
6738 PackTraits<ST>::packValueCount (valsIn[0]);
6739 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6740 gidsIn.data (), valsIn.data (),
// Guard against writing past the end of the exports buffer.
6747 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6748 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6749 "First invalid offset into 'exports' pack buffer at index i = " << i
6750 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
6751 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
// Record this row's packed size in bytes for the receiving side.
6756 numPacketsPerLID_h[i] = numBytes;
6761 std::ostringstream os;
6762 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6769 std::cerr << os.str ();
// combineGlobalValuesRaw: raw-pointer front end for combineGlobalValues.
//
// Converts the local row index to a global one through the graph's row
// Map, wraps the raw `cols` and `vals` arrays in Teuchos::ArrayView
// objects of length numEnt (using nullptr when numEnt == 0), and
// forwards to combineGlobalValues together with combMode, prefix, debug
// and verbose.
//
// `vals` is reinterpreted from impl_scalar_type to Scalar.
// NOTE(review): this presumes the two types are layout-compatible --
// confirm.
//
// NOTE(review): the return type and the combMode / debug / verbose
// parameter declarations are not visible in this excerpt.
6773 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6775 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6776 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
6777 const LocalOrdinal numEnt,
6778 const impl_scalar_type vals[],
6779 const GlobalOrdinal cols[],
6781 const char*
const prefix,
6785 using GO = GlobalOrdinal;
6789 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
// Build non-owning views over the caller's arrays; an empty row gets a
// null pointer rather than whatever the caller passed.
6790 Teuchos::ArrayView<const GO> cols_av
6791 (numEnt == 0 ?
nullptr : cols, numEnt);
6792 Teuchos::ArrayView<const Scalar> vals_av
6793 (numEnt == 0 ?
nullptr : reinterpret_cast<const Scalar*> (vals), numEnt);
6798 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6799 prefix, debug, verbose);
// combineGlobalValues: combine incoming (column index, value) pairs into
// one global row according to `combineMode`.
//
// Static graph (isStaticGraph() is true):
//   ADD     -> sumIntoGlobalValues
//   REPLACE -> replaceGlobalValues
//   ABSMAX  -> transformGlobalValues with Details::AbsMax<Scalar>
//   INSERT  -> throws std::invalid_argument (forbidden with a static graph)
//   other   -> throws std::logic_error
//
// Otherwise (dynamic graph):
//   ADD or INSERT -> insertGlobalValuesFilteredChecked
//   ABSMAX        -> throws std::logic_error (not yet supported here)
//   REPLACE       -> throws std::logic_error (not yet supported here)
//   other         -> throws std::logic_error
//
// NOTE(review): the return type, the combineMode / debug / verbose
// parameter declarations and the `else` keywords separating the two
// halves are not visible in this excerpt.
6803 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6805 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6806 combineGlobalValues(
6807 const GlobalOrdinal globalRowIndex,
6808 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6809 const Teuchos::ArrayView<const Scalar>& values,
6811 const char*
const prefix,
6815 const char tfecfFuncName[] =
"combineGlobalValues: ";
// Static graph: only existing entries may be modified.
6817 if (isStaticGraph ()) {
6821 if (combineMode ==
ADD) {
6822 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6824 else if (combineMode ==
REPLACE) {
6825 replaceGlobalValues (globalRowIndex, columnIndices, values);
6827 else if (combineMode ==
ABSMAX) {
6828 using ::Tpetra::Details::AbsMax;
6830 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
6834 else if (combineMode ==
INSERT) {
6835 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6836 (isStaticGraph() && combineMode ==
INSERT,
6837 std::invalid_argument,
"INSERT combine mode is forbidden "
6838 "if the matrix has a static (const) graph (i.e., was "
6839 "constructed with the CrsMatrix constructor that takes a "
6840 "const CrsGraph pointer).");
6843 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6844 (
true, std::logic_error,
"Invalid combine mode; should "
6846 "Please report this bug to the Tpetra developers.");
// Dynamic graph: ADD and INSERT both go through the filtered, checked
// insertion routine.
6850 if (combineMode ==
ADD || combineMode ==
INSERT) {
6857 insertGlobalValuesFilteredChecked(globalRowIndex,
6858 columnIndices, values, prefix, debug, verbose);
6869 else if (combineMode ==
ABSMAX) {
6870 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6871 ! isStaticGraph () && combineMode ==
ABSMAX, std::logic_error,
6872 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6875 else if (combineMode ==
REPLACE) {
6876 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6877 ! isStaticGraph () && combineMode ==
REPLACE, std::logic_error,
6878 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6882 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6883 true, std::logic_error,
"Should never get here! Please report this "
6884 "bug to the Tpetra developers.");
// unpackAndCombine: validate the arguments, then unpack received row
// data and combine it into this matrix via unpackAndCombineImpl.
//
// Parameters (as visible):
//   importLIDs         - local row indices receiving data.
//   imports            - packed input buffer.
//   numPacketsPerLID   - per-row packet counts; its extent must equal
//                        importLIDs.extent(0).
//   constantNumPackets - forwarded to unpackAndCombineImpl.
//
// Throws std::invalid_argument when combineMode is not found in
// validModes, or when importLIDs and numPacketsPerLID differ in extent.
//
// Two call paths to unpackAndCombineImpl are visible.  One wraps the call
// in try/catch, reduces an error flag with REDUCE_MAX, gathers the
// messages with gathervPrint and throws std::logic_error.  The other
// calls unpackAndCombineImpl directly.
// NOTE(review): the condition selecting between them, the declaration of
// validModes, lclBad and gblBad, and the body of the ZERO branch are not
// visible in this excerpt.
6889 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6893 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6894 Kokkos::DualView<char*, buffer_device_type> imports,
6895 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6896 const size_t constantNumPackets,
6903 const char tfecfFuncName[] =
"unpackAndCombine: ";
6904 ProfilingRegion regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
6906 const bool debug = Behavior::debug(
"CrsMatrix");
6907 const bool verbose = Behavior::verbose(
"CrsMatrix");
6908 constexpr
int numValidModes = 5;
// Human-readable names, used only to build the error message below.
6911 const char* validModeNames[numValidModes] =
6912 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6914 std::unique_ptr<std::string> prefix;
6916 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6917 std::ostringstream os;
6918 os << *prefix <<
"Start:" << endl
6928 << *prefix <<
" constantNumPackets: " << constantNumPackets
6932 std::cerr << os.str ();
// Reject any combine mode that is not in the list of valid modes.
6936 if (std::find (validModes, validModes+numValidModes, combineMode) ==
6937 validModes+numValidModes) {
6938 std::ostringstream os;
6939 os <<
"Invalid combine mode. Valid modes are {";
6940 for (
int k = 0; k < numValidModes; ++k) {
6941 os << validModeNames[k];
6942 if (k < numValidModes - 1) {
6947 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6948 (
true, std::invalid_argument, os.str ());
6950 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6951 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
6952 std::invalid_argument,
"importLIDs.extent(0)="
6953 << importLIDs.extent(0)
6954 <<
" != numPacketsPerLID.extent(0)="
6955 << numPacketsPerLID.extent(0) <<
".");
6958 if (combineMode ==
ZERO) {
// Checked path: catch exceptions locally, agree across processes on
// whether any process failed, and report all messages together.
6963 using Teuchos::reduceAll;
6964 std::unique_ptr<std::ostringstream> msg (
new std::ostringstream ());
6967 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6968 constantNumPackets, combineMode,
6970 }
catch (std::exception& e) {
6975 const Teuchos::Comm<int>& comm = * (this->getComm ());
6976 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6977 lclBad, Teuchos::outArg (gblBad));
6983 std::ostringstream os;
6984 os <<
"Proc " << comm.getRank () <<
": " << msg->str () << endl;
6985 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
6986 ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
6987 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6988 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
6989 "threw an exception on one or more participating processes: "
6990 << endl << msg->str ());
// Unchecked path: call the implementation directly.
6994 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6995 constantNumPackets, combineMode,
7000 std::ostringstream os;
7001 os << *prefix <<
"Done!" << endl
7011 std::cerr << os.str ();
// unpackAndCombineImpl: dispatch the unpack-and-combine work depending
// on whether the matrix has a static graph.
//
// Static graph: forwards to Details::unpackCrsMatrixAndCombineNew.
//
// Otherwise:
//   1. compute per-row padding with
//      myGraph_->computePaddingForCrsMatrixUnpack; any std::exception
//      from that call is rethrown as std::runtime_error prefixed with
//      this process's rank (-1 if the communicator is null);
//   2. apply the padding with applyCrsPadding;
//   3. call unpackAndCombineImplNonStatic.
//
// Progress messages are written to std::cerr.
// NOTE(review): the function name line, the combineMode / verbose
// parameters and the `if (verbose)` guards are not visible in this
// excerpt.
7015 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7019 const Kokkos::DualView<
const local_ordinal_type*,
7020 buffer_device_type>& importLIDs,
7021 Kokkos::DualView<char*, buffer_device_type> imports,
7022 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7023 const size_t constantNumPackets,
7028 "Tpetra::CrsMatrix::unpackAndCombineImpl",
7032 const char tfecfFuncName[] =
"unpackAndCombineImpl";
7033 std::unique_ptr<std::string> prefix;
7035 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
7036 std::ostringstream os;
7037 os << *prefix <<
"isStaticGraph(): "
7038 << (isStaticGraph() ?
"true" :
"false")
7039 <<
", importLIDs.extent(0): "
7040 << importLIDs.extent(0)
7041 <<
", imports.extent(0): "
7042 << imports.extent(0)
7043 <<
", numPacketsPerLID.extent(0): "
7044 << numPacketsPerLID.extent(0)
7046 std::cerr << os.str();
// Static graph: the structure cannot change, so use the dedicated
// unpack-and-combine routine.
7049 if (isStaticGraph ()) {
7050 using Details::unpackCrsMatrixAndCombineNew;
7051 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7052 importLIDs, constantNumPackets,
// Dynamic graph: compute the padding for the incoming entries before
// unpacking.
7057 using padding_type =
typename crs_graph_type::padding_type;
7058 std::unique_ptr<padding_type> padding;
7060 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7061 importLIDs, imports, numPacketsPerLID, verbose);
// Add this process's rank to the error so the failing process can be
// identified; the row Map or communicator may be null.
7063 catch (std::exception& e) {
7064 const auto rowMap = getRowMap();
7065 const auto comm = rowMap.is_null() ? Teuchos::null :
7067 const int myRank = comm.is_null() ? -1 : comm->getRank();
7068 TEUCHOS_TEST_FOR_EXCEPTION
7069 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7070 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7071 "threw an exception: " << e.what());
7074 std::ostringstream os;
7075 os << *prefix <<
"Call applyCrsPadding" << endl;
7076 std::cerr << os.str();
7078 applyCrsPadding(*padding, verbose);
7081 std::ostringstream os;
7082 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7083 std::cerr << os.str();
7085 unpackAndCombineImplNonStatic(importLIDs, imports,
7092 std::ostringstream os;
7093 os << *prefix <<
"Done" << endl;
7094 std::cerr << os.str();
// unpackAndCombineImplNonStatic: host-side unpack-and-combine.
// Within this excerpt it is called from the non-static-graph branch of
// unpackAndCombineImpl.
//
// Steps visible below:
//   1. sync `imports` and `numPacketsPerLID` to host if they need it;
//      `importLIDs` must already be current on host (asserted);
//   2. first pass over all rows: validate the byte counts and find the
//      largest per-row entry count (maxRowNumEnt);
//   3. allocate scratch arrays of that length for global indices, local
//      indices and values;
//   4. second pass: unpack each row with unpackRow into the scratch
//      arrays and hand it to combineGlobalValuesRaw.
//
// Throws std::logic_error when a row's bytes would run past the end of
// `imports`, when byte counts are inconsistent, when a row with a
// nonzero byte count reports zero entries, or when unpackRow consumes a
// different number of bytes than expected.
//
// NOTE(review): the declarations of offset, numEntLO, gid, lid, val and
// HES, the bodies of the `numBytes == 0` and ZERO/empty early-exit
// branches, and the offset increments are not visible in this excerpt.
7098 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7100 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7101 unpackAndCombineImplNonStatic(
7102 const Kokkos::DualView<
const local_ordinal_type*,
7103 buffer_device_type>& importLIDs,
7104 Kokkos::DualView<char*, buffer_device_type> imports,
7105 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7106 const size_t constantNumPackets,
7110 using Kokkos::subview;
7111 using Kokkos::MemoryUnmanaged;
7112 using Details::Behavior;
7115 using Details::PackTraits;
7116 using Details::ScalarViewTraits;
7118 using LO = LocalOrdinal;
7119 using GO = GlobalOrdinal;
7120 using ST = impl_scalar_type;
7121 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7123 typename View<int*, device_type>::HostMirror::execution_space;
7124 using pair_type = std::pair<typename View<int*, HES>::size_type,
7125 typename View<int*, HES>::size_type>;
7126 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7127 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7128 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7130 const bool debug = Behavior::debug(
"CrsMatrix");
7131 const bool verbose = Behavior::verbose(
"CrsMatrix");
7132 std::unique_ptr<std::string> prefix;
7134 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7135 std::ostringstream os;
7136 os << *prefix << endl;
7137 std::cerr << os.str ();
// Raw C string of the prefix, passed down to combineGlobalValuesRaw;
// null when verbose output is off.
7139 const char*
const prefix_raw =
7140 verbose ? prefix.get()->c_str() :
nullptr;
7142 const size_type numImportLIDs = importLIDs.extent (0);
7143 if (combineMode ==
ZERO || numImportLIDs == 0) {
7147 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
7148 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
// Unpacking runs on the host, so bring both buffers up to date there.
7153 if (imports.need_sync_host()) {
7154 imports.sync_host ();
7156 auto imports_h = imports.view_host();
7159 if (numPacketsPerLID.need_sync_host()) {
7160 numPacketsPerLID.sync_host ();
7162 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
7164 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7165 auto importLIDs_h = importLIDs.view_host();
7167 size_t numBytesPerValue;
7178 numBytesPerValue = PackTraits<ST>::packValueCount (val);
// First pass: validate each row's byte count and record the largest
// number of entries in any incoming row.
7183 size_t maxRowNumEnt = 0;
7184 for (size_type i = 0; i < numImportLIDs; ++i) {
7185 const size_t numBytes = numPacketsPerLID_h[i];
7186 if (numBytes == 0) {
7191 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7192 (offset + numBytes >
size_t(imports_h.extent (0)),
7193 std::logic_error,
": At local row index importLIDs_h[i="
7194 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7195 <<
") + numBytes (=" << numBytes <<
") > "
7196 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
7201 const size_t theNumBytes =
7203 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7204 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7205 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
7207 const char*
const inBuf = imports_h.data () + offset;
7208 const size_t actualNumBytes =
7212 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7213 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7214 <<
", actualNumBytes=" << actualNumBytes
7215 <<
" > numBytes=" << numBytes <<
".");
7216 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7217 (numEntLO == 0, std::logic_error,
": At local row index "
7218 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7219 "the number of entries read from the packed data is "
7220 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7224 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
// Scratch arrays sized for the longest row, so they are allocated once
// and reused for every row in the second pass.
7232 View<GO*, HES> gblColInds;
7233 View<LO*, HES> lclColInds;
7234 View<ST*, HES> vals;
7247 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7248 gid, maxRowNumEnt,
"gids");
7249 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7250 lid, maxRowNumEnt,
"lids");
7251 vals = ScalarViewTraits<ST, HES>::allocateArray(
7252 val, maxRowNumEnt,
"vals");
// Second pass: unpack each row into the scratch arrays and combine it
// into the matrix.
7256 for (size_type i = 0; i < numImportLIDs; ++i) {
7257 const size_t numBytes = numPacketsPerLID_h[i];
7258 if (numBytes == 0) {
7262 const char*
const inBuf = imports_h.data () + offset;
7265 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7266 const LO lclRow = importLIDs_h[i];
// Unmanaged subviews covering just the first numEnt scratch slots.
7268 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7269 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7271 const size_t numBytesOut =
7272 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7273 offset, numBytes, numEnt, numBytesPerValue);
7274 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7275 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7276 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7277 << numBytesOut <<
".");
7279 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7280 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7281 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7282 combineMode, prefix_raw, debug, verbose);
7288 std::ostringstream os;
7289 os << *prefix <<
"Done" << endl;
7290 std::cerr << os.str();
// getColumnMapMultiVector (name taken from the error messages below; the
// signature line itself is not visible in this excerpt).
//
// Produces a MultiVector distributed over the matrix's column Map,
// reusing the cached importMV_ when it exists and has the requested
// number of vectors.
//
// Throws std::runtime_error when the matrix has no column Map, or when
// its graph is not fill complete.
//
// A column-Map vector is set up only when the graph has an importer or
// `force` is true.
// NOTE(review): the first parameter, the declarations of numVecs and
// X_colMap, and the return statement are not visible here.
7294 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7295 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7298 const bool force)
const
7300 using Teuchos::null;
7304 TEUCHOS_TEST_FOR_EXCEPTION(
7305 ! this->hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7306 "MapMultiVector: You may only call this method if the matrix has a "
7307 "column Map. If the matrix does not yet have a column Map, you should "
7308 "first call fillComplete (with domain and range Map if necessary).");
7312 TEUCHOS_TEST_FOR_EXCEPTION(
7313 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7314 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7315 "this matrix's graph is fill complete.");
7318 RCP<const import_type> importer = this->getGraph ()->getImporter ();
7319 RCP<const map_type> colMap = this->getColMap ();
// Allocate a new vector only when there is no cached one or its number
// of vectors differs; otherwise reuse the cache.
7332 if (! importer.is_null () || force) {
7333 if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
7334 X_colMap = rcp (
new MV (colMap, numVecs));
7337 importMV_ = X_colMap;
7340 X_colMap = importMV_;
// getRowMapMultiVector (name taken from the error message below; the
// signature line itself is not visible in this excerpt).
//
// Produces a MultiVector distributed over the matrix's row Map, reusing
// the cached exportMV_ when it exists and has the requested number of
// vectors.
//
// Throws std::runtime_error when the matrix's graph is not fill
// complete.
//
// A row-Map vector is set up only when the graph has an exporter or
// `force` is true.
// NOTE(review): the first parameter, the declarations of numVecs and
// Y_rowMap, and the return statement are not visible here.
7351 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7352 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7355 const bool force)
const
7357 using Teuchos::null;
7363 TEUCHOS_TEST_FOR_EXCEPTION(
7364 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7365 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7366 "matrix's graph is fill complete.");
7369 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
7373 RCP<const map_type> rowMap = this->getRowMap ();
// Allocate a new vector only when there is no cached one or its number
// of vectors differs; otherwise reuse the cache.
7385 if (! exporter.is_null () || force) {
7386 if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
7387 Y_rowMap = rcp (
new MV (rowMap, numVecs));
7388 exportMV_ = Y_rowMap;
7391 Y_rowMap = exportMV_;
// removeEmptyProcessesInPlace (name taken from the error message and the
// forwarded call below; the signature line is not visible in this
// excerpt).
//
// Forwards to the owned graph's removeEmptyProcessesInPlace(newMap),
// then refreshes this object's map_ from the row Map and re-points
// staticGraph_ at myGraph_.
//
// Throws std::logic_error when myGraph_ is null.  Per the message below,
// that is the case when the matrix was created with a constant graph,
// which it is not allowed to modify.
7397 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7402 TEUCHOS_TEST_FOR_EXCEPTION(
7403 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7404 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7405 "was created with a constant graph (that is, when it was created using "
7406 "the version of its constructor that takes an RCP<const CrsGraph>). "
7407 "This is because the matrix is not allowed to modify the graph in that "
7408 "case, but removing empty processes requires modifying the graph.");
7409 myGraph_->removeEmptyProcessesInPlace (newMap);
// Re-read the row Map after the graph call above.
7413 this->map_ = this->getRowMap ();
// Keep the const graph handle pointing at the same (owned) graph.
7417 staticGraph_ = Teuchos::rcp_const_cast<
const Graph> (myGraph_);
// add: build and return a new matrix C = alpha*A + beta*B, where B is
// this matrix (see the "Compute C = alpha*A + beta*B" message and
// `B = *this` below).
//
// Parameters (as visible):
//   domainMap, rangeMap - optional Maps for C; when null, B's Map is
//                         used, falling back to A's.
//   params              - optional ParameterList; reads
//                         "Call fillComplete" (default true) and the
//                         sublists "Constructor parameters" and
//                         "fillComplete parameters".
//
// Throws std::invalid_argument when no usable domain or range Map can be
// determined, when the Maps of A, B and the arguments fail the isSameAs
// checks below, or when A's and B's row Maps are not the same.  Throws
// std::logic_error if C is unexpectedly null after construction.
//
// NOTE(review): the function name line, the alpha / beta / A parameters,
// the declarations of A_domainMap, A_numEntries, ind and val, the
// retrieval of A's row data, the scaling of B's values inside the final
// loop, and the return statement are not visible in this excerpt.
7420 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7421 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7426 const Teuchos::RCP<const map_type>& domainMap,
7427 const Teuchos::RCP<const map_type>& rangeMap,
7428 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7430 using Teuchos::Array;
7431 using Teuchos::ArrayView;
7432 using Teuchos::ParameterList;
7435 using Teuchos::rcp_implicit_cast;
7436 using Teuchos::sublist;
7440 using crs_matrix_type =
7442 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7446 std::unique_ptr<std::string> prefix;
7448 prefix = this->createPrefix(
"CrsMatrix",
"add");
7449 std::ostringstream os;
7450 os << *prefix <<
"Start" << endl;
7451 std::cerr << os.str ();
7454 const crs_matrix_type& B = *
this;
7455 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7456 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7463 RCP<const map_type> A_rangeMap = A.
getRangeMap ();
7464 RCP<const map_type> B_domainMap = B.getDomainMap ();
7465 RCP<const map_type> B_rangeMap = B.getRangeMap ();
7467 RCP<const map_type> theDomainMap = domainMap;
7468 RCP<const map_type> theRangeMap = rangeMap;
// Choose the domain Map: the caller's if given, else B's, else A's.
7470 if (domainMap.is_null ()) {
7471 if (B_domainMap.is_null ()) {
7472 TEUCHOS_TEST_FOR_EXCEPTION(
7473 A_domainMap.is_null (), std::invalid_argument,
7474 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7475 "then you must supply a nonnull domain Map to this method.");
7476 theDomainMap = A_domainMap;
7478 theDomainMap = B_domainMap;
// Choose the range Map the same way.
7481 if (rangeMap.is_null ()) {
7482 if (B_rangeMap.is_null ()) {
7483 TEUCHOS_TEST_FOR_EXCEPTION(
7484 A_rangeMap.is_null (), std::invalid_argument,
7485 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7486 "then you must supply a nonnull range Map to this method.");
7487 theRangeMap = A_rangeMap;
7489 theRangeMap = B_rangeMap;
// Compatibility checks: whichever Maps are available from A, B and the
// arguments must agree under isSameAs.
7497 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7498 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7499 TEUCHOS_TEST_FOR_EXCEPTION
7500 (! B_domainMap->isSameAs(*A_domainMap),
7501 std::invalid_argument,
7502 errPfx <<
"The input RowMatrix A must have a domain Map "
7503 "which is the same as (isSameAs) this RowMatrix's "
7505 TEUCHOS_TEST_FOR_EXCEPTION
7506 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7507 errPfx <<
"The input RowMatrix A must have a range Map "
7508 "which is the same as (isSameAs) this RowMatrix's range "
7510 TEUCHOS_TEST_FOR_EXCEPTION
7511 (! domainMap.is_null() &&
7512 ! domainMap->isSameAs(*B_domainMap),
7513 std::invalid_argument,
7514 errPfx <<
"The input domain Map must be the same as "
7515 "(isSameAs) this RowMatrix's domain Map.");
7516 TEUCHOS_TEST_FOR_EXCEPTION
7517 (! rangeMap.is_null() &&
7518 ! rangeMap->isSameAs(*B_rangeMap),
7519 std::invalid_argument,
7520 errPfx <<
"The input range Map must be the same as "
7521 "(isSameAs) this RowMatrix's range Map.");
7524 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7525 TEUCHOS_TEST_FOR_EXCEPTION
7526 (! domainMap.is_null() &&
7527 ! domainMap->isSameAs(*B_domainMap),
7528 std::invalid_argument,
7529 errPfx <<
"The input domain Map must be the same as "
7530 "(isSameAs) this RowMatrix's domain Map.");
7531 TEUCHOS_TEST_FOR_EXCEPTION
7532 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7533 std::invalid_argument,
7534 errPfx <<
"The input range Map must be the same as "
7535 "(isSameAs) this RowMatrix's range Map.");
7538 TEUCHOS_TEST_FOR_EXCEPTION
7539 (domainMap.is_null() || rangeMap.is_null(),
7540 std::invalid_argument, errPfx <<
"If neither A nor B "
7541 "have a domain and range Map, then you must supply a "
7542 "nonnull domain and range Map to this method.");
// Read optional settings; callFillComplete stays true if params is null.
7549 bool callFillComplete =
true;
7550 RCP<ParameterList> constructorSublist;
7551 RCP<ParameterList> fillCompleteSublist;
7552 if (! params.is_null()) {
7554 params->get(
"Call fillComplete", callFillComplete);
7555 constructorSublist = sublist(params,
"Constructor parameters");
7556 fillCompleteSublist = sublist(params,
"fillComplete parameters");
7559 RCP<const map_type> A_rowMap = A.
getRowMap ();
7560 RCP<const map_type> B_rowMap = B.getRowMap ();
7561 RCP<const map_type> C_rowMap = B_rowMap;
7562 RCP<crs_matrix_type> C;
// Size C's rows as an upper bound: entries of A (only if alpha is
// nonzero) plus entries of B, per local row.
7568 if (A_rowMap->isSameAs (*B_rowMap)) {
7569 const LO localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7570 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
7573 if (alpha != ZERO) {
7574 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7576 C_maxNumEntriesPerRow[localRow] += A_numEntries;
7581 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7582 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7583 C_maxNumEntriesPerRow[localRow] += B_numEntries;
7587 if (constructorSublist.is_null ()) {
7588 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow ()));
7590 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
7591 constructorSublist));
7602 TEUCHOS_TEST_FOR_EXCEPTION
7603 (
true, std::invalid_argument, errPfx <<
"The row maps must "
7604 "be the same for statically allocated matrices, to ensure "
7605 "that there is sufficient space to do the addition.");
7608 TEUCHOS_TEST_FOR_EXCEPTION
7609 (C.is_null (), std::logic_error,
7610 errPfx <<
"C should not be null at this point. "
7611 "Please report this bug to the Tpetra developers.");
7614 std::ostringstream os;
7615 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7616 std::cerr << os.str ();
7618 using gids_type = nonconst_global_inds_host_view_type;
7619 using vals_type = nonconst_values_host_view_type;
// Insert alpha*A row by row; skipped entirely when alpha is zero.  The
// ind/val scratch buffers grow on demand and are reused across rows.
7623 if (alpha != ZERO) {
7624 const LO A_localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7625 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7627 const GO globalRow = A_rowMap->getGlobalElement (localRow);
7628 if (A_numEntries > static_cast<size_t> (ind.size ())) {
7629 Kokkos::resize(ind,A_numEntries);
7630 Kokkos::resize(val,A_numEntries);
7632 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, A_numEntries));
7633 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, A_numEntries));
7637 for (
size_t k = 0; k < A_numEntries; ++k) {
7638 valView[k] *= alpha;
7641 C->insertGlobalValues (globalRow, A_numEntries,
7642 reinterpret_cast<Scalar *>(valView.data()),
// Insert B's rows using the same scratch buffers.
7648 const LO B_localNumRows =
static_cast<LO
> (B_rowMap->getLocalNumElements ());
7649 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7650 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7651 const GO globalRow = B_rowMap->getGlobalElement (localRow);
7652 if (B_numEntries > static_cast<size_t> (ind.size ())) {
7653 Kokkos::resize(ind,B_numEntries);
7654 Kokkos::resize(val,B_numEntries);
7656 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, B_numEntries));
7657 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, B_numEntries));
7658 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
7661 for (
size_t k = 0; k < B_numEntries; ++k) {
7665 C->insertGlobalValues (globalRow, B_numEntries,
7666 reinterpret_cast<Scalar *>(valView.data()),
// Optionally finish C with the chosen domain and range Maps.
7671 if (callFillComplete) {
7673 std::ostringstream os;
7674 os << *prefix <<
"Call fillComplete on C" << endl;
7675 std::cerr << os.str ();
7677 if (fillCompleteSublist.is_null ()) {
7678 C->fillComplete (theDomainMap, theRangeMap);
7680 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
7684 std::ostringstream os;
7685 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7686 std::cerr << os.str ();
7690 std::ostringstream os;
7691 os << *prefix <<
"Done" << endl;
7692 std::cerr << os.str ();
7699 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7703 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7704 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7705 const Teuchos::RCP<const map_type>& domainMap,
7706 const Teuchos::RCP<const map_type>& rangeMap,
7707 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7714 using Teuchos::ArrayRCP;
7715 using Teuchos::ArrayView;
7716 using Teuchos::Comm;
7717 using Teuchos::ParameterList;
7720 typedef LocalOrdinal LO;
7721 typedef GlobalOrdinal GO;
7722 typedef node_type NT;
7727 const bool debug = Behavior::debug(
"CrsMatrix");
7728 const bool verbose = Behavior::verbose(
"CrsMatrix");
7729 int MyPID = getComm ()->getRank ();
7731 std::unique_ptr<std::string> verbosePrefix;
7734 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7735 std::ostringstream os;
7736 os <<
"Start" << endl;
7737 std::cerr << os.str();
7744 bool reverseMode =
false;
7745 bool restrictComm =
false;
7747 int mm_optimization_core_count =
7748 Behavior::TAFC_OptimizationCoreCount();
7749 RCP<ParameterList> matrixparams;
7750 bool overrideAllreduce =
false;
7751 bool useKokkosPath =
false;
7752 if (! params.is_null ()) {
7753 matrixparams = sublist (params,
"CrsMatrix");
7754 reverseMode = params->get (
"Reverse Mode", reverseMode);
7755 useKokkosPath = params->get (
"TAFC: use kokkos path", useKokkosPath);
7756 restrictComm = params->get (
"Restrict Communicator", restrictComm);
7757 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
7758 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7759 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
7761 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7762 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7763 if(reverseMode) isMM =
false;
7767 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
7769 int reduced_mismatch = 0;
7770 if (isMM && !overrideAllreduce) {
7773 const bool source_vals = ! getGraph ()->getImporter ().is_null();
7774 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
7775 rowTransfer.getRemoteLIDs ().size() == 0);
7776 mismatch = (source_vals != target_vals) ? 1 : 0;
7779 Teuchos::REDUCE_MAX, * (getComm ()));
7782 #ifdef HAVE_TPETRA_MMM_TIMINGS
7783 using Teuchos::TimeMonitor;
7785 if(!params.is_null())
7786 label = params->get(
"Timer Label",label);
7787 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
7790 std::ostringstream os;
7791 if(isMM) os<<
":MMOpt";
7792 else os<<
":MMLegacy";
7796 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
7804 const import_type* xferAsImport =
dynamic_cast<const import_type*
> (&rowTransfer);
7805 const export_type* xferAsExport =
dynamic_cast<const export_type*
> (&rowTransfer);
7806 TEUCHOS_TEST_FOR_EXCEPTION(
7807 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7808 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7809 "argument must be either an Import or an Export, and its template "
7810 "parameters must match the corresponding template parameters of the "
7818 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<
const import_type> (domainTransfer);
7819 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<
const export_type> (domainTransfer);
7821 if(! domainTransfer.is_null()) {
7822 TEUCHOS_TEST_FOR_EXCEPTION(
7823 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7824 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7825 "argument must be either an Import or an Export, and its template "
7826 "parameters must match the corresponding template parameters of the "
7829 TEUCHOS_TEST_FOR_EXCEPTION(
7830 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
7831 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
7832 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7833 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7834 "arguments must be of the same type (either Import or Export).");
7836 TEUCHOS_TEST_FOR_EXCEPTION(
7837 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
7838 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
7839 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7840 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7841 "arguments must be of the same type (either Import or Export).");
7847 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
7851 RCP<const map_type> MyRowMap = reverseMode ?
7852 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
7853 RCP<const map_type> MyColMap;
7854 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
7855 domainMap : getDomainMap ();
7856 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
7857 rangeMap : getRangeMap ();
7858 RCP<const map_type> BaseRowMap = MyRowMap;
7859 RCP<const map_type> BaseDomainMap = MyDomainMap;
7867 if (! destMat.is_null ()) {
7878 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
7879 ! destMat->getGraph ()->isGloballyIndexed ();
7880 TEUCHOS_TEST_FOR_EXCEPTION(
7881 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
7882 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7883 "to be nonnull, if its graph is empty (neither locally nor globally "
7892 TEUCHOS_TEST_FOR_EXCEPTION(
7893 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
7894 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7895 "input argument 'destMat' is not the same as the (row) Map specified "
7896 "by the input argument 'rowTransfer'.");
7897 TEUCHOS_TEST_FOR_EXCEPTION(
7898 ! destMat->checkSizes (*
this), std::invalid_argument,
7899 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7900 "destination matrix, but checkSizes() indicates that it is not a legal "
7901 "legal target for redistribution from the source matrix (*this). This "
7902 "may mean that they do not have the same dimensions.");
7916 TEUCHOS_TEST_FOR_EXCEPTION(
7917 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7918 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7919 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7920 TEUCHOS_TEST_FOR_EXCEPTION(
7921 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7922 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7923 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7926 TEUCHOS_TEST_FOR_EXCEPTION(
7927 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7928 std::invalid_argument,
7929 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7930 "argument must be the same as the rebalanced domain map 'domainMap'");
7932 TEUCHOS_TEST_FOR_EXCEPTION(
7933 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7934 std::invalid_argument,
7935 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7936 "argument must be the same as the rebalanced domain map 'domainMap'");
7949 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7950 ArrayView<const LO> ExportLIDs = reverseMode ?
7951 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7952 auto RemoteLIDs = reverseMode ?
7953 rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
7954 auto PermuteToLIDs = reverseMode ?
7955 rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
7956 auto PermuteFromLIDs = reverseMode ?
7957 rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
7958 Distributor& Distor = rowTransfer.getDistributor ();
7961 Teuchos::Array<int> SourcePids;
7964 RCP<const map_type> ReducedRowMap, ReducedColMap,
7965 ReducedDomainMap, ReducedRangeMap;
7966 RCP<const Comm<int> > ReducedComm;
7970 if (destMat.is_null ()) {
7971 destMat = rcp (
new this_CRS_type (MyRowMap, 0, matrixparams));
7978 #ifdef HAVE_TPETRA_MMM_TIMINGS
7979 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
7981 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
7982 ReducedComm = ReducedRowMap.is_null () ?
7984 ReducedRowMap->getComm ();
7985 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
7987 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
7989 MyDomainMap->replaceCommWithSubset (ReducedComm);
7990 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
7992 MyRangeMap->replaceCommWithSubset (ReducedComm);
7995 MyRowMap = ReducedRowMap;
7996 MyDomainMap = ReducedDomainMap;
7997 MyRangeMap = ReducedRangeMap;
8000 if (! ReducedComm.is_null ()) {
8001 MyPID = ReducedComm->getRank ();
8008 ReducedComm = MyRowMap->getComm ();
8017 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8020 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8022 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8023 #ifdef HAVE_TPETRA_MMM_TIMINGS
8024 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
8032 Import_Util::getPids (*MyImporter, SourcePids,
false);
8034 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8037 #ifdef HAVE_TPETRA_MMM_TIMINGS
8038 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
8040 IntVectorType SourceDomain_pids(getDomainMap (),
true);
8041 IntVectorType SourceCol_pids(getColMap());
8043 SourceDomain_pids.putScalar(MyPID);
8045 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8046 SourcePids.resize (getColMap ()->getLocalNumElements ());
8047 SourceCol_pids.get1dCopy (SourcePids ());
8049 else if (MyImporter.is_null ()) {
8051 #ifdef HAVE_TPETRA_MMM_TIMINGS
8052 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
8054 SourcePids.resize (getColMap ()->getLocalNumElements ());
8055 SourcePids.assign (getColMap ()->getLocalNumElements (), MyPID);
8057 else if ( ! MyImporter.is_null () &&
8058 ! domainTransfer.is_null () ) {
8063 #ifdef HAVE_TPETRA_MMM_TIMINGS
8064 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
8068 IntVectorType TargetDomain_pids (domainMap);
8069 TargetDomain_pids.putScalar (MyPID);
8072 IntVectorType SourceDomain_pids (getDomainMap ());
8075 IntVectorType SourceCol_pids (getColMap ());
8077 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8078 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8080 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8081 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8083 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8084 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8086 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8087 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8090 TEUCHOS_TEST_FOR_EXCEPTION(
8091 true, std::logic_error,
"Tpetra::CrsMatrix::"
8092 "transferAndFillComplete: Should never get here! "
8093 "Please report this bug to a Tpetra developer.");
8095 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8096 SourcePids.resize (getColMap ()->getLocalNumElements ());
8097 SourceCol_pids.get1dCopy (SourcePids ());
8099 else if ( ! MyImporter.is_null () &&
8100 BaseDomainMap->isSameAs (*BaseRowMap) &&
8101 getDomainMap ()->isSameAs (*getRowMap ())) {
8103 #ifdef HAVE_TPETRA_MMM_TIMINGS
8104 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
8107 IntVectorType TargetRow_pids (domainMap);
8108 IntVectorType SourceRow_pids (getRowMap ());
8109 IntVectorType SourceCol_pids (getColMap ());
8111 TargetRow_pids.putScalar (MyPID);
8112 if (! reverseMode && xferAsImport !=
nullptr) {
8113 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport,
INSERT);
8115 else if (reverseMode && xferAsExport !=
nullptr) {
8116 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport,
INSERT);
8118 else if (! reverseMode && xferAsExport !=
nullptr) {
8119 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport,
INSERT);
8121 else if (reverseMode && xferAsImport !=
nullptr) {
8122 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport,
INSERT);
8125 TEUCHOS_TEST_FOR_EXCEPTION(
8126 true, std::logic_error,
"Tpetra::CrsMatrix::"
8127 "transferAndFillComplete: Should never get here! "
8128 "Please report this bug to a Tpetra developer.");
8131 SourceCol_pids.doImport (SourceRow_pids, *MyImporter,
INSERT);
8132 SourcePids.resize (getColMap ()->getLocalNumElements ());
8133 SourceCol_pids.get1dCopy (SourcePids ());
8136 TEUCHOS_TEST_FOR_EXCEPTION(
8137 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8138 "transferAndFillComplete: This method only allows either domainMap == "
8139 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8140 "getDomainMap () == getRowMap ()).");
8144 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8146 #ifdef HAVE_TPETRA_MMM_TIMINGS
8147 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
8149 if (constantNumPackets == 0) {
8150 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8151 RemoteLIDs.view_host().size ());
8158 const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
8159 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8165 #ifdef HAVE_TPETRA_MMM_TIMINGS
8166 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
8169 using Teuchos::outArg;
8170 using Teuchos::REDUCE_MAX;
8171 using Teuchos::reduceAll;
8174 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8175 const int myRank = comm->getRank ();
8177 std::ostringstream errStrm;
8181 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8184 destMat->numExportPacketsPerLID_.modify_host ();
8185 numExportPacketsPerLID =
8188 catch (std::exception& e) {
8189 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8190 << e.what () << std::endl;
8194 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8195 "an exception not a subclass of std::exception" << std::endl;
8199 if (! comm.is_null ()) {
8200 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8203 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8204 TEUCHOS_TEST_FOR_EXCEPTION(
8205 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8206 "exception on at least one process.");
8210 std::ostringstream os;
8211 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8213 std::cerr << os.str ();
8218 numExportPacketsPerLID,
8221 constantNumPackets);
8223 catch (std::exception& e) {
8224 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8225 << e.what () << std::endl;
8229 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8230 "an exception not a subclass of std::exception" << std::endl;
8235 std::ostringstream os;
8236 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8238 std::cerr << os.str ();
8241 if (! comm.is_null ()) {
8242 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8245 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8246 TEUCHOS_TEST_FOR_EXCEPTION(
8247 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8248 "exception on at least one process.");
8253 destMat->numExportPacketsPerLID_.modify_host ();
8254 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8257 std::ostringstream os;
8258 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8260 std::cerr << os.str ();
8264 numExportPacketsPerLID,
8267 constantNumPackets);
8269 std::ostringstream os;
8270 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8272 std::cerr << os.str ();
8279 #ifdef HAVE_TPETRA_MMM_TIMINGS
8280 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
8282 if (! communication_needed) {
8284 std::ostringstream os;
8285 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8286 std::cerr << os.str ();
8291 if (constantNumPackets == 0) {
8293 std::ostringstream os;
8294 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8296 std::cerr << os.str ();
8301 destMat->numExportPacketsPerLID_.sync_host ();
8302 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8304 destMat->numImportPacketsPerLID_.sync_host ();
8305 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8309 std::ostringstream os;
8310 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8312 std::cerr << os.str ();
8314 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8315 destMat->numImportPacketsPerLID_.view_host());
8317 std::ostringstream os;
8318 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8320 std::cerr << os.str ();
8323 size_t totalImportPackets = 0;
8324 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8325 totalImportPackets += numImportPacketsPerLID[i];
8330 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8331 verbosePrefix.get ());
8332 destMat->imports_.modify_host ();
8333 auto hostImports = destMat->imports_.view_host();
8336 destMat->exports_.sync_host ();
8337 auto hostExports = destMat->exports_.view_host();
8339 std::ostringstream os;
8340 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
8342 std::cerr << os.str ();
8344 Distor.doReversePostsAndWaits (hostExports,
8345 numExportPacketsPerLID,
8347 numImportPacketsPerLID);
8349 std::ostringstream os;
8350 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
8352 std::cerr << os.str ();
8357 std::ostringstream os;
8358 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8360 std::cerr << os.str ();
8362 destMat->imports_.modify_host ();
8363 auto hostImports = destMat->imports_.view_host();
8366 destMat->exports_.sync_host ();
8367 auto hostExports = destMat->exports_.view_host();
8369 std::ostringstream os;
8370 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8372 std::cerr << os.str ();
8374 Distor.doReversePostsAndWaits (hostExports,
8378 std::ostringstream os;
8379 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8381 std::cerr << os.str ();
8386 if (constantNumPackets == 0) {
8388 std::ostringstream os;
8389 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8391 std::cerr << os.str ();
8396 destMat->numExportPacketsPerLID_.sync_host ();
8397 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8399 destMat->numImportPacketsPerLID_.sync_host ();
8400 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8403 std::ostringstream os;
8404 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8406 std::cerr << os.str ();
8408 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8409 destMat->numImportPacketsPerLID_.view_host());
8411 std::ostringstream os;
8412 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8414 std::cerr << os.str ();
8417 size_t totalImportPackets = 0;
8418 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8419 totalImportPackets += numImportPacketsPerLID[i];
8424 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8425 verbosePrefix.get ());
8426 destMat->imports_.modify_host ();
8427 auto hostImports = destMat->imports_.view_host();
8430 destMat->exports_.sync_host ();
8431 auto hostExports = destMat->exports_.view_host();
8433 std::ostringstream os;
8434 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8436 std::cerr << os.str ();
8438 Distor.doPostsAndWaits (hostExports,
8439 numExportPacketsPerLID,
8441 numImportPacketsPerLID);
8443 std::ostringstream os;
8444 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8446 std::cerr << os.str ();
8451 std::ostringstream os;
8452 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8454 std::cerr << os.str ();
8456 destMat->imports_.modify_host ();
8457 auto hostImports = destMat->imports_.view_host();
8460 destMat->exports_.sync_host ();
8461 auto hostExports = destMat->exports_.view_host();
8463 std::ostringstream os;
8464 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8466 std::cerr << os.str ();
8468 Distor.doPostsAndWaits (hostExports,
8472 std::ostringstream os;
8473 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8475 std::cerr << os.str ();
8486 bool runOnHost = std::is_same_v<typename device_type::memory_space, Kokkos::HostSpace> && !useKokkosPath;
8488 Teuchos::Array<int> RemotePids;
8490 Teuchos::Array<int> TargetPids;
8496 destMat->numImportPacketsPerLID_.modify_host();
8498 # ifdef HAVE_TPETRA_MMM_TIMINGS
8499 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8501 ArrayRCP<size_t> CSR_rowptr;
8502 ArrayRCP<GO> CSR_colind_GID;
8503 ArrayRCP<LO> CSR_colind_LID;
8504 ArrayRCP<Scalar> CSR_vals;
8506 destMat->imports_.sync_device ();
8507 destMat->numImportPacketsPerLID_.sync_device ();
8509 size_t N = BaseRowMap->getLocalNumElements ();
8511 auto RemoteLIDs_d = RemoteLIDs.view_device();
8512 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8513 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8518 destMat->imports_.view_device(),
8519 destMat->numImportPacketsPerLID_.view_device(),
8533 if (
typeid (LO) ==
typeid (GO)) {
8534 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8537 CSR_colind_LID.resize (CSR_colind_GID.size());
8539 CSR_colind_LID.resize (CSR_colind_GID.size());
8544 for(
size_t i=0; i<static_cast<size_t>(TargetPids.size()); i++)
8546 if(TargetPids[i] == -1) TargetPids[i] = MyPID;
8548 #ifdef HAVE_TPETRA_MMM_TIMINGS
8549 tmCopySPRdata = Teuchos::null;
8558 std::ostringstream os;
8559 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8561 std::cerr << os.str ();
8564 #ifdef HAVE_TPETRA_MMM_TIMINGS
8565 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8567 Import_Util::lowCommunicationMakeColMapAndReindexSerial(CSR_rowptr (),
8577 std::ostringstream os;
8578 os << *verbosePrefix <<
"restrictComm="
8579 << (restrictComm ?
"true" :
"false") << std::endl;
8580 std::cerr << os.str ();
8587 #ifdef HAVE_TPETRA_MMM_TIMINGS
8588 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8591 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8593 MyColMap->replaceCommWithSubset (ReducedComm);
8594 MyColMap = ReducedColMap;
8599 std::ostringstream os;
8600 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8601 std::cerr << os.str ();
8603 destMat->replaceColMap (MyColMap);
8610 if (ReducedComm.is_null ()) {
8612 std::ostringstream os;
8613 os << *verbosePrefix <<
"I am no longer in the communicator; "
8614 "returning" << std::endl;
8615 std::cerr << os.str ();
8624 if ((! reverseMode && xferAsImport !=
nullptr) ||
8625 (reverseMode && xferAsExport !=
nullptr)) {
8627 std::ostringstream os;
8628 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8629 std::cerr << os.str ();
8631 #ifdef HAVE_TPETRA_MMM_TIMINGS
8632 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8634 Import_Util::sortCrsEntries (CSR_rowptr(),
8638 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8639 (reverseMode && xferAsImport !=
nullptr)) {
8641 std::ostringstream os;
8642 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8644 std::cerr << os.str();
8646 #ifdef HAVE_TPETRA_MMM_TIMINGS
8647 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8649 Import_Util::sortAndMergeCrsEntries (CSR_rowptr(),
8652 if (CSR_rowptr[N] != static_cast<size_t>(CSR_vals.size())) {
8653 CSR_colind_LID.resize (CSR_rowptr[N]);
8654 CSR_vals.resize (CSR_rowptr[N]);
8658 TEUCHOS_TEST_FOR_EXCEPTION(
8659 true, std::logic_error,
"Tpetra::CrsMatrix::"
8660 "transferAndFillComplete: Should never get here! "
8661 "Please report this bug to a Tpetra developer.");
8668 std::ostringstream os;
8669 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8670 std::cerr << os.str ();
8679 #ifdef HAVE_TPETRA_MMM_TIMINGS
8680 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8682 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8694 destMat->numImportPacketsPerLID_.modify_host();
8696 # ifdef HAVE_TPETRA_MMM_TIMINGS
8697 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8699 ArrayRCP<size_t> CSR_rowptr;
8700 ArrayRCP<GO> CSR_colind_GID;
8701 ArrayRCP<LO> CSR_colind_LID;
8702 ArrayRCP<Scalar> CSR_vals;
8704 destMat->imports_.sync_device ();
8705 destMat->numImportPacketsPerLID_.sync_device ();
8707 size_t N = BaseRowMap->getLocalNumElements ();
8709 auto RemoteLIDs_d = RemoteLIDs.view_device();
8710 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8711 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8713 Kokkos::View<size_t*,device_type> CSR_rowptr_d;
8714 Kokkos::View<GO*,device_type> CSR_colind_GID_d;
8715 Kokkos::View<LO*,device_type> CSR_colind_LID_d;
8716 Kokkos::View<impl_scalar_type*,device_type> CSR_vals_d;
8717 Kokkos::View<int*,device_type> TargetPids_d;
8722 destMat->imports_.view_device(),
8723 destMat->numImportPacketsPerLID_.view_device(),
8735 Kokkos::resize (CSR_colind_LID_d, CSR_colind_GID_d.size());
8737 #ifdef HAVE_TPETRA_MMM_TIMINGS
8738 tmCopySPRdata = Teuchos::null;
8747 std::ostringstream os;
8748 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8750 std::cerr << os.str ();
8753 #ifdef HAVE_TPETRA_MMM_TIMINGS
8754 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8756 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr_d,
8766 std::ostringstream os;
8767 os << *verbosePrefix <<
"restrictComm="
8768 << (restrictComm ?
"true" :
"false") << std::endl;
8769 std::cerr << os.str ();
8776 #ifdef HAVE_TPETRA_MMM_TIMINGS
8777 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8780 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8782 MyColMap->replaceCommWithSubset (ReducedComm);
8783 MyColMap = ReducedColMap;
8788 std::ostringstream os;
8789 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8790 std::cerr << os.str ();
8792 destMat->replaceColMap (MyColMap);
8799 if (ReducedComm.is_null ()) {
8801 std::ostringstream os;
8802 os << *verbosePrefix <<
"I am no longer in the communicator; "
8803 "returning" << std::endl;
8804 std::cerr << os.str ();
8814 if ((! reverseMode && xferAsImport !=
nullptr) ||
8815 (reverseMode && xferAsExport !=
nullptr)) {
8817 std::ostringstream os;
8818 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8819 std::cerr << os.str ();
8821 #ifdef HAVE_TPETRA_MMM_TIMINGS
8822 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8824 Import_Util::sortCrsEntries (CSR_rowptr_d,
8828 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8829 (reverseMode && xferAsImport !=
nullptr)) {
8831 std::ostringstream os;
8832 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8834 std::cerr << os.str();
8836 #ifdef HAVE_TPETRA_MMM_TIMINGS
8837 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8839 Import_Util::sortAndMergeCrsEntries (CSR_rowptr_d,
8844 TEUCHOS_TEST_FOR_EXCEPTION(
8845 true, std::logic_error,
"Tpetra::CrsMatrix::"
8846 "transferAndFillComplete: Should never get here! "
8847 "Please report this bug to a Tpetra developer.");
8855 std::ostringstream os;
8856 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8857 std::cerr << os.str ();
8861 #ifdef HAVE_TPETRA_MMM_TIMINGS
8862 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8864 destMat->setAllValues (CSR_rowptr_d, CSR_colind_LID_d, CSR_vals_d);
8872 #ifdef HAVE_TPETRA_MMM_TIMINGS
8873 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8876 Teuchos::ParameterList esfc_params;
8878 RCP<import_type> MyImport;
8881 if (iallreduceRequest.get () !=
nullptr) {
8883 std::ostringstream os;
8884 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8886 std::cerr << os.str ();
8888 iallreduceRequest->wait ();
8889 if (reduced_mismatch != 0) {
8895 #ifdef HAVE_TPETRA_MMM_TIMINGS
8896 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8901 std::ostringstream os;
8902 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8903 std::cerr << os.str ();
8906 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8907 Teuchos::ArrayRCP<int> type3PIDs;
8908 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8909 auto colind = getCrsGraph()->getLocalIndicesHost();
8912 std::ostringstream os;
8913 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8914 std::cerr << os.str ();
8918 #ifdef HAVE_TPETRA_MMM_TIMINGS
8919 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8921 Import_Util::reverseNeighborDiscovery(*
this,
8933 std::ostringstream os;
8934 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8935 std::cerr << os.str ();
8938 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8939 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8941 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8942 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8944 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8946 std::vector<bool> IsOwned(numCols,
true);
8947 std::vector<int> SentTo(numCols,-1);
8948 if (! MyImporter.is_null ()) {
8949 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
8950 IsOwned[rlid]=
false;
8954 std::vector<std::pair<int,GO> > usrtg;
8955 usrtg.reserve(TEPID2.size());
8958 const auto& colMap = * (this->getColMap ());
8960 const LO row = TELID2[i];
8961 const int pid = TEPID2[i];
8962 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
8963 const int col = colind[j];
8964 if (IsOwned[col] && SentTo[col] != pid) {
8966 GO gid = colMap.getGlobalElement (col);
8967 usrtg.push_back (std::pair<int,GO> (pid, gid));
8975 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
8977 usrtg.erase(eopg,usrtg.end());
8980 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
8981 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
8984 for(
auto && p : usrtg) {
8985 EPID2[pos]= p.first;
8986 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
8990 Teuchos::ArrayView<int> EPID3 = type3PIDs();
8991 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
8992 GO InfGID = std::numeric_limits<GO>::max();
8993 int InfPID = INT_MAX;
8996 #endif // TPETRA_MIN3
8997 #define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
8998 int i1=0, i2=0, i3=0;
8999 int Len1 = EPID1.size();
9000 int Len2 = EPID2.size();
9001 int Len3 = EPID3.size();
9003 int MyLen=Len1+Len2+Len3;
9004 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
9005 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
9008 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
9009 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
9010 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
9011 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
9013 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
9014 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
9015 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
9017 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9018 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9021 #endif // TPETRA_MIN3
9022 bool added_entry=
false;
9024 if(PID1 == MIN_PID && GID1 == MIN_GID){
9025 userExportLIDs[iloc]=ELID1[i1];
9026 userExportPIDs[iloc]=EPID1[i1];
9031 if(PID2 == MIN_PID && GID2 == MIN_GID){
9033 userExportLIDs[iloc]=ELID2[i2];
9034 userExportPIDs[iloc]=EPID2[i2];
9040 if(PID3 == MIN_PID && GID3 == MIN_GID){
9042 userExportLIDs[iloc]=ELID3[i3];
9043 userExportPIDs[iloc]=EPID3[i3];
9051 std::ostringstream os;
9052 os << *verbosePrefix <<
"Create Import" << std::endl;
9053 std::cerr << os.str ();
9056 #ifdef HAVE_TPETRA_MMM_TIMINGS
9057 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
9059 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
9061 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9062 MyImport = rcp (
new import_type (MyDomainMap,
9065 userExportLIDs.view(0,iloc).getConst(),
9066 userExportPIDs.view(0,iloc).getConst(),
9071 std::ostringstream os;
9072 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
9073 std::cerr << os.str ();
9077 #ifdef HAVE_TPETRA_MMM_TIMINGS
9078 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
9079 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
9081 if(!params.is_null())
9082 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
9083 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
9089 #ifdef HAVE_TPETRA_MMM_TIMINGS
9090 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
9093 std::ostringstream os;
9094 os << *verbosePrefix <<
"Create Import" << std::endl;
9095 std::cerr << os.str ();
9098 #ifdef HAVE_TPETRA_MMM_TIMINGS
9099 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
9101 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
9102 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
9103 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9104 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9107 std::ostringstream os;
9108 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
9109 std::cerr << os.str ();
9112 #ifdef HAVE_TPETRA_MMM_TIMINGS
9113 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
9114 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
9116 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
9119 if (!params.is_null ()) {
9120 esfc_params.set (
"compute global constants",
9121 params->get (
"compute global constants",
true));
9123 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9124 MyImport, Teuchos::null,
9125 rcp (
new Teuchos::ParameterList (esfc_params)));
9128 #ifdef HAVE_TPETRA_MMM_TIMINGS
9129 tmIESFC = Teuchos::null;
9133 std::ostringstream os;
9134 os << *verbosePrefix <<
"Done" << endl;
9135 std::cerr << os.str ();
// Const member-function template wrapper: forwards to transferAndFillComplete,
// passing destMatrix and importer through unchanged, Teuchos::null in the
// third (domain-transfer) position, and then domainMap, rangeMap and params.
// NOTE(review): the declarator lines naming this function and its first
// parameters, and the enclosing braces, are absent from this listing (the
// embedded numbering skips 9141-9144 and 9148). The `importer` argument
// suggests this is the single-Import importAndFillComplete overload; confirm
// against the original file.
9140 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9145 const Teuchos::RCP<const map_type>& domainMap,
9146 const Teuchos::RCP<const map_type>& rangeMap,
9147 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9149 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
// Const member-function template wrapper: forwards to transferAndFillComplete,
// passing destMatrix and rowImporter through unchanged, domainImporter wrapped
// with Teuchos::rcpFromRef in the third (domain-transfer) position, and then
// domainMap, rangeMap and params.
// NOTE(review): the declarator lines naming this function and its first
// parameters, and the enclosing braces, are absent from this listing (the
// embedded numbering skips 9153-9157 and 9161). The rowImporter/domainImporter
// arguments suggest this is the two-Import importAndFillComplete overload;
// confirm against the original file.
9152 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9158 const Teuchos::RCP<const map_type>& domainMap,
9159 const Teuchos::RCP<const map_type>& rangeMap,
9160 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9162 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
// Const member-function template wrapper: forwards to transferAndFillComplete,
// passing destMatrix and exporter through unchanged, Teuchos::null in the
// third (domain-transfer) position, and then domainMap, rangeMap and params.
// NOTE(review): the declarator lines naming this function and its first
// parameters, and the enclosing braces, are absent from this listing (the
// embedded numbering skips 9166-9169 and 9173). The `exporter` argument
// suggests this is the single-Export exportAndFillComplete overload; confirm
// against the original file.
9165 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9170 const Teuchos::RCP<const map_type>& domainMap,
9171 const Teuchos::RCP<const map_type>& rangeMap,
9172 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9174 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
// Const member-function template wrapper: forwards to transferAndFillComplete,
// passing destMatrix and rowExporter through unchanged, domainExporter wrapped
// with Teuchos::rcpFromRef in the third (domain-transfer) position, and then
// domainMap, rangeMap and params.
// NOTE(review): the declarator lines naming this function and its first
// parameters, and the enclosing braces, are absent from this listing (the
// embedded numbering skips 9178-9182 and 9186). The rowExporter/domainExporter
// arguments suggest this is the two-Export exportAndFillComplete overload;
// confirm against the original file.
9177 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9183 const Teuchos::RCP<const map_type>& domainMap,
9184 const Teuchos::RCP<const map_type>& rangeMap,
9185 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9187 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
// Explicit-instantiation macro: expands to an explicit instantiation
// definition of the CrsMatrix class template for the given
// (SCALAR, LO, GO, NODE) combination.
// NOTE(review): the embedded numbering skips 9199, so a continuation line is
// probably missing from this listing; confirm against the original file.
9198 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9200 template class CrsMatrix< SCALAR , LO , GO , NODE >;
// Explicit-instantiation macro for the convert<> member template: expands to
// an instantiation of CrsMatrix<SI,LO,GO,NODE>::convert<SO>() const, whose
// return type is Teuchos::RCP<CrsMatrix<SO,LO,GO,NODE>>. SI is the Scalar type
// of the matrix being converted and SO is the Scalar type of the result.
// NOTE(review): the embedded numbering skips 9203, so a continuation line is
// probably missing from this listing; confirm against the original file.
9202 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9204 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9205 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
// Macro that expands to a declaration of importAndFillCompleteCrsMatrix for
// the given (SCALAR, LO, GO, NODE): the single-Import form taking
// (sourceMatrix, importer, domainMap, rangeMap, params) and returning
// Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>. The Import and Map template
// arguments are spelled through CrsMatrix's local_ordinal_type,
// global_ordinal_type and node_type member typedefs.
// NOTE(review): the embedded numbering skips 9208, so the line between the
// #define and the return type is missing from this listing; presumably it is
// a `template`/`template<>` prefix. Confirm against the original file.
9207 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9209 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9210 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9211 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9212 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9213 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9214 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9215 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9216 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9217 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9218 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9219 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9220 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Macro that expands to a declaration of importAndFillCompleteCrsMatrix for
// the given (SCALAR, LO, GO, NODE): the two-Import form taking
// (sourceMatrix, rowImporter, domainImporter, domainMap, rangeMap, params)
// and returning Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>.
// NOTE(review): the embedded numbering skips 9223, so the line between the
// #define and the return type is missing from this listing; presumably it is
// a `template`/`template<>` prefix. Confirm against the original file.
9222 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9224 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9225 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9226 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9227 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9228 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9229 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9230 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9231 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9232 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9233 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9234 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9235 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9236 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9237 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9238 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Macro that expands to a declaration of exportAndFillCompleteCrsMatrix for
// the given (SCALAR, LO, GO, NODE): the single-Export form taking
// (sourceMatrix, exporter, domainMap, rangeMap, params) and returning
// Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>.
// NOTE(review): the embedded numbering skips 9242, so the line between the
// #define and the return type is missing from this listing; presumably it is
// a `template`/`template<>` prefix. Confirm against the original file.
9241 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9243 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9244 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9245 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9246 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9247 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9248 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9249 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9250 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9251 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9252 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9253 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9254 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Macro that expands to a declaration of exportAndFillCompleteCrsMatrix for
// the given (SCALAR, LO, GO, NODE): the two-Export form taking
// (sourceMatrix, rowExporter, domainExporter, domainMap, rangeMap, params)
// and returning Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>.
// NOTE(review): the embedded numbering skips 9257, so the line between the
// #define and the return type is missing from this listing; presumably it is
// a `template`/`template<>` prefix. Confirm against the original file.
9256 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9258 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9259 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9260 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9261 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9262 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9263 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9264 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9265 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9266 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9267 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9268 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9269 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9270 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9271 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9272 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Umbrella macro for one (SCALAR, LO, GO, NODE) combination: expands, in
// order, to the CrsMatrix class instantiation macro, the single-transfer
// import and export macros, and then the two-transfer (_TWO) import and
// export macros. It does not invoke TPETRA_CRSMATRIX_CONVERT_INSTANT, which
// takes a different parameter list (SO, SI, LO, GO, NODE).
9275 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9276 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9277 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9278 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9279 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9280 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
9282 #endif // TPETRA_CRSMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Functor for the ABSMAX CombineMode of Import and Export operations.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual LocalOrdinal replaceGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceGlobalValues.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
static size_t mergeRowIndicesAndValues(size_t rowLen, local_ordinal_type *cols, impl_scalar_type *vals)
Merge duplicate row indices in the given row, along with their corresponding values.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
typename device_type::execution_space execution_space
The Kokkos execution space.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets) const
Pack this object's data for an Import or Export.
virtual void insertGlobalValuesImpl(crs_graph_type &graph, RowInfo &rowInfo, const GlobalOrdinal gblColInds[], const impl_scalar_type vals[], const size_t numInputEnt)
Common implementation detail of insertGlobalValues and insertGlobalValuesFiltered.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Traits class for packing / unpacking data of type T.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given objects.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getLocalNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
bool isDistributed() const
Whether this is a globally distributed object.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
static bool debug()
Whether Tpetra is in debug mode.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
Scalar scalar_type
The type of each entry in the matrix.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data from *this with the data and maps from crsMatrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void leftScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Left-scale a KokkosSparse::CrsMatrix.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a copy of the diagonal entries of the matrix.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
virtual LocalOrdinal sumIntoLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoLocalValues.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
bool isFillActive() const
Whether the matrix is not fill complete.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply().
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
values_dualv_type::t_dev::const_type getValuesViewDevice(const RowInfo &rowinfo) const
Get a const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow)...
bool isFillComplete() const override
Whether the matrix is fill complete.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Kokkos::View< size_t *, Kokkos::LayoutLeft, device_type >::HostMirror num_row_entries_type
Row offsets for "1-D" storage.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< char *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode CM) override
Unpack the imported column indices and values, and combine into matrix.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
bool fillComplete_
Whether the matrix is fill complete.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using global indices.
Replace old value with maximum of magnitudes of old and new values.
virtual LocalOrdinal sumIntoGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoGlobalValues.
Abstract base class for objects that can be the source of an Import or Export operation.
local_ordinal_type sumIntoLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using local row and column indices.
typename Node::device_type device_type
The Kokkos device type.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process...
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
std::string description() const override
A one-line description of this object.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
size_t getLocalNumEntries() const override
The local number of entries in this matrix.
Replace existing values with new values.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Replace old values with zero.
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
bool isLocallyComplete() const
Is this Export or Import locally complete?
virtual LocalOrdinal replaceLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceLocalValues.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
values_dualv_type::t_dev getValuesViewDeviceNonConst(const RowInfo &rowinfo)
Get a non-const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myR...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
A parallel distribution of indices over processes.
void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const CombineMode CM=ADD)
Insert one or more entries into the matrix, using local column indices.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
A read-only, row-oriented interface to a sparse matrix.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given Map.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
values_dualv_type::t_host::const_type getValuesViewHost(const RowInfo &rowinfo) const
Get a const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow).
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Declaration of Tpetra::Details::iallreduce.
void reduce()
Sum values of a locally replicated multivector across all processes.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas, const bool verbose)
Allocate values (and optionally indices) using the Node.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process...
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
void setAllValues(const typename local_graph_device_type::row_map_type &ptr, const typename local_graph_device_type::entries_type::non_const_type &ind, const typename local_matrix_device_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using local row and column indices.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given Map.
std::shared_ptr< local_multiply_op_type > getLocalMultiplyOperator() const
The local sparse matrix operator (a wrapper of getLocalMatrixDevice() that supports local matrix-vect...
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Accumulate new values into existing values (may not be supported in all classes)
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
void rightScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Right-scale a KokkosSparse::CrsMatrix.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Description of Tpetra's behavior.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
values_dualv_type::t_host getValuesViewHostNonConst(const RowInfo &rowinfo)
Get a non-const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.